cachepc-qemu

Fork of AMDESE/qemu with changes for cachepc side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu
Log | Files | Refs | Submodules | LICENSE | sfeed.txt

preallocate.c (17281B)


      1/*
      2 * preallocate filter driver
      3 *
      4 * The driver performs preallocate operation: it is injected above
      5 * some node, and before each write over EOF it does additional preallocating
      6 * write-zeroes request.
      7 *
      8 * Copyright (c) 2020 Virtuozzo International GmbH.
      9 *
     10 * Author:
     11 *  Sementsov-Ogievskiy Vladimir <vsementsov@virtuozzo.com>
     12 *
     13 * This program is free software; you can redistribute it and/or modify
     14 * it under the terms of the GNU General Public License as published by
     15 * the Free Software Foundation; either version 2 of the License, or
     16 * (at your option) any later version.
     17 *
     18 * This program is distributed in the hope that it will be useful,
     19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     21 * GNU General Public License for more details.
     22 *
     23 * You should have received a copy of the GNU General Public License
     24 * along with this program. If not, see <http://www.gnu.org/licenses/>.
     25 */
     26
     27#include "qemu/osdep.h"
     28
     29#include "qapi/error.h"
     30#include "qemu/module.h"
     31#include "qemu/option.h"
     32#include "qemu/units.h"
     33#include "block/block_int.h"
     34
     35
/* User-configurable parameters of the preallocate filter. */
typedef struct PreallocateOpts {
    /* How much to preallocate beyond the end of a write (default 128M). */
    int64_t prealloc_size;
    /* On preallocation, the file length is aligned up to this (default 1M). */
    int64_t prealloc_align;
} PreallocateOpts;
     40
/*
 * Per-node state of the preallocate filter: the current options plus three
 * cached offsets describing the file child.
 */
typedef struct BDRVPreallocateState {
    /* Options currently in effect (set on open, replaced on reopen commit). */
    PreallocateOpts opts;

    /*
     * Track real data end, to crop preallocation on close. If < 0 the status is
     * unknown.
     *
     * @data_end is a maximum of file size on open (or when we get write/resize
     * permissions) and all write request ends after it. So it's safe to
     * truncate to data_end if it is valid.
     */
    int64_t data_end;

    /*
     * Start of trailing preallocated area which reads as zero. May be smaller
     * than data_end, if user does over-EOF write zero operation. If < 0 the
     * status is unknown.
     *
     * If both @zero_start and @file_end are valid, the region
     * [@zero_start, @file_end) is known to be preallocated zeroes. If @file_end
     * is not valid, @zero_start doesn't make much sense.
     */
    int64_t zero_start;

    /*
     * Real end of file. Actually the cache for bdrv_getlength(bs->file->bs),
     * to avoid extra lseek() calls on each write operation. If < 0 the status
     * is unknown.
     */
    int64_t file_end;

    /*
     * All three states @data_end, @zero_start and @file_end are guaranteed to
     * be invalid (< 0) when we don't have both exclusive BLK_PERM_RESIZE and
     * BLK_PERM_WRITE permissions on file child.
     */
} BDRVPreallocateState;
     78
     79#define PREALLOCATE_OPT_PREALLOC_ALIGN "prealloc-align"
     80#define PREALLOCATE_OPT_PREALLOC_SIZE "prealloc-size"
     81static QemuOptsList runtime_opts = {
     82    .name = "preallocate",
     83    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
     84    .desc = {
     85        {
     86            .name = PREALLOCATE_OPT_PREALLOC_ALIGN,
     87            .type = QEMU_OPT_SIZE,
     88            .help = "on preallocation, align file length to this number, "
     89                "default 1M",
     90        },
     91        {
     92            .name = PREALLOCATE_OPT_PREALLOC_SIZE,
     93            .type = QEMU_OPT_SIZE,
     94            .help = "how much to preallocate, default 128M",
     95        },
     96        { /* end of list */ }
     97    },
     98};
     99
    100static bool preallocate_absorb_opts(PreallocateOpts *dest, QDict *options,
    101                                    BlockDriverState *child_bs, Error **errp)
    102{
    103    QemuOpts *opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
    104
    105    if (!qemu_opts_absorb_qdict(opts, options, errp)) {
    106        return false;
    107    }
    108
    109    dest->prealloc_align =
    110        qemu_opt_get_size(opts, PREALLOCATE_OPT_PREALLOC_ALIGN, 1 * MiB);
    111    dest->prealloc_size =
    112        qemu_opt_get_size(opts, PREALLOCATE_OPT_PREALLOC_SIZE, 128 * MiB);
    113
    114    qemu_opts_del(opts);
    115
    116    if (!QEMU_IS_ALIGNED(dest->prealloc_align, BDRV_SECTOR_SIZE)) {
    117        error_setg(errp, "prealloc-align parameter of preallocate filter "
    118                   "is not aligned to %llu", BDRV_SECTOR_SIZE);
    119        return false;
    120    }
    121
    122    if (!QEMU_IS_ALIGNED(dest->prealloc_align,
    123                         child_bs->bl.request_alignment)) {
    124        error_setg(errp, "prealloc-align parameter of preallocate filter "
    125                   "is not aligned to underlying node request alignment "
    126                   "(%" PRIi32 ")", child_bs->bl.request_alignment);
    127        return false;
    128    }
    129
    130    return true;
    131}
    132
    133static int preallocate_open(BlockDriverState *bs, QDict *options, int flags,
    134                            Error **errp)
    135{
    136    BDRVPreallocateState *s = bs->opaque;
    137
    138    /*
    139     * s->data_end and friends should be initialized on permission update.
    140     * For this to work, mark them invalid.
    141     */
    142    s->file_end = s->zero_start = s->data_end = -EINVAL;
    143
    144    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
    145                               BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
    146                               false, errp);
    147    if (!bs->file) {
    148        return -EINVAL;
    149    }
    150
    151    if (!preallocate_absorb_opts(&s->opts, options, bs->file->bs, errp)) {
    152        return -EINVAL;
    153    }
    154
    155    bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
    156        (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
    157
    158    bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
    159        ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
    160            bs->file->bs->supported_zero_flags);
    161
    162    return 0;
    163}
    164
    165static void preallocate_close(BlockDriverState *bs)
    166{
    167    int ret;
    168    BDRVPreallocateState *s = bs->opaque;
    169
    170    if (s->data_end < 0) {
    171        return;
    172    }
    173
    174    if (s->file_end < 0) {
    175        s->file_end = bdrv_getlength(bs->file->bs);
    176        if (s->file_end < 0) {
    177            return;
    178        }
    179    }
    180
    181    if (s->data_end < s->file_end) {
    182        ret = bdrv_truncate(bs->file, s->data_end, true, PREALLOC_MODE_OFF, 0,
    183                            NULL);
    184        s->file_end = ret < 0 ? ret : s->data_end;
    185    }
    186}
    187
    188
/*
 * Handle reopen.
 *
 * We must implement reopen handlers, otherwise reopen just doesn't work.
 * Handle new options and don't care about preallocation state, as it is
 * handled in set/check permission handlers.
 */
    196
    197static int preallocate_reopen_prepare(BDRVReopenState *reopen_state,
    198                                      BlockReopenQueue *queue, Error **errp)
    199{
    200    PreallocateOpts *opts = g_new0(PreallocateOpts, 1);
    201
    202    if (!preallocate_absorb_opts(opts, reopen_state->options,
    203                                 reopen_state->bs->file->bs, errp)) {
    204        g_free(opts);
    205        return -EINVAL;
    206    }
    207
    208    reopen_state->opaque = opts;
    209
    210    return 0;
    211}
    212
    213static void preallocate_reopen_commit(BDRVReopenState *state)
    214{
    215    BDRVPreallocateState *s = state->bs->opaque;
    216
    217    s->opts = *(PreallocateOpts *)state->opaque;
    218
    219    g_free(state->opaque);
    220    state->opaque = NULL;
    221}
    222
    223static void preallocate_reopen_abort(BDRVReopenState *state)
    224{
    225    g_free(state->opaque);
    226    state->opaque = NULL;
    227}
    228
    229static coroutine_fn int preallocate_co_preadv_part(
    230        BlockDriverState *bs, int64_t offset, int64_t bytes,
    231        QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags)
    232{
    233    return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
    234                               flags);
    235}
    236
    237static int coroutine_fn preallocate_co_pdiscard(BlockDriverState *bs,
    238                                               int64_t offset, int64_t bytes)
    239{
    240    return bdrv_co_pdiscard(bs->file, offset, bytes);
    241}
    242
    243static bool can_write_resize(uint64_t perm)
    244{
    245    return (perm & BLK_PERM_WRITE) && (perm & BLK_PERM_RESIZE);
    246}
    247
    248static bool has_prealloc_perms(BlockDriverState *bs)
    249{
    250    BDRVPreallocateState *s = bs->opaque;
    251
    252    if (can_write_resize(bs->file->perm)) {
    253        assert(!(bs->file->shared_perm & BLK_PERM_WRITE));
    254        assert(!(bs->file->shared_perm & BLK_PERM_RESIZE));
    255        return true;
    256    }
    257
    258    assert(s->data_end < 0);
    259    assert(s->zero_start < 0);
    260    assert(s->file_end < 0);
    261    return false;
    262}
    263
/*
 * Call on each write. Returns true if @want_merge_zero is true and the region
 * [offset, offset + bytes) is zeroed (as a result of this call or earlier
 * preallocation).
 *
 * want_merge_zero is used to merge write-zero request with preallocation in
 * one bdrv_co_pwrite_zeroes() call.
 *
 * Returns false without preallocating anything when the write/resize
 * permissions are not held, when the file length cannot be obtained, or when
 * the request ends at or before @data_end. If the preallocating write-zeroes
 * request fails, @file_end is invalidated and false is returned.
 */
static bool coroutine_fn handle_write(BlockDriverState *bs, int64_t offset,
                                      int64_t bytes, bool want_merge_zero)
{
    BDRVPreallocateState *s = bs->opaque;
    int64_t end = offset + bytes;
    int64_t prealloc_start, prealloc_end;
    int ret;

    if (!has_prealloc_perms(bs)) {
        /* We don't have the state, nor should we try to recover it. */
        return false;
    }

    if (s->data_end < 0) {
        /* No cached data end yet: initialize it from the child's length. */
        s->data_end = bdrv_getlength(bs->file->bs);
        if (s->data_end < 0) {
            return false;
        }

        if (s->file_end < 0) {
            s->file_end = s->data_end;
        }
    }

    if (end <= s->data_end) {
        /* The request does not extend the data area; nothing to track. */
        return false;
    }

    /* We have valid s->data_end, and request writes beyond it. */

    s->data_end = end;
    if (s->zero_start < 0 || !want_merge_zero) {
        /*
         * Either zero_start was unknown, or this request is not going to be
         * merged as a zero write: the zero area cannot start before @end.
         */
        s->zero_start = end;
    }

    if (s->file_end < 0) {
        s->file_end = bdrv_getlength(bs->file->bs);
        if (s->file_end < 0) {
            return false;
        }
    }

    /* Now s->data_end, s->zero_start and s->file_end are valid. */

    if (end <= s->file_end) {
        /* No preallocation needed. */
        return want_merge_zero && offset >= s->zero_start;
    }

    /* Now we want new preallocation, as request writes beyond s->file_end. */

    /*
     * For a mergeable zero write the zeroing starts at the request itself (or
     * at the old file end, whichever is lower); otherwise only the area past
     * the old file end is zeroed.
     */
    prealloc_start = want_merge_zero ? MIN(offset, s->file_end) : s->file_end;
    prealloc_end = QEMU_ALIGN_UP(end + s->opts.prealloc_size,
                                 s->opts.prealloc_align);

    ret = bdrv_co_pwrite_zeroes(
            bs->file, prealloc_start, prealloc_end - prealloc_start,
            BDRV_REQ_NO_FALLBACK | BDRV_REQ_SERIALISING | BDRV_REQ_NO_WAIT);
    if (ret < 0) {
        /* The file end is not known after a failed request: invalidate it. */
        s->file_end = ret;
        return false;
    }

    s->file_end = prealloc_end;
    return want_merge_zero;
}
    338
    339static int coroutine_fn preallocate_co_pwrite_zeroes(BlockDriverState *bs,
    340        int64_t offset, int64_t bytes, BdrvRequestFlags flags)
    341{
    342    bool want_merge_zero =
    343        !(flags & ~(BDRV_REQ_ZERO_WRITE | BDRV_REQ_NO_FALLBACK));
    344    if (handle_write(bs, offset, bytes, want_merge_zero)) {
    345        return 0;
    346    }
    347
    348    return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
    349}
    350
    351static coroutine_fn int preallocate_co_pwritev_part(BlockDriverState *bs,
    352                                                    int64_t offset,
    353                                                    int64_t bytes,
    354                                                    QEMUIOVector *qiov,
    355                                                    size_t qiov_offset,
    356                                                    BdrvRequestFlags flags)
    357{
    358    handle_write(bs, offset, bytes, false);
    359
    360    return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset,
    361                                flags);
    362}
    363
/*
 * Truncate handler.
 *
 * If the real data end is known and the node is being grown past it, the
 * filter's own preallocation is reconciled with the request first (see the
 * comments below); then the truncate is forwarded to the file child.
 *
 * Returns 0 on success or a negative value on failure. When the forwarded
 * truncate fails, all three cached offsets are invalidated.
 */
static int coroutine_fn
preallocate_co_truncate(BlockDriverState *bs, int64_t offset,
                        bool exact, PreallocMode prealloc,
                        BdrvRequestFlags flags, Error **errp)
{
    ERRP_GUARD();
    BDRVPreallocateState *s = bs->opaque;
    int ret;

    if (s->data_end >= 0 && offset > s->data_end) {
        if (s->file_end < 0) {
            /* No cached file length: ask the child. */
            s->file_end = bdrv_getlength(bs->file->bs);
            if (s->file_end < 0) {
                error_setg(errp, "failed to get file length");
                return s->file_end;
            }
        }

        if (prealloc == PREALLOC_MODE_FALLOC) {
            /*
             * If offset <= s->file_end, the task is already done, just
             * update s->data_end, to move part of "filter preallocation"
             * to "preallocation requested by user".
             * Otherwise just proceed to preallocate missing part.
             */
            if (offset <= s->file_end) {
                s->data_end = offset;
                return 0;
            }
        } else {
            /*
             * We have to drop our preallocation, to
             * - avoid "Cannot use preallocation for shrinking files" in
             *   case of offset < file_end
             * - give PREALLOC_MODE_OFF a chance to keep small disk
             *   usage
             * - give PREALLOC_MODE_FULL a chance to actually write the
             *   whole region as user expects
             */
            if (s->file_end > s->data_end) {
                ret = bdrv_co_truncate(bs->file, s->data_end, true,
                                       PREALLOC_MODE_OFF, 0, errp);
                if (ret < 0) {
                    /* The file end is unknown after a failed truncate. */
                    s->file_end = ret;
                    error_prepend(errp, "preallocate-filter: failed to drop "
                                  "write-zero preallocation: ");
                    return ret;
                }
                s->file_end = s->data_end;
            }
        }

        s->data_end = offset;
    }

    ret = bdrv_co_truncate(bs->file, offset, exact, prealloc, flags, errp);
    if (ret < 0) {
        /* Nothing is known about the file any more: invalidate everything. */
        s->file_end = s->zero_start = s->data_end = ret;
        return ret;
    }

    if (has_prealloc_perms(bs)) {
        /* The cached offsets may only be valid while we hold the perms. */
        s->file_end = s->zero_start = s->data_end = offset;
    }
    return 0;
}
    430
    431static int coroutine_fn preallocate_co_flush(BlockDriverState *bs)
    432{
    433    return bdrv_co_flush(bs->file->bs);
    434}
    435
    436static int64_t preallocate_getlength(BlockDriverState *bs)
    437{
    438    int64_t ret;
    439    BDRVPreallocateState *s = bs->opaque;
    440
    441    if (s->data_end >= 0) {
    442        return s->data_end;
    443    }
    444
    445    ret = bdrv_getlength(bs->file->bs);
    446
    447    if (has_prealloc_perms(bs)) {
    448        s->file_end = s->zero_start = s->data_end = ret;
    449    }
    450
    451    return ret;
    452}
    453
    454static int preallocate_check_perm(BlockDriverState *bs,
    455                                  uint64_t perm, uint64_t shared, Error **errp)
    456{
    457    BDRVPreallocateState *s = bs->opaque;
    458
    459    if (s->data_end >= 0 && !can_write_resize(perm)) {
    460        /*
    461         * Lose permissions.
    462         * We should truncate in check_perm, as in set_perm bs->file->perm will
    463         * be already changed, and we should not violate it.
    464         */
    465        if (s->file_end < 0) {
    466            s->file_end = bdrv_getlength(bs->file->bs);
    467            if (s->file_end < 0) {
    468                error_setg(errp, "Failed to get file length");
    469                return s->file_end;
    470            }
    471        }
    472
    473        if (s->data_end < s->file_end) {
    474            int ret = bdrv_truncate(bs->file, s->data_end, true,
    475                                    PREALLOC_MODE_OFF, 0, NULL);
    476            if (ret < 0) {
    477                error_setg(errp, "Failed to drop preallocation");
    478                s->file_end = ret;
    479                return ret;
    480            }
    481            s->file_end = s->data_end;
    482        }
    483    }
    484
    485    return 0;
    486}
    487
    488static void preallocate_set_perm(BlockDriverState *bs,
    489                                 uint64_t perm, uint64_t shared)
    490{
    491    BDRVPreallocateState *s = bs->opaque;
    492
    493    if (can_write_resize(perm)) {
    494        if (s->data_end < 0) {
    495            s->data_end = s->file_end = s->zero_start =
    496                bdrv_getlength(bs->file->bs);
    497        }
    498    } else {
    499        /*
    500         * We drop our permissions, as well as allow shared
    501         * permissions (see preallocate_child_perm), anyone will be able to
    502         * change the child, so mark all states invalid. We'll regain control if
    503         * get good permissions back.
    504         */
    505        s->data_end = s->file_end = s->zero_start = -EINVAL;
    506    }
    507}
    508
    509static void preallocate_child_perm(BlockDriverState *bs, BdrvChild *c,
    510    BdrvChildRole role, BlockReopenQueue *reopen_queue,
    511    uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared)
    512{
    513    bdrv_default_perms(bs, c, role, reopen_queue, perm, shared, nperm, nshared);
    514
    515    if (can_write_resize(perm)) {
    516        /* This should come by default, but let's enforce: */
    517        *nperm |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
    518
    519        /*
    520         * Don't share, to keep our states s->file_end, s->data_end and
    521         * s->zero_start valid.
    522         */
    523        *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
    524    }
    525}
    526
    527BlockDriver bdrv_preallocate_filter = {
    528    .format_name = "preallocate",
    529    .instance_size = sizeof(BDRVPreallocateState),
    530
    531    .bdrv_getlength = preallocate_getlength,
    532    .bdrv_open = preallocate_open,
    533    .bdrv_close = preallocate_close,
    534
    535    .bdrv_reopen_prepare  = preallocate_reopen_prepare,
    536    .bdrv_reopen_commit   = preallocate_reopen_commit,
    537    .bdrv_reopen_abort    = preallocate_reopen_abort,
    538
    539    .bdrv_co_preadv_part = preallocate_co_preadv_part,
    540    .bdrv_co_pwritev_part = preallocate_co_pwritev_part,
    541    .bdrv_co_pwrite_zeroes = preallocate_co_pwrite_zeroes,
    542    .bdrv_co_pdiscard = preallocate_co_pdiscard,
    543    .bdrv_co_flush = preallocate_co_flush,
    544    .bdrv_co_truncate = preallocate_co_truncate,
    545
    546    .bdrv_check_perm = preallocate_check_perm,
    547    .bdrv_set_perm = preallocate_set_perm,
    548    .bdrv_child_perm = preallocate_child_perm,
    549
    550    .has_variable_length = true,
    551    .is_filter = true,
    552};
    553
/* Register the preallocate filter driver with the block layer. */
static void bdrv_preallocate_init(void)
{
    bdrv_register(&bdrv_preallocate_filter);
}

/*
 * NOTE(review): block_init() is defined outside this file; presumably it
 * arranges for bdrv_preallocate_init() to run during block module
 * initialization -- confirm against qemu/module.h.
 */
block_init(bdrv_preallocate_init);