cachepc-qemu

Fork of AMDESE/qemu with changes for cachepc side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu
Log | Files | Refs | Submodules | LICENSE | sfeed.txt

migration.c (26704B)


      1/*
      2 * Migration support for VFIO devices
      3 *
      4 * Copyright NVIDIA, Inc. 2020
      5 *
      6 * This work is licensed under the terms of the GNU GPL, version 2. See
      7 * the COPYING file in the top-level directory.
      8 */
      9
     10#include "qemu/osdep.h"
     11#include "qemu/main-loop.h"
     12#include "qemu/cutils.h"
     13#include <linux/vfio.h>
     14#include <sys/ioctl.h>
     15
     16#include "sysemu/runstate.h"
     17#include "hw/vfio/vfio-common.h"
     18#include "migration/migration.h"
     19#include "migration/vmstate.h"
     20#include "migration/qemu-file.h"
     21#include "migration/register.h"
     22#include "migration/blocker.h"
     23#include "migration/misc.h"
     24#include "qapi/error.h"
     25#include "exec/ramlist.h"
     26#include "exec/ram_addr.h"
     27#include "pci.h"
     28#include "trace.h"
     29#include "hw/hw.h"
     30
     31/*
     32 * Flags to be used as unique delimiters for VFIO devices in the migration
     33 * stream. These flags are composed as:
     34 * 0xffffffff => MSB 32-bit all 1s
     35 * 0xef10     => Magic ID, represents emulated (virtual) function IO
     36 * 0x0000     => 16-bits reserved for flags
     37 *
     38 * The beginning of state information is marked by _DEV_CONFIG_STATE,
     39 * _DEV_SETUP_STATE, or _DEV_DATA_STATE, respectively. The end of a
     40 * certain state information is marked by _END_OF_STATE.
     41 */
     42#define VFIO_MIG_FLAG_END_OF_STATE      (0xffffffffef100001ULL)
     43#define VFIO_MIG_FLAG_DEV_CONFIG_STATE  (0xffffffffef100002ULL)
     44#define VFIO_MIG_FLAG_DEV_SETUP_STATE   (0xffffffffef100003ULL)
     45#define VFIO_MIG_FLAG_DEV_DATA_STATE    (0xffffffffef100004ULL)
     46
     47static int64_t bytes_transferred;
     48
     49static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count,
     50                                  off_t off, bool iswrite)
     51{
     52    int ret;
     53
     54    ret = iswrite ? pwrite(vbasedev->fd, val, count, off) :
     55                    pread(vbasedev->fd, val, count, off);
     56    if (ret < count) {
     57        error_report("vfio_mig_%s %d byte %s: failed at offset 0x%"
     58                     HWADDR_PRIx", err: %s", iswrite ? "write" : "read", count,
     59                     vbasedev->name, off, strerror(errno));
     60        return (ret < 0) ? ret : -EINVAL;
     61    }
     62    return 0;
     63}
     64
     65static int vfio_mig_rw(VFIODevice *vbasedev, __u8 *buf, size_t count,
     66                       off_t off, bool iswrite)
     67{
     68    int ret, done = 0;
     69    __u8 *tbuf = buf;
     70
     71    while (count) {
     72        int bytes = 0;
     73
     74        if (count >= 8 && !(off % 8)) {
     75            bytes = 8;
     76        } else if (count >= 4 && !(off % 4)) {
     77            bytes = 4;
     78        } else if (count >= 2 && !(off % 2)) {
     79            bytes = 2;
     80        } else {
     81            bytes = 1;
     82        }
     83
     84        ret = vfio_mig_access(vbasedev, tbuf, bytes, off, iswrite);
     85        if (ret) {
     86            return ret;
     87        }
     88
     89        count -= bytes;
     90        done += bytes;
     91        off += bytes;
     92        tbuf += bytes;
     93    }
     94    return done;
     95}
     96
     97#define vfio_mig_read(f, v, c, o)       vfio_mig_rw(f, (__u8 *)v, c, o, false)
     98#define vfio_mig_write(f, v, c, o)      vfio_mig_rw(f, (__u8 *)v, c, o, true)
     99
    100#define VFIO_MIG_STRUCT_OFFSET(f)       \
    101                                 offsetof(struct vfio_device_migration_info, f)
    102/*
    103 * Change the device_state register for device @vbasedev. Bits set in @mask
    104 * are preserved, bits set in @value are set, and bits not set in either @mask
    105 * or @value are cleared in device_state. If the register cannot be accessed,
    106 * the resulting state would be invalid, or the device enters an error state,
    107 * an error is returned.
    108 */
    109
    110static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask,
    111                                    uint32_t value)
    112{
    113    VFIOMigration *migration = vbasedev->migration;
    114    VFIORegion *region = &migration->region;
    115    off_t dev_state_off = region->fd_offset +
    116                          VFIO_MIG_STRUCT_OFFSET(device_state);
    117    uint32_t device_state;
    118    int ret;
    119
    120    ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state),
    121                        dev_state_off);
    122    if (ret < 0) {
    123        return ret;
    124    }
    125
    126    device_state = (device_state & mask) | value;
    127
    128    if (!VFIO_DEVICE_STATE_VALID(device_state)) {
    129        return -EINVAL;
    130    }
    131
    132    ret = vfio_mig_write(vbasedev, &device_state, sizeof(device_state),
    133                         dev_state_off);
    134    if (ret < 0) {
    135        int rret;
    136
    137        rret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state),
    138                             dev_state_off);
    139
    140        if ((rret < 0) || (VFIO_DEVICE_STATE_IS_ERROR(device_state))) {
    141            hw_error("%s: Device in error state 0x%x", vbasedev->name,
    142                     device_state);
    143            return rret ? rret : -EIO;
    144        }
    145        return ret;
    146    }
    147
    148    migration->device_state = device_state;
    149    trace_vfio_migration_set_state(vbasedev->name, device_state);
    150    return 0;
    151}
    152
    153static void *get_data_section_size(VFIORegion *region, uint64_t data_offset,
    154                                   uint64_t data_size, uint64_t *size)
    155{
    156    void *ptr = NULL;
    157    uint64_t limit = 0;
    158    int i;
    159
    160    if (!region->mmaps) {
    161        if (size) {
    162            *size = MIN(data_size, region->size - data_offset);
    163        }
    164        return ptr;
    165    }
    166
    167    for (i = 0; i < region->nr_mmaps; i++) {
    168        VFIOMmap *map = region->mmaps + i;
    169
    170        if ((data_offset >= map->offset) &&
    171            (data_offset < map->offset + map->size)) {
    172
    173            /* check if data_offset is within sparse mmap areas */
    174            ptr = map->mmap + data_offset - map->offset;
    175            if (size) {
    176                *size = MIN(data_size, map->offset + map->size - data_offset);
    177            }
    178            break;
    179        } else if ((data_offset < map->offset) &&
    180                   (!limit || limit > map->offset)) {
    181            /*
    182             * data_offset is not within sparse mmap areas, find size of
    183             * non-mapped area. Check through all list since region->mmaps list
    184             * is not sorted.
    185             */
    186            limit = map->offset;
    187        }
    188    }
    189
    190    if (!ptr && size) {
    191        *size = limit ? MIN(data_size, limit - data_offset) : data_size;
    192    }
    193    return ptr;
    194}
    195
    196static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size)
    197{
    198    VFIOMigration *migration = vbasedev->migration;
    199    VFIORegion *region = &migration->region;
    200    uint64_t data_offset = 0, data_size = 0, sz;
    201    int ret;
    202
    203    ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset),
    204                      region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset));
    205    if (ret < 0) {
    206        return ret;
    207    }
    208
    209    ret = vfio_mig_read(vbasedev, &data_size, sizeof(data_size),
    210                        region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size));
    211    if (ret < 0) {
    212        return ret;
    213    }
    214
    215    trace_vfio_save_buffer(vbasedev->name, data_offset, data_size,
    216                           migration->pending_bytes);
    217
    218    qemu_put_be64(f, data_size);
    219    sz = data_size;
    220
    221    while (sz) {
    222        void *buf;
    223        uint64_t sec_size;
    224        bool buf_allocated = false;
    225
    226        buf = get_data_section_size(region, data_offset, sz, &sec_size);
    227
    228        if (!buf) {
    229            buf = g_try_malloc(sec_size);
    230            if (!buf) {
    231                error_report("%s: Error allocating buffer ", __func__);
    232                return -ENOMEM;
    233            }
    234            buf_allocated = true;
    235
    236            ret = vfio_mig_read(vbasedev, buf, sec_size,
    237                                region->fd_offset + data_offset);
    238            if (ret < 0) {
    239                g_free(buf);
    240                return ret;
    241            }
    242        }
    243
    244        qemu_put_buffer(f, buf, sec_size);
    245
    246        if (buf_allocated) {
    247            g_free(buf);
    248        }
    249        sz -= sec_size;
    250        data_offset += sec_size;
    251    }
    252
    253    ret = qemu_file_get_error(f);
    254
    255    if (!ret && size) {
    256        *size = data_size;
    257    }
    258
    259    bytes_transferred += data_size;
    260    return ret;
    261}
    262
    263static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
    264                            uint64_t data_size)
    265{
    266    VFIORegion *region = &vbasedev->migration->region;
    267    uint64_t data_offset = 0, size, report_size;
    268    int ret;
    269
    270    do {
    271        ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset),
    272                      region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset));
    273        if (ret < 0) {
    274            return ret;
    275        }
    276
    277        if (data_offset + data_size > region->size) {
    278            /*
    279             * If data_size is greater than the data section of migration region
    280             * then iterate the write buffer operation. This case can occur if
    281             * size of migration region at destination is smaller than size of
    282             * migration region at source.
    283             */
    284            report_size = size = region->size - data_offset;
    285            data_size -= size;
    286        } else {
    287            report_size = size = data_size;
    288            data_size = 0;
    289        }
    290
    291        trace_vfio_load_state_device_data(vbasedev->name, data_offset, size);
    292
    293        while (size) {
    294            void *buf;
    295            uint64_t sec_size;
    296            bool buf_alloc = false;
    297
    298            buf = get_data_section_size(region, data_offset, size, &sec_size);
    299
    300            if (!buf) {
    301                buf = g_try_malloc(sec_size);
    302                if (!buf) {
    303                    error_report("%s: Error allocating buffer ", __func__);
    304                    return -ENOMEM;
    305                }
    306                buf_alloc = true;
    307            }
    308
    309            qemu_get_buffer(f, buf, sec_size);
    310
    311            if (buf_alloc) {
    312                ret = vfio_mig_write(vbasedev, buf, sec_size,
    313                        region->fd_offset + data_offset);
    314                g_free(buf);
    315
    316                if (ret < 0) {
    317                    return ret;
    318                }
    319            }
    320            size -= sec_size;
    321            data_offset += sec_size;
    322        }
    323
    324        ret = vfio_mig_write(vbasedev, &report_size, sizeof(report_size),
    325                        region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size));
    326        if (ret < 0) {
    327            return ret;
    328        }
    329    } while (data_size);
    330
    331    return 0;
    332}
    333
    334static int vfio_update_pending(VFIODevice *vbasedev)
    335{
    336    VFIOMigration *migration = vbasedev->migration;
    337    VFIORegion *region = &migration->region;
    338    uint64_t pending_bytes = 0;
    339    int ret;
    340
    341    ret = vfio_mig_read(vbasedev, &pending_bytes, sizeof(pending_bytes),
    342                    region->fd_offset + VFIO_MIG_STRUCT_OFFSET(pending_bytes));
    343    if (ret < 0) {
    344        migration->pending_bytes = 0;
    345        return ret;
    346    }
    347
    348    migration->pending_bytes = pending_bytes;
    349    trace_vfio_update_pending(vbasedev->name, pending_bytes);
    350    return 0;
    351}
    352
    353static int vfio_save_device_config_state(QEMUFile *f, void *opaque)
    354{
    355    VFIODevice *vbasedev = opaque;
    356
    357    qemu_put_be64(f, VFIO_MIG_FLAG_DEV_CONFIG_STATE);
    358
    359    if (vbasedev->ops && vbasedev->ops->vfio_save_config) {
    360        vbasedev->ops->vfio_save_config(vbasedev, f);
    361    }
    362
    363    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
    364
    365    trace_vfio_save_device_config_state(vbasedev->name);
    366
    367    return qemu_file_get_error(f);
    368}
    369
    370static int vfio_load_device_config_state(QEMUFile *f, void *opaque)
    371{
    372    VFIODevice *vbasedev = opaque;
    373    uint64_t data;
    374
    375    if (vbasedev->ops && vbasedev->ops->vfio_load_config) {
    376        int ret;
    377
    378        ret = vbasedev->ops->vfio_load_config(vbasedev, f);
    379        if (ret) {
    380            error_report("%s: Failed to load device config space",
    381                         vbasedev->name);
    382            return ret;
    383        }
    384    }
    385
    386    data = qemu_get_be64(f);
    387    if (data != VFIO_MIG_FLAG_END_OF_STATE) {
    388        error_report("%s: Failed loading device config space, "
    389                     "end flag incorrect 0x%"PRIx64, vbasedev->name, data);
    390        return -EINVAL;
    391    }
    392
    393    trace_vfio_load_device_config_state(vbasedev->name);
    394    return qemu_file_get_error(f);
    395}
    396
    397static void vfio_migration_cleanup(VFIODevice *vbasedev)
    398{
    399    VFIOMigration *migration = vbasedev->migration;
    400
    401    if (migration->region.mmaps) {
    402        vfio_region_unmap(&migration->region);
    403    }
    404}
    405
    406/* ---------------------------------------------------------------------- */
    407
    408static int vfio_save_setup(QEMUFile *f, void *opaque)
    409{
    410    VFIODevice *vbasedev = opaque;
    411    VFIOMigration *migration = vbasedev->migration;
    412    int ret;
    413
    414    trace_vfio_save_setup(vbasedev->name);
    415
    416    qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
    417
    418    if (migration->region.mmaps) {
    419        /*
    420         * Calling vfio_region_mmap() from migration thread. Memory API called
    421         * from this function require locking the iothread when called from
    422         * outside the main loop thread.
    423         */
    424        qemu_mutex_lock_iothread();
    425        ret = vfio_region_mmap(&migration->region);
    426        qemu_mutex_unlock_iothread();
    427        if (ret) {
    428            error_report("%s: Failed to mmap VFIO migration region: %s",
    429                         vbasedev->name, strerror(-ret));
    430            error_report("%s: Falling back to slow path", vbasedev->name);
    431        }
    432    }
    433
    434    ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_MASK,
    435                                   VFIO_DEVICE_STATE_SAVING);
    436    if (ret) {
    437        error_report("%s: Failed to set state SAVING", vbasedev->name);
    438        return ret;
    439    }
    440
    441    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
    442
    443    ret = qemu_file_get_error(f);
    444    if (ret) {
    445        return ret;
    446    }
    447
    448    return 0;
    449}
    450
    451static void vfio_save_cleanup(void *opaque)
    452{
    453    VFIODevice *vbasedev = opaque;
    454
    455    vfio_migration_cleanup(vbasedev);
    456    trace_vfio_save_cleanup(vbasedev->name);
    457}
    458
    459static void vfio_save_pending(QEMUFile *f, void *opaque,
    460                              uint64_t threshold_size,
    461                              uint64_t *res_precopy_only,
    462                              uint64_t *res_compatible,
    463                              uint64_t *res_postcopy_only)
    464{
    465    VFIODevice *vbasedev = opaque;
    466    VFIOMigration *migration = vbasedev->migration;
    467    int ret;
    468
    469    ret = vfio_update_pending(vbasedev);
    470    if (ret) {
    471        return;
    472    }
    473
    474    *res_precopy_only += migration->pending_bytes;
    475
    476    trace_vfio_save_pending(vbasedev->name, *res_precopy_only,
    477                            *res_postcopy_only, *res_compatible);
    478}
    479
    480static int vfio_save_iterate(QEMUFile *f, void *opaque)
    481{
    482    VFIODevice *vbasedev = opaque;
    483    VFIOMigration *migration = vbasedev->migration;
    484    uint64_t data_size;
    485    int ret;
    486
    487    qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE);
    488
    489    if (migration->pending_bytes == 0) {
    490        ret = vfio_update_pending(vbasedev);
    491        if (ret) {
    492            return ret;
    493        }
    494
    495        if (migration->pending_bytes == 0) {
    496            qemu_put_be64(f, 0);
    497            qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
    498            /* indicates data finished, goto complete phase */
    499            return 1;
    500        }
    501    }
    502
    503    ret = vfio_save_buffer(f, vbasedev, &data_size);
    504    if (ret) {
    505        error_report("%s: vfio_save_buffer failed %s", vbasedev->name,
    506                     strerror(errno));
    507        return ret;
    508    }
    509
    510    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
    511
    512    ret = qemu_file_get_error(f);
    513    if (ret) {
    514        return ret;
    515    }
    516
    517    /*
    518     * Reset pending_bytes as .save_live_pending is not called during savevm or
    519     * snapshot case, in such case vfio_update_pending() at the start of this
    520     * function updates pending_bytes.
    521     */
    522    migration->pending_bytes = 0;
    523    trace_vfio_save_iterate(vbasedev->name, data_size);
    524    return 0;
    525}
    526
    527static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
    528{
    529    VFIODevice *vbasedev = opaque;
    530    VFIOMigration *migration = vbasedev->migration;
    531    uint64_t data_size;
    532    int ret;
    533
    534    ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_RUNNING,
    535                                   VFIO_DEVICE_STATE_SAVING);
    536    if (ret) {
    537        error_report("%s: Failed to set state STOP and SAVING",
    538                     vbasedev->name);
    539        return ret;
    540    }
    541
    542    ret = vfio_update_pending(vbasedev);
    543    if (ret) {
    544        return ret;
    545    }
    546
    547    while (migration->pending_bytes > 0) {
    548        qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE);
    549        ret = vfio_save_buffer(f, vbasedev, &data_size);
    550        if (ret < 0) {
    551            error_report("%s: Failed to save buffer", vbasedev->name);
    552            return ret;
    553        }
    554
    555        if (data_size == 0) {
    556            break;
    557        }
    558
    559        ret = vfio_update_pending(vbasedev);
    560        if (ret) {
    561            return ret;
    562        }
    563    }
    564
    565    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
    566
    567    ret = qemu_file_get_error(f);
    568    if (ret) {
    569        return ret;
    570    }
    571
    572    ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_SAVING, 0);
    573    if (ret) {
    574        error_report("%s: Failed to set state STOPPED", vbasedev->name);
    575        return ret;
    576    }
    577
    578    trace_vfio_save_complete_precopy(vbasedev->name);
    579    return ret;
    580}
    581
    582static void vfio_save_state(QEMUFile *f, void *opaque)
    583{
    584    VFIODevice *vbasedev = opaque;
    585    int ret;
    586
    587    ret = vfio_save_device_config_state(f, opaque);
    588    if (ret) {
    589        error_report("%s: Failed to save device config space",
    590                     vbasedev->name);
    591        qemu_file_set_error(f, ret);
    592    }
    593}
    594
    595static int vfio_load_setup(QEMUFile *f, void *opaque)
    596{
    597    VFIODevice *vbasedev = opaque;
    598    VFIOMigration *migration = vbasedev->migration;
    599    int ret = 0;
    600
    601    if (migration->region.mmaps) {
    602        ret = vfio_region_mmap(&migration->region);
    603        if (ret) {
    604            error_report("%s: Failed to mmap VFIO migration region %d: %s",
    605                         vbasedev->name, migration->region.nr,
    606                         strerror(-ret));
    607            error_report("%s: Falling back to slow path", vbasedev->name);
    608        }
    609    }
    610
    611    ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_MASK,
    612                                   VFIO_DEVICE_STATE_RESUMING);
    613    if (ret) {
    614        error_report("%s: Failed to set state RESUMING", vbasedev->name);
    615        if (migration->region.mmaps) {
    616            vfio_region_unmap(&migration->region);
    617        }
    618    }
    619    return ret;
    620}
    621
    622static int vfio_load_cleanup(void *opaque)
    623{
    624    VFIODevice *vbasedev = opaque;
    625
    626    vfio_migration_cleanup(vbasedev);
    627    trace_vfio_load_cleanup(vbasedev->name);
    628    return 0;
    629}
    630
    631static int vfio_load_state(QEMUFile *f, void *opaque, int version_id)
    632{
    633    VFIODevice *vbasedev = opaque;
    634    int ret = 0;
    635    uint64_t data;
    636
    637    data = qemu_get_be64(f);
    638    while (data != VFIO_MIG_FLAG_END_OF_STATE) {
    639
    640        trace_vfio_load_state(vbasedev->name, data);
    641
    642        switch (data) {
    643        case VFIO_MIG_FLAG_DEV_CONFIG_STATE:
    644        {
    645            return vfio_load_device_config_state(f, opaque);
    646        }
    647        case VFIO_MIG_FLAG_DEV_SETUP_STATE:
    648        {
    649            data = qemu_get_be64(f);
    650            if (data == VFIO_MIG_FLAG_END_OF_STATE) {
    651                return ret;
    652            } else {
    653                error_report("%s: SETUP STATE: EOS not found 0x%"PRIx64,
    654                             vbasedev->name, data);
    655                return -EINVAL;
    656            }
    657            break;
    658        }
    659        case VFIO_MIG_FLAG_DEV_DATA_STATE:
    660        {
    661            uint64_t data_size = qemu_get_be64(f);
    662
    663            if (data_size) {
    664                ret = vfio_load_buffer(f, vbasedev, data_size);
    665                if (ret < 0) {
    666                    return ret;
    667                }
    668            }
    669            break;
    670        }
    671        default:
    672            error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, data);
    673            return -EINVAL;
    674        }
    675
    676        data = qemu_get_be64(f);
    677        ret = qemu_file_get_error(f);
    678        if (ret) {
    679            return ret;
    680        }
    681    }
    682    return ret;
    683}
    684
    685static SaveVMHandlers savevm_vfio_handlers = {
    686    .save_setup = vfio_save_setup,
    687    .save_cleanup = vfio_save_cleanup,
    688    .save_live_pending = vfio_save_pending,
    689    .save_live_iterate = vfio_save_iterate,
    690    .save_live_complete_precopy = vfio_save_complete_precopy,
    691    .save_state = vfio_save_state,
    692    .load_setup = vfio_load_setup,
    693    .load_cleanup = vfio_load_cleanup,
    694    .load_state = vfio_load_state,
    695};
    696
    697/* ---------------------------------------------------------------------- */
    698
    699static void vfio_vmstate_change(void *opaque, bool running, RunState state)
    700{
    701    VFIODevice *vbasedev = opaque;
    702    VFIOMigration *migration = vbasedev->migration;
    703    uint32_t value, mask;
    704    int ret;
    705
    706    if (vbasedev->migration->vm_running == running) {
    707        return;
    708    }
    709
    710    if (running) {
    711        /*
    712         * Here device state can have one of _SAVING, _RESUMING or _STOP bit.
    713         * Transition from _SAVING to _RUNNING can happen if there is migration
    714         * failure, in that case clear _SAVING bit.
    715         * Transition from _RESUMING to _RUNNING occurs during resuming
    716         * phase, in that case clear _RESUMING bit.
    717         * In both the above cases, set _RUNNING bit.
    718         */
    719        mask = ~VFIO_DEVICE_STATE_MASK;
    720        value = VFIO_DEVICE_STATE_RUNNING;
    721    } else {
    722        /*
    723         * Here device state could be either _RUNNING or _SAVING|_RUNNING. Reset
    724         * _RUNNING bit
    725         */
    726        mask = ~VFIO_DEVICE_STATE_RUNNING;
    727
    728        /*
    729         * When VM state transition to stop for savevm command, device should
    730         * start saving data.
    731         */
    732        if (state == RUN_STATE_SAVE_VM) {
    733            value = VFIO_DEVICE_STATE_SAVING;
    734        } else {
    735            value = 0;
    736        }
    737    }
    738
    739    ret = vfio_migration_set_state(vbasedev, mask, value);
    740    if (ret) {
    741        /*
    742         * Migration should be aborted in this case, but vm_state_notify()
    743         * currently does not support reporting failures.
    744         */
    745        error_report("%s: Failed to set device state 0x%x", vbasedev->name,
    746                     (migration->device_state & mask) | value);
    747        qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
    748    }
    749    vbasedev->migration->vm_running = running;
    750    trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state),
    751            (migration->device_state & mask) | value);
    752}
    753
    754static void vfio_migration_state_notifier(Notifier *notifier, void *data)
    755{
    756    MigrationState *s = data;
    757    VFIOMigration *migration = container_of(notifier, VFIOMigration,
    758                                            migration_state);
    759    VFIODevice *vbasedev = migration->vbasedev;
    760    int ret;
    761
    762    trace_vfio_migration_state_notifier(vbasedev->name,
    763                                        MigrationStatus_str(s->state));
    764
    765    switch (s->state) {
    766    case MIGRATION_STATUS_CANCELLING:
    767    case MIGRATION_STATUS_CANCELLED:
    768    case MIGRATION_STATUS_FAILED:
    769        bytes_transferred = 0;
    770        ret = vfio_migration_set_state(vbasedev,
    771                      ~(VFIO_DEVICE_STATE_SAVING | VFIO_DEVICE_STATE_RESUMING),
    772                      VFIO_DEVICE_STATE_RUNNING);
    773        if (ret) {
    774            error_report("%s: Failed to set state RUNNING", vbasedev->name);
    775        }
    776    }
    777}
    778
    779static void vfio_migration_exit(VFIODevice *vbasedev)
    780{
    781    VFIOMigration *migration = vbasedev->migration;
    782
    783    vfio_region_exit(&migration->region);
    784    vfio_region_finalize(&migration->region);
    785    g_free(vbasedev->migration);
    786    vbasedev->migration = NULL;
    787}
    788
    789static int vfio_migration_init(VFIODevice *vbasedev,
    790                               struct vfio_region_info *info)
    791{
    792    int ret;
    793    Object *obj;
    794    VFIOMigration *migration;
    795    char id[256] = "";
    796    g_autofree char *path = NULL, *oid = NULL;
    797
    798    if (!vbasedev->ops->vfio_get_object) {
    799        return -EINVAL;
    800    }
    801
    802    obj = vbasedev->ops->vfio_get_object(vbasedev);
    803    if (!obj) {
    804        return -EINVAL;
    805    }
    806
    807    vbasedev->migration = g_new0(VFIOMigration, 1);
    808
    809    ret = vfio_region_setup(obj, vbasedev, &vbasedev->migration->region,
    810                            info->index, "migration");
    811    if (ret) {
    812        error_report("%s: Failed to setup VFIO migration region %d: %s",
    813                     vbasedev->name, info->index, strerror(-ret));
    814        goto err;
    815    }
    816
    817    if (!vbasedev->migration->region.size) {
    818        error_report("%s: Invalid zero-sized VFIO migration region %d",
    819                     vbasedev->name, info->index);
    820        ret = -EINVAL;
    821        goto err;
    822    }
    823
    824    migration = vbasedev->migration;
    825    migration->vbasedev = vbasedev;
    826
    827    oid = vmstate_if_get_id(VMSTATE_IF(DEVICE(obj)));
    828    if (oid) {
    829        path = g_strdup_printf("%s/vfio", oid);
    830    } else {
    831        path = g_strdup("vfio");
    832    }
    833    strpadcpy(id, sizeof(id), path, '\0');
    834
    835    register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers,
    836                         vbasedev);
    837
    838    migration->vm_state = qdev_add_vm_change_state_handler(vbasedev->dev,
    839                                                           vfio_vmstate_change,
    840                                                           vbasedev);
    841    migration->migration_state.notify = vfio_migration_state_notifier;
    842    add_migration_state_change_notifier(&migration->migration_state);
    843    return 0;
    844
    845err:
    846    vfio_migration_exit(vbasedev);
    847    return ret;
    848}
    849
    850/* ---------------------------------------------------------------------- */
    851
    852int64_t vfio_mig_bytes_transferred(void)
    853{
    854    return bytes_transferred;
    855}
    856
    857int vfio_migration_probe(VFIODevice *vbasedev, Error **errp)
    858{
    859    VFIOContainer *container = vbasedev->group->container;
    860    struct vfio_region_info *info = NULL;
    861    int ret = -ENOTSUP;
    862
    863    if (!vbasedev->enable_migration || !container->dirty_pages_supported) {
    864        goto add_blocker;
    865    }
    866
    867    ret = vfio_get_dev_region_info(vbasedev, VFIO_REGION_TYPE_MIGRATION,
    868                                   VFIO_REGION_SUBTYPE_MIGRATION, &info);
    869    if (ret) {
    870        goto add_blocker;
    871    }
    872
    873    ret = vfio_migration_init(vbasedev, info);
    874    if (ret) {
    875        goto add_blocker;
    876    }
    877
    878    trace_vfio_migration_probe(vbasedev->name, info->index);
    879    g_free(info);
    880    return 0;
    881
    882add_blocker:
    883    error_setg(&vbasedev->migration_blocker,
    884               "VFIO device doesn't support migration");
    885    g_free(info);
    886
    887    ret = migrate_add_blocker(vbasedev->migration_blocker, errp);
    888    if (ret < 0) {
    889        error_free(vbasedev->migration_blocker);
    890        vbasedev->migration_blocker = NULL;
    891    }
    892    return ret;
    893}
    894
    895void vfio_migration_finalize(VFIODevice *vbasedev)
    896{
    897    if (vbasedev->migration) {
    898        VFIOMigration *migration = vbasedev->migration;
    899
    900        remove_migration_state_change_notifier(&migration->migration_state);
    901        qemu_del_vm_change_state_handler(migration->vm_state);
    902        unregister_savevm(VMSTATE_IF(vbasedev->dev), "vfio", vbasedev);
    903        vfio_migration_exit(vbasedev);
    904    }
    905
    906    if (vbasedev->migration_blocker) {
    907        migrate_del_blocker(vbasedev->migration_blocker);
    908        error_free(vbasedev->migration_blocker);
    909        vbasedev->migration_blocker = NULL;
    910    }
    911}