cachepc-qemu

Fork of AMDESE/qemu with changes for cachepc side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu
Log | Files | Refs | Submodules | LICENSE | sfeed.txt

ns.c (17235B)


      1/*
      2 * QEMU NVM Express Virtual Namespace
      3 *
      4 * Copyright (c) 2019 CNEX Labs
      5 * Copyright (c) 2020 Samsung Electronics
      6 *
      7 * Authors:
      8 *  Klaus Jensen      <k.jensen@samsung.com>
      9 *
     10 * This work is licensed under the terms of the GNU GPL, version 2. See the
     11 * COPYING file in the top-level directory.
     12 *
     13 */
     14
     15#include "qemu/osdep.h"
     16#include "qemu/units.h"
     17#include "qemu/error-report.h"
     18#include "qapi/error.h"
     19#include "sysemu/sysemu.h"
     20#include "sysemu/block-backend.h"
     21
     22#include "nvme.h"
     23#include "trace.h"
     24
     25#define MIN_DISCARD_GRANULARITY (4 * KiB)
     26#define NVME_DEFAULT_ZONE_SIZE   (128 * MiB)
     27
     28void nvme_ns_init_format(NvmeNamespace *ns)
     29{
     30    NvmeIdNs *id_ns = &ns->id_ns;
     31    BlockDriverInfo bdi;
     32    int npdg, nlbas, ret;
     33
     34    ns->lbaf = id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(id_ns->flbas)];
     35    ns->lbasz = 1 << ns->lbaf.ds;
     36
     37    nlbas = ns->size / (ns->lbasz + ns->lbaf.ms);
     38
     39    id_ns->nsze = cpu_to_le64(nlbas);
     40
     41    /* no thin provisioning */
     42    id_ns->ncap = id_ns->nsze;
     43    id_ns->nuse = id_ns->ncap;
     44
     45    ns->moff = (int64_t)nlbas << ns->lbaf.ds;
     46
     47    npdg = ns->blkconf.discard_granularity / ns->lbasz;
     48
     49    ret = bdrv_get_info(blk_bs(ns->blkconf.blk), &bdi);
     50    if (ret >= 0 && bdi.cluster_size > ns->blkconf.discard_granularity) {
     51        npdg = bdi.cluster_size / ns->lbasz;
     52    }
     53
     54    id_ns->npda = id_ns->npdg = npdg - 1;
     55}
     56
     57static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
     58{
     59    static uint64_t ns_count;
     60    NvmeIdNs *id_ns = &ns->id_ns;
     61    uint8_t ds;
     62    uint16_t ms;
     63    int i;
     64
     65    ns->csi = NVME_CSI_NVM;
     66    ns->status = 0x0;
     67
     68    ns->id_ns.dlfeat = 0x1;
     69
     70    /* support DULBE and I/O optimization fields */
     71    id_ns->nsfeat |= (0x4 | 0x10);
     72
     73    if (ns->params.shared) {
     74        id_ns->nmic |= NVME_NMIC_NS_SHARED;
     75    }
     76
     77    /* Substitute a missing EUI-64 by an autogenerated one */
     78    ++ns_count;
     79    if (!ns->params.eui64 && ns->params.eui64_default) {
     80        ns->params.eui64 = ns_count + NVME_EUI64_DEFAULT;
     81    }
     82
     83    /* simple copy */
     84    id_ns->mssrl = cpu_to_le16(ns->params.mssrl);
     85    id_ns->mcl = cpu_to_le32(ns->params.mcl);
     86    id_ns->msrc = ns->params.msrc;
     87    id_ns->eui64 = cpu_to_be64(ns->params.eui64);
     88
     89    ds = 31 - clz32(ns->blkconf.logical_block_size);
     90    ms = ns->params.ms;
     91
     92    id_ns->mc = NVME_ID_NS_MC_EXTENDED | NVME_ID_NS_MC_SEPARATE;
     93
     94    if (ms && ns->params.mset) {
     95        id_ns->flbas |= NVME_ID_NS_FLBAS_EXTENDED;
     96    }
     97
     98    id_ns->dpc = 0x1f;
     99    id_ns->dps = ns->params.pi;
    100    if (ns->params.pi && ns->params.pil) {
    101        id_ns->dps |= NVME_ID_NS_DPS_FIRST_EIGHT;
    102    }
    103
    104    static const NvmeLBAF lbaf[16] = {
    105        [0] = { .ds =  9           },
    106        [1] = { .ds =  9, .ms =  8 },
    107        [2] = { .ds =  9, .ms = 16 },
    108        [3] = { .ds =  9, .ms = 64 },
    109        [4] = { .ds = 12           },
    110        [5] = { .ds = 12, .ms =  8 },
    111        [6] = { .ds = 12, .ms = 16 },
    112        [7] = { .ds = 12, .ms = 64 },
    113    };
    114
    115    memcpy(&id_ns->lbaf, &lbaf, sizeof(lbaf));
    116    id_ns->nlbaf = 7;
    117
    118    for (i = 0; i <= id_ns->nlbaf; i++) {
    119        NvmeLBAF *lbaf = &id_ns->lbaf[i];
    120        if (lbaf->ds == ds) {
    121            if (lbaf->ms == ms) {
    122                id_ns->flbas |= i;
    123                goto lbaf_found;
    124            }
    125        }
    126    }
    127
    128    /* add non-standard lba format */
    129    id_ns->nlbaf++;
    130    id_ns->lbaf[id_ns->nlbaf].ds = ds;
    131    id_ns->lbaf[id_ns->nlbaf].ms = ms;
    132    id_ns->flbas |= id_ns->nlbaf;
    133
    134lbaf_found:
    135    nvme_ns_init_format(ns);
    136
    137    return 0;
    138}
    139
    140static int nvme_ns_init_blk(NvmeNamespace *ns, Error **errp)
    141{
    142    bool read_only;
    143
    144    if (!blkconf_blocksizes(&ns->blkconf, errp)) {
    145        return -1;
    146    }
    147
    148    read_only = !blk_supports_write_perm(ns->blkconf.blk);
    149    if (!blkconf_apply_backend_options(&ns->blkconf, read_only, false, errp)) {
    150        return -1;
    151    }
    152
    153    if (ns->blkconf.discard_granularity == -1) {
    154        ns->blkconf.discard_granularity =
    155            MAX(ns->blkconf.logical_block_size, MIN_DISCARD_GRANULARITY);
    156    }
    157
    158    ns->size = blk_getlength(ns->blkconf.blk);
    159    if (ns->size < 0) {
    160        error_setg_errno(errp, -ns->size, "could not get blockdev size");
    161        return -1;
    162    }
    163
    164    return 0;
    165}
    166
    167static int nvme_ns_zoned_check_calc_geometry(NvmeNamespace *ns, Error **errp)
    168{
    169    uint64_t zone_size, zone_cap;
    170
    171    /* Make sure that the values of ZNS properties are sane */
    172    if (ns->params.zone_size_bs) {
    173        zone_size = ns->params.zone_size_bs;
    174    } else {
    175        zone_size = NVME_DEFAULT_ZONE_SIZE;
    176    }
    177    if (ns->params.zone_cap_bs) {
    178        zone_cap = ns->params.zone_cap_bs;
    179    } else {
    180        zone_cap = zone_size;
    181    }
    182    if (zone_cap > zone_size) {
    183        error_setg(errp, "zone capacity %"PRIu64"B exceeds "
    184                   "zone size %"PRIu64"B", zone_cap, zone_size);
    185        return -1;
    186    }
    187    if (zone_size < ns->lbasz) {
    188        error_setg(errp, "zone size %"PRIu64"B too small, "
    189                   "must be at least %zuB", zone_size, ns->lbasz);
    190        return -1;
    191    }
    192    if (zone_cap < ns->lbasz) {
    193        error_setg(errp, "zone capacity %"PRIu64"B too small, "
    194                   "must be at least %zuB", zone_cap, ns->lbasz);
    195        return -1;
    196    }
    197
    198    /*
    199     * Save the main zone geometry values to avoid
    200     * calculating them later again.
    201     */
    202    ns->zone_size = zone_size / ns->lbasz;
    203    ns->zone_capacity = zone_cap / ns->lbasz;
    204    ns->num_zones = le64_to_cpu(ns->id_ns.nsze) / ns->zone_size;
    205
    206    /* Do a few more sanity checks of ZNS properties */
    207    if (!ns->num_zones) {
    208        error_setg(errp,
    209                   "insufficient drive capacity, must be at least the size "
    210                   "of one zone (%"PRIu64"B)", zone_size);
    211        return -1;
    212    }
    213
    214    return 0;
    215}
    216
    217static void nvme_ns_zoned_init_state(NvmeNamespace *ns)
    218{
    219    uint64_t start = 0, zone_size = ns->zone_size;
    220    uint64_t capacity = ns->num_zones * zone_size;
    221    NvmeZone *zone;
    222    int i;
    223
    224    ns->zone_array = g_new0(NvmeZone, ns->num_zones);
    225    if (ns->params.zd_extension_size) {
    226        ns->zd_extensions = g_malloc0(ns->params.zd_extension_size *
    227                                      ns->num_zones);
    228    }
    229
    230    QTAILQ_INIT(&ns->exp_open_zones);
    231    QTAILQ_INIT(&ns->imp_open_zones);
    232    QTAILQ_INIT(&ns->closed_zones);
    233    QTAILQ_INIT(&ns->full_zones);
    234
    235    zone = ns->zone_array;
    236    for (i = 0; i < ns->num_zones; i++, zone++) {
    237        if (start + zone_size > capacity) {
    238            zone_size = capacity - start;
    239        }
    240        zone->d.zt = NVME_ZONE_TYPE_SEQ_WRITE;
    241        nvme_set_zone_state(zone, NVME_ZONE_STATE_EMPTY);
    242        zone->d.za = 0;
    243        zone->d.zcap = ns->zone_capacity;
    244        zone->d.zslba = start;
    245        zone->d.wp = start;
    246        zone->w_ptr = start;
    247        start += zone_size;
    248    }
    249
    250    ns->zone_size_log2 = 0;
    251    if (is_power_of_2(ns->zone_size)) {
    252        ns->zone_size_log2 = 63 - clz64(ns->zone_size);
    253    }
    254}
    255
    256static void nvme_ns_init_zoned(NvmeNamespace *ns)
    257{
    258    NvmeIdNsZoned *id_ns_z;
    259    int i;
    260
    261    nvme_ns_zoned_init_state(ns);
    262
    263    id_ns_z = g_malloc0(sizeof(NvmeIdNsZoned));
    264
    265    /* MAR/MOR are zeroes-based, FFFFFFFFFh means no limit */
    266    id_ns_z->mar = cpu_to_le32(ns->params.max_active_zones - 1);
    267    id_ns_z->mor = cpu_to_le32(ns->params.max_open_zones - 1);
    268    id_ns_z->zoc = 0;
    269    id_ns_z->ozcs = ns->params.cross_zone_read ? 0x01 : 0x00;
    270
    271    for (i = 0; i <= ns->id_ns.nlbaf; i++) {
    272        id_ns_z->lbafe[i].zsze = cpu_to_le64(ns->zone_size);
    273        id_ns_z->lbafe[i].zdes =
    274            ns->params.zd_extension_size >> 6; /* Units of 64B */
    275    }
    276
    277    ns->csi = NVME_CSI_ZONED;
    278    ns->id_ns.nsze = cpu_to_le64(ns->num_zones * ns->zone_size);
    279    ns->id_ns.ncap = ns->id_ns.nsze;
    280    ns->id_ns.nuse = ns->id_ns.ncap;
    281
    282    /*
    283     * The device uses the BDRV_BLOCK_ZERO flag to determine the "deallocated"
    284     * status of logical blocks. Since the spec defines that logical blocks
    285     * SHALL be deallocated when then zone is in the Empty or Offline states,
    286     * we can only support DULBE if the zone size is a multiple of the
    287     * calculated NPDG.
    288     */
    289    if (ns->zone_size % (ns->id_ns.npdg + 1)) {
    290        warn_report("the zone size (%"PRIu64" blocks) is not a multiple of "
    291                    "the calculated deallocation granularity (%d blocks); "
    292                    "DULBE support disabled",
    293                    ns->zone_size, ns->id_ns.npdg + 1);
    294
    295        ns->id_ns.nsfeat &= ~0x4;
    296    }
    297
    298    ns->id_ns_zoned = id_ns_z;
    299}
    300
    301static void nvme_clear_zone(NvmeNamespace *ns, NvmeZone *zone)
    302{
    303    uint8_t state;
    304
    305    zone->w_ptr = zone->d.wp;
    306    state = nvme_get_zone_state(zone);
    307    if (zone->d.wp != zone->d.zslba ||
    308        (zone->d.za & NVME_ZA_ZD_EXT_VALID)) {
    309        if (state != NVME_ZONE_STATE_CLOSED) {
    310            trace_pci_nvme_clear_ns_close(state, zone->d.zslba);
    311            nvme_set_zone_state(zone, NVME_ZONE_STATE_CLOSED);
    312        }
    313        nvme_aor_inc_active(ns);
    314        QTAILQ_INSERT_HEAD(&ns->closed_zones, zone, entry);
    315    } else {
    316        trace_pci_nvme_clear_ns_reset(state, zone->d.zslba);
    317        nvme_set_zone_state(zone, NVME_ZONE_STATE_EMPTY);
    318    }
    319}
    320
    321/*
    322 * Close all the zones that are currently open.
    323 */
    324static void nvme_zoned_ns_shutdown(NvmeNamespace *ns)
    325{
    326    NvmeZone *zone, *next;
    327
    328    QTAILQ_FOREACH_SAFE(zone, &ns->closed_zones, entry, next) {
    329        QTAILQ_REMOVE(&ns->closed_zones, zone, entry);
    330        nvme_aor_dec_active(ns);
    331        nvme_clear_zone(ns, zone);
    332    }
    333    QTAILQ_FOREACH_SAFE(zone, &ns->imp_open_zones, entry, next) {
    334        QTAILQ_REMOVE(&ns->imp_open_zones, zone, entry);
    335        nvme_aor_dec_open(ns);
    336        nvme_aor_dec_active(ns);
    337        nvme_clear_zone(ns, zone);
    338    }
    339    QTAILQ_FOREACH_SAFE(zone, &ns->exp_open_zones, entry, next) {
    340        QTAILQ_REMOVE(&ns->exp_open_zones, zone, entry);
    341        nvme_aor_dec_open(ns);
    342        nvme_aor_dec_active(ns);
    343        nvme_clear_zone(ns, zone);
    344    }
    345
    346    assert(ns->nr_open_zones == 0);
    347}
    348
    349static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp)
    350{
    351    if (!ns->blkconf.blk) {
    352        error_setg(errp, "block backend not configured");
    353        return -1;
    354    }
    355
    356    if (ns->params.pi && ns->params.ms < 8) {
    357        error_setg(errp, "at least 8 bytes of metadata required to enable "
    358                   "protection information");
    359        return -1;
    360    }
    361
    362    if (ns->params.nsid > NVME_MAX_NAMESPACES) {
    363        error_setg(errp, "invalid namespace id (must be between 0 and %d)",
    364                   NVME_MAX_NAMESPACES);
    365        return -1;
    366    }
    367
    368    if (ns->params.zoned) {
    369        if (ns->params.max_active_zones) {
    370            if (ns->params.max_open_zones > ns->params.max_active_zones) {
    371                error_setg(errp, "max_open_zones (%u) exceeds "
    372                           "max_active_zones (%u)", ns->params.max_open_zones,
    373                           ns->params.max_active_zones);
    374                return -1;
    375            }
    376
    377            if (!ns->params.max_open_zones) {
    378                ns->params.max_open_zones = ns->params.max_active_zones;
    379            }
    380        }
    381
    382        if (ns->params.zd_extension_size) {
    383            if (ns->params.zd_extension_size & 0x3f) {
    384                error_setg(errp, "zone descriptor extension size must be a "
    385                           "multiple of 64B");
    386                return -1;
    387            }
    388            if ((ns->params.zd_extension_size >> 6) > 0xff) {
    389                error_setg(errp,
    390                           "zone descriptor extension size is too large");
    391                return -1;
    392            }
    393        }
    394    }
    395
    396    return 0;
    397}
    398
    399int nvme_ns_setup(NvmeNamespace *ns, Error **errp)
    400{
    401    if (nvme_ns_check_constraints(ns, errp)) {
    402        return -1;
    403    }
    404
    405    if (nvme_ns_init_blk(ns, errp)) {
    406        return -1;
    407    }
    408
    409    if (nvme_ns_init(ns, errp)) {
    410        return -1;
    411    }
    412    if (ns->params.zoned) {
    413        if (nvme_ns_zoned_check_calc_geometry(ns, errp) != 0) {
    414            return -1;
    415        }
    416        nvme_ns_init_zoned(ns);
    417    }
    418
    419    return 0;
    420}
    421
/* Drain the block backend of the namespace via blk_drain(). */
void nvme_ns_drain(NvmeNamespace *ns)
{
    blk_drain(ns->blkconf.blk);
}
    426
    427void nvme_ns_shutdown(NvmeNamespace *ns)
    428{
    429    blk_flush(ns->blkconf.blk);
    430    if (ns->params.zoned) {
    431        nvme_zoned_ns_shutdown(ns);
    432    }
    433}
    434
    435void nvme_ns_cleanup(NvmeNamespace *ns)
    436{
    437    if (ns->params.zoned) {
    438        g_free(ns->id_ns_zoned);
    439        g_free(ns->zone_array);
    440        g_free(ns->zd_extensions);
    441    }
    442}
    443
/*
 * Unrealize callback (installed in nvme_ns_class_init): drain the backend,
 * shut the namespace down, then free its zoned state.
 */
static void nvme_ns_unrealize(DeviceState *dev)
{
    NvmeNamespace *ns = NVME_NS(dev);

    nvme_ns_drain(ns);
    nvme_ns_shutdown(ns);
    nvme_ns_cleanup(ns);
}
    452
    453static void nvme_ns_realize(DeviceState *dev, Error **errp)
    454{
    455    NvmeNamespace *ns = NVME_NS(dev);
    456    BusState *s = qdev_get_parent_bus(dev);
    457    NvmeCtrl *n = NVME(s->parent);
    458    NvmeSubsystem *subsys = n->subsys;
    459    uint32_t nsid = ns->params.nsid;
    460    int i;
    461
    462    if (!n->subsys) {
    463        if (ns->params.detached) {
    464            error_setg(errp, "detached requires that the nvme device is "
    465                       "linked to an nvme-subsys device");
    466            return;
    467        }
    468
    469        if (ns->params.shared) {
    470            error_setg(errp, "shared requires that the nvme device is "
    471                       "linked to an nvme-subsys device");
    472            return;
    473        }
    474    } else {
    475        /*
    476         * If this namespace belongs to a subsystem (through a link on the
    477         * controller device), reparent the device.
    478         */
    479        if (!qdev_set_parent_bus(dev, &subsys->bus.parent_bus, errp)) {
    480            return;
    481        }
    482    }
    483
    484    if (nvme_ns_setup(ns, errp)) {
    485        return;
    486    }
    487
    488    if (!nsid) {
    489        for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
    490            if (nvme_ns(n, i) || nvme_subsys_ns(subsys, i)) {
    491                continue;
    492            }
    493
    494            nsid = ns->params.nsid = i;
    495            break;
    496        }
    497
    498        if (!nsid) {
    499            error_setg(errp, "no free namespace id");
    500            return;
    501        }
    502    } else {
    503        if (nvme_ns(n, nsid) || nvme_subsys_ns(subsys, nsid)) {
    504            error_setg(errp, "namespace id '%d' already allocated", nsid);
    505            return;
    506        }
    507    }
    508
    509    if (subsys) {
    510        subsys->namespaces[nsid] = ns;
    511
    512        if (ns->params.detached) {
    513            return;
    514        }
    515
    516        if (ns->params.shared) {
    517            for (i = 0; i < ARRAY_SIZE(subsys->ctrls); i++) {
    518                NvmeCtrl *ctrl = subsys->ctrls[i];
    519
    520                if (ctrl) {
    521                    nvme_attach_ns(ctrl, ns);
    522                }
    523            }
    524
    525            return;
    526        }
    527    }
    528
    529    nvme_attach_ns(n, ns);
    530}
    531
/*
 * User-configurable properties of the namespace device and their defaults.
 * Each entry binds a property name to a field of NvmeNamespace (mostly
 * inside ns->params); the "zoned.*" entries are only evaluated by the setup
 * code when "zoned" is enabled.
 */
static Property nvme_ns_props[] = {
    DEFINE_BLOCK_PROPERTIES(NvmeNamespace, blkconf),
    DEFINE_PROP_BOOL("detached", NvmeNamespace, params.detached, false),
    DEFINE_PROP_BOOL("shared", NvmeNamespace, params.shared, false),
    /* 0 requests automatic assignment of a free namespace id */
    DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0),
    DEFINE_PROP_UUID("uuid", NvmeNamespace, params.uuid),
    /* 0 means unset; see "eui64-default" */
    DEFINE_PROP_UINT64("eui64", NvmeNamespace, params.eui64, 0),
    /* metadata bytes per logical block */
    DEFINE_PROP_UINT16("ms", NvmeNamespace, params.ms, 0),
    DEFINE_PROP_UINT8("mset", NvmeNamespace, params.mset, 0),
    /* protection information; requires at least 8 bytes of metadata */
    DEFINE_PROP_UINT8("pi", NvmeNamespace, params.pi, 0),
    DEFINE_PROP_UINT8("pil", NvmeNamespace, params.pil, 0),
    DEFINE_PROP_UINT16("mssrl", NvmeNamespace, params.mssrl, 128),
    DEFINE_PROP_UINT32("mcl", NvmeNamespace, params.mcl, 128),
    DEFINE_PROP_UINT8("msrc", NvmeNamespace, params.msrc, 127),
    DEFINE_PROP_BOOL("zoned", NvmeNamespace, params.zoned, false),
    /* zone size and capacity are given in bytes */
    DEFINE_PROP_SIZE("zoned.zone_size", NvmeNamespace, params.zone_size_bs,
                     NVME_DEFAULT_ZONE_SIZE),
    /* 0 makes the zone capacity equal to the zone size */
    DEFINE_PROP_SIZE("zoned.zone_capacity", NvmeNamespace, params.zone_cap_bs,
                     0),
    DEFINE_PROP_BOOL("zoned.cross_read", NvmeNamespace,
                     params.cross_zone_read, false),
    /* 0 means no limit for the active/open zone counts */
    DEFINE_PROP_UINT32("zoned.max_active", NvmeNamespace,
                       params.max_active_zones, 0),
    DEFINE_PROP_UINT32("zoned.max_open", NvmeNamespace,
                       params.max_open_zones, 0),
    /* in bytes; must be a multiple of 64 */
    DEFINE_PROP_UINT32("zoned.descr_ext_size", NvmeNamespace,
                       params.zd_extension_size, 0),
    /* generate an EUI-64 when "eui64" is left unset */
    DEFINE_PROP_BOOL("eui64-default", NvmeNamespace, params.eui64_default,
                     true),
    DEFINE_PROP_END_OF_LIST(),
};
    563
    564static void nvme_ns_class_init(ObjectClass *oc, void *data)
    565{
    566    DeviceClass *dc = DEVICE_CLASS(oc);
    567
    568    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
    569
    570    dc->bus_type = TYPE_NVME_BUS;
    571    dc->realize = nvme_ns_realize;
    572    dc->unrealize = nvme_ns_unrealize;
    573    device_class_set_props(dc, nvme_ns_props);
    574    dc->desc = "Virtual NVMe namespace";
    575}
    576
    577static void nvme_ns_instance_init(Object *obj)
    578{
    579    NvmeNamespace *ns = NVME_NS(obj);
    580    char *bootindex = g_strdup_printf("/namespace@%d,0", ns->params.nsid);
    581
    582    device_add_bootindex_property(obj, &ns->bootindex, "bootindex",
    583                                  bootindex, DEVICE(obj));
    584
    585    g_free(bootindex);
    586}
    587
/* Type description of the namespace device, a direct child of TYPE_DEVICE. */
static const TypeInfo nvme_ns_info = {
    .name = TYPE_NVME_NS,
    .parent = TYPE_DEVICE,
    .class_init = nvme_ns_class_init,
    .instance_size = sizeof(NvmeNamespace),
    .instance_init = nvme_ns_instance_init,
};
    595
/* Register the namespace device type described by nvme_ns_info. */
static void nvme_ns_register_types(void)
{
    type_register_static(&nvme_ns_info);
}

/* Hook the registration function into QEMU's type initialisation. */
type_init(nvme_ns_register_types)