/* nvme.h (16440B) */
1/* 2 * QEMU NVM Express 3 * 4 * Copyright (c) 2012 Intel Corporation 5 * Copyright (c) 2021 Minwoo Im 6 * Copyright (c) 2021 Samsung Electronics Co., Ltd. 7 * 8 * Authors: 9 * Keith Busch <kbusch@kernel.org> 10 * Klaus Jensen <k.jensen@samsung.com> 11 * Gollu Appalanaidu <anaidu.gollu@samsung.com> 12 * Dmitry Fomichev <dmitry.fomichev@wdc.com> 13 * Minwoo Im <minwoo.im.dev@gmail.com> 14 * 15 * This code is licensed under the GNU GPL v2 or later. 16 */ 17 18#ifndef HW_NVME_INTERNAL_H 19#define HW_NVME_INTERNAL_H 20 21#include "qemu/uuid.h" 22#include "hw/pci/pci.h" 23#include "hw/block/block.h" 24 25#include "block/nvme.h" 26 27#define NVME_MAX_CONTROLLERS 32 28#define NVME_MAX_NAMESPACES 256 29#define NVME_EUI64_DEFAULT ((uint64_t)0x5254000000000000) 30 31QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1); 32 33typedef struct NvmeCtrl NvmeCtrl; 34typedef struct NvmeNamespace NvmeNamespace; 35 36#define TYPE_NVME_BUS "nvme-bus" 37OBJECT_DECLARE_SIMPLE_TYPE(NvmeBus, NVME_BUS) 38 39typedef struct NvmeBus { 40 BusState parent_bus; 41} NvmeBus; 42 43#define TYPE_NVME_SUBSYS "nvme-subsys" 44#define NVME_SUBSYS(obj) \ 45 OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS) 46 47typedef struct NvmeSubsystem { 48 DeviceState parent_obj; 49 NvmeBus bus; 50 uint8_t subnqn[256]; 51 52 NvmeCtrl *ctrls[NVME_MAX_CONTROLLERS]; 53 NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1]; 54 55 struct { 56 char *nqn; 57 } params; 58} NvmeSubsystem; 59 60int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp); 61void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n); 62 63static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys, 64 uint32_t cntlid) 65{ 66 if (!subsys || cntlid >= NVME_MAX_CONTROLLERS) { 67 return NULL; 68 } 69 70 return subsys->ctrls[cntlid]; 71} 72 73static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys, 74 uint32_t nsid) 75{ 76 if (!subsys || !nsid || nsid > NVME_MAX_NAMESPACES) { 77 return NULL; 78 } 79 80 return 
subsys->namespaces[nsid]; 81} 82 83#define TYPE_NVME_NS "nvme-ns" 84#define NVME_NS(obj) \ 85 OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS) 86 87typedef struct NvmeZone { 88 NvmeZoneDescr d; 89 uint64_t w_ptr; 90 QTAILQ_ENTRY(NvmeZone) entry; 91} NvmeZone; 92 93typedef struct NvmeNamespaceParams { 94 bool detached; 95 bool shared; 96 uint32_t nsid; 97 QemuUUID uuid; 98 uint64_t eui64; 99 bool eui64_default; 100 101 uint16_t ms; 102 uint8_t mset; 103 uint8_t pi; 104 uint8_t pil; 105 106 uint16_t mssrl; 107 uint32_t mcl; 108 uint8_t msrc; 109 110 bool zoned; 111 bool cross_zone_read; 112 uint64_t zone_size_bs; 113 uint64_t zone_cap_bs; 114 uint32_t max_active_zones; 115 uint32_t max_open_zones; 116 uint32_t zd_extension_size; 117} NvmeNamespaceParams; 118 119typedef struct NvmeNamespace { 120 DeviceState parent_obj; 121 BlockConf blkconf; 122 int32_t bootindex; 123 int64_t size; 124 int64_t moff; 125 NvmeIdNs id_ns; 126 NvmeLBAF lbaf; 127 size_t lbasz; 128 const uint32_t *iocs; 129 uint8_t csi; 130 uint16_t status; 131 int attached; 132 133 QTAILQ_ENTRY(NvmeNamespace) entry; 134 135 NvmeIdNsZoned *id_ns_zoned; 136 NvmeZone *zone_array; 137 QTAILQ_HEAD(, NvmeZone) exp_open_zones; 138 QTAILQ_HEAD(, NvmeZone) imp_open_zones; 139 QTAILQ_HEAD(, NvmeZone) closed_zones; 140 QTAILQ_HEAD(, NvmeZone) full_zones; 141 uint32_t num_zones; 142 uint64_t zone_size; 143 uint64_t zone_capacity; 144 uint32_t zone_size_log2; 145 uint8_t *zd_extensions; 146 int32_t nr_open_zones; 147 int32_t nr_active_zones; 148 149 NvmeNamespaceParams params; 150 151 struct { 152 uint32_t err_rec; 153 } features; 154} NvmeNamespace; 155 156static inline uint32_t nvme_nsid(NvmeNamespace *ns) 157{ 158 if (ns) { 159 return ns->params.nsid; 160 } 161 162 return 0; 163} 164 165static inline size_t nvme_l2b(NvmeNamespace *ns, uint64_t lba) 166{ 167 return lba << ns->lbaf.ds; 168} 169 170static inline size_t nvme_m2b(NvmeNamespace *ns, uint64_t lba) 171{ 172 return ns->lbaf.ms * lba; 173} 174 175static 
inline int64_t nvme_moff(NvmeNamespace *ns, uint64_t lba) 176{ 177 return ns->moff + nvme_m2b(ns, lba); 178} 179 180static inline bool nvme_ns_ext(NvmeNamespace *ns) 181{ 182 return !!NVME_ID_NS_FLBAS_EXTENDED(ns->id_ns.flbas); 183} 184 185static inline NvmeZoneState nvme_get_zone_state(NvmeZone *zone) 186{ 187 return zone->d.zs >> 4; 188} 189 190static inline void nvme_set_zone_state(NvmeZone *zone, NvmeZoneState state) 191{ 192 zone->d.zs = state << 4; 193} 194 195static inline uint64_t nvme_zone_rd_boundary(NvmeNamespace *ns, NvmeZone *zone) 196{ 197 return zone->d.zslba + ns->zone_size; 198} 199 200static inline uint64_t nvme_zone_wr_boundary(NvmeZone *zone) 201{ 202 return zone->d.zslba + zone->d.zcap; 203} 204 205static inline bool nvme_wp_is_valid(NvmeZone *zone) 206{ 207 uint8_t st = nvme_get_zone_state(zone); 208 209 return st != NVME_ZONE_STATE_FULL && 210 st != NVME_ZONE_STATE_READ_ONLY && 211 st != NVME_ZONE_STATE_OFFLINE; 212} 213 214static inline uint8_t *nvme_get_zd_extension(NvmeNamespace *ns, 215 uint32_t zone_idx) 216{ 217 return &ns->zd_extensions[zone_idx * ns->params.zd_extension_size]; 218} 219 220static inline void nvme_aor_inc_open(NvmeNamespace *ns) 221{ 222 assert(ns->nr_open_zones >= 0); 223 if (ns->params.max_open_zones) { 224 ns->nr_open_zones++; 225 assert(ns->nr_open_zones <= ns->params.max_open_zones); 226 } 227} 228 229static inline void nvme_aor_dec_open(NvmeNamespace *ns) 230{ 231 if (ns->params.max_open_zones) { 232 assert(ns->nr_open_zones > 0); 233 ns->nr_open_zones--; 234 } 235 assert(ns->nr_open_zones >= 0); 236} 237 238static inline void nvme_aor_inc_active(NvmeNamespace *ns) 239{ 240 assert(ns->nr_active_zones >= 0); 241 if (ns->params.max_active_zones) { 242 ns->nr_active_zones++; 243 assert(ns->nr_active_zones <= ns->params.max_active_zones); 244 } 245} 246 247static inline void nvme_aor_dec_active(NvmeNamespace *ns) 248{ 249 if (ns->params.max_active_zones) { 250 assert(ns->nr_active_zones > 0); 251 
ns->nr_active_zones--; 252 assert(ns->nr_active_zones >= ns->nr_open_zones); 253 } 254 assert(ns->nr_active_zones >= 0); 255} 256 257void nvme_ns_init_format(NvmeNamespace *ns); 258int nvme_ns_setup(NvmeNamespace *ns, Error **errp); 259void nvme_ns_drain(NvmeNamespace *ns); 260void nvme_ns_shutdown(NvmeNamespace *ns); 261void nvme_ns_cleanup(NvmeNamespace *ns); 262 263typedef struct NvmeAsyncEvent { 264 QTAILQ_ENTRY(NvmeAsyncEvent) entry; 265 NvmeAerResult result; 266} NvmeAsyncEvent; 267 268enum { 269 NVME_SG_ALLOC = 1 << 0, 270 NVME_SG_DMA = 1 << 1, 271}; 272 273typedef struct NvmeSg { 274 int flags; 275 276 union { 277 QEMUSGList qsg; 278 QEMUIOVector iov; 279 }; 280} NvmeSg; 281 282typedef enum NvmeTxDirection { 283 NVME_TX_DIRECTION_TO_DEVICE = 0, 284 NVME_TX_DIRECTION_FROM_DEVICE = 1, 285} NvmeTxDirection; 286 287typedef struct NvmeRequest { 288 struct NvmeSQueue *sq; 289 struct NvmeNamespace *ns; 290 BlockAIOCB *aiocb; 291 uint16_t status; 292 void *opaque; 293 NvmeCqe cqe; 294 NvmeCmd cmd; 295 BlockAcctCookie acct; 296 NvmeSg sg; 297 QTAILQ_ENTRY(NvmeRequest)entry; 298} NvmeRequest; 299 300typedef struct NvmeBounceContext { 301 NvmeRequest *req; 302 303 struct { 304 QEMUIOVector iov; 305 uint8_t *bounce; 306 } data, mdata; 307} NvmeBounceContext; 308 309static inline const char *nvme_adm_opc_str(uint8_t opc) 310{ 311 switch (opc) { 312 case NVME_ADM_CMD_DELETE_SQ: return "NVME_ADM_CMD_DELETE_SQ"; 313 case NVME_ADM_CMD_CREATE_SQ: return "NVME_ADM_CMD_CREATE_SQ"; 314 case NVME_ADM_CMD_GET_LOG_PAGE: return "NVME_ADM_CMD_GET_LOG_PAGE"; 315 case NVME_ADM_CMD_DELETE_CQ: return "NVME_ADM_CMD_DELETE_CQ"; 316 case NVME_ADM_CMD_CREATE_CQ: return "NVME_ADM_CMD_CREATE_CQ"; 317 case NVME_ADM_CMD_IDENTIFY: return "NVME_ADM_CMD_IDENTIFY"; 318 case NVME_ADM_CMD_ABORT: return "NVME_ADM_CMD_ABORT"; 319 case NVME_ADM_CMD_SET_FEATURES: return "NVME_ADM_CMD_SET_FEATURES"; 320 case NVME_ADM_CMD_GET_FEATURES: return "NVME_ADM_CMD_GET_FEATURES"; 321 case NVME_ADM_CMD_ASYNC_EV_REQ: 
return "NVME_ADM_CMD_ASYNC_EV_REQ"; 322 case NVME_ADM_CMD_NS_ATTACHMENT: return "NVME_ADM_CMD_NS_ATTACHMENT"; 323 case NVME_ADM_CMD_FORMAT_NVM: return "NVME_ADM_CMD_FORMAT_NVM"; 324 default: return "NVME_ADM_CMD_UNKNOWN"; 325 } 326} 327 328static inline const char *nvme_io_opc_str(uint8_t opc) 329{ 330 switch (opc) { 331 case NVME_CMD_FLUSH: return "NVME_NVM_CMD_FLUSH"; 332 case NVME_CMD_WRITE: return "NVME_NVM_CMD_WRITE"; 333 case NVME_CMD_READ: return "NVME_NVM_CMD_READ"; 334 case NVME_CMD_COMPARE: return "NVME_NVM_CMD_COMPARE"; 335 case NVME_CMD_WRITE_ZEROES: return "NVME_NVM_CMD_WRITE_ZEROES"; 336 case NVME_CMD_DSM: return "NVME_NVM_CMD_DSM"; 337 case NVME_CMD_VERIFY: return "NVME_NVM_CMD_VERIFY"; 338 case NVME_CMD_COPY: return "NVME_NVM_CMD_COPY"; 339 case NVME_CMD_ZONE_MGMT_SEND: return "NVME_ZONED_CMD_MGMT_SEND"; 340 case NVME_CMD_ZONE_MGMT_RECV: return "NVME_ZONED_CMD_MGMT_RECV"; 341 case NVME_CMD_ZONE_APPEND: return "NVME_ZONED_CMD_ZONE_APPEND"; 342 default: return "NVME_NVM_CMD_UNKNOWN"; 343 } 344} 345 346typedef struct NvmeSQueue { 347 struct NvmeCtrl *ctrl; 348 uint16_t sqid; 349 uint16_t cqid; 350 uint32_t head; 351 uint32_t tail; 352 uint32_t size; 353 uint64_t dma_addr; 354 QEMUTimer *timer; 355 NvmeRequest *io_req; 356 QTAILQ_HEAD(, NvmeRequest) req_list; 357 QTAILQ_HEAD(, NvmeRequest) out_req_list; 358 QTAILQ_ENTRY(NvmeSQueue) entry; 359} NvmeSQueue; 360 361typedef struct NvmeCQueue { 362 struct NvmeCtrl *ctrl; 363 uint8_t phase; 364 uint16_t cqid; 365 uint16_t irq_enabled; 366 uint32_t head; 367 uint32_t tail; 368 uint32_t vector; 369 uint32_t size; 370 uint64_t dma_addr; 371 QEMUTimer *timer; 372 QTAILQ_HEAD(, NvmeSQueue) sq_list; 373 QTAILQ_HEAD(, NvmeRequest) req_list; 374} NvmeCQueue; 375 376#define TYPE_NVME "nvme" 377#define NVME(obj) \ 378 OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME) 379 380typedef struct NvmeParams { 381 char *serial; 382 uint32_t num_queues; /* deprecated since 5.1 */ 383 uint32_t max_ioqpairs; 384 uint16_t msix_qsize; 385 
uint32_t cmb_size_mb; 386 uint8_t aerl; 387 uint32_t aer_max_queued; 388 uint8_t mdts; 389 uint8_t vsl; 390 bool use_intel_id; 391 uint8_t zasl; 392 bool auto_transition_zones; 393 bool legacy_cmb; 394} NvmeParams; 395 396typedef struct NvmeCtrl { 397 PCIDevice parent_obj; 398 MemoryRegion bar0; 399 MemoryRegion iomem; 400 NvmeBar bar; 401 NvmeParams params; 402 NvmeBus bus; 403 404 uint16_t cntlid; 405 bool qs_created; 406 uint32_t page_size; 407 uint16_t page_bits; 408 uint16_t max_prp_ents; 409 uint16_t cqe_size; 410 uint16_t sqe_size; 411 uint32_t reg_size; 412 uint32_t max_q_ents; 413 uint8_t outstanding_aers; 414 uint32_t irq_status; 415 int cq_pending; 416 uint64_t host_timestamp; /* Timestamp sent by the host */ 417 uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */ 418 uint64_t starttime_ms; 419 uint16_t temperature; 420 uint8_t smart_critical_warning; 421 422 struct { 423 MemoryRegion mem; 424 uint8_t *buf; 425 bool cmse; 426 hwaddr cba; 427 } cmb; 428 429 struct { 430 HostMemoryBackend *dev; 431 bool cmse; 432 hwaddr cba; 433 } pmr; 434 435 uint8_t aer_mask; 436 NvmeRequest **aer_reqs; 437 QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue; 438 int aer_queued; 439 440 uint32_t dmrsl; 441 442 /* Namespace ID is started with 1 so bitmap should be 1-based */ 443#define NVME_CHANGED_NSID_SIZE (NVME_MAX_NAMESPACES + 1) 444 DECLARE_BITMAP(changed_nsids, NVME_CHANGED_NSID_SIZE); 445 446 NvmeSubsystem *subsys; 447 448 NvmeNamespace namespace; 449 NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1]; 450 NvmeSQueue **sq; 451 NvmeCQueue **cq; 452 NvmeSQueue admin_sq; 453 NvmeCQueue admin_cq; 454 NvmeIdCtrl id_ctrl; 455 456 struct { 457 struct { 458 uint16_t temp_thresh_hi; 459 uint16_t temp_thresh_low; 460 }; 461 uint32_t async_config; 462 } features; 463} NvmeCtrl; 464 465static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid) 466{ 467 if (!nsid || nsid > NVME_MAX_NAMESPACES) { 468 return NULL; 469 } 470 471 return n->namespaces[nsid]; 472} 473 474static 
inline NvmeCQueue *nvme_cq(NvmeRequest *req) 475{ 476 NvmeSQueue *sq = req->sq; 477 NvmeCtrl *n = sq->ctrl; 478 479 return n->cq[sq->cqid]; 480} 481 482static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req) 483{ 484 NvmeSQueue *sq = req->sq; 485 return sq->ctrl; 486} 487 488static inline uint16_t nvme_cid(NvmeRequest *req) 489{ 490 if (!req) { 491 return 0xffff; 492 } 493 494 return le16_to_cpu(req->cqe.cid); 495} 496 497void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns); 498uint16_t nvme_bounce_data(NvmeCtrl *n, uint8_t *ptr, uint32_t len, 499 NvmeTxDirection dir, NvmeRequest *req); 500uint16_t nvme_bounce_mdata(NvmeCtrl *n, uint8_t *ptr, uint32_t len, 501 NvmeTxDirection dir, NvmeRequest *req); 502void nvme_rw_complete_cb(void *opaque, int ret); 503uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len, 504 NvmeCmd *cmd); 505 506/* from Linux kernel (crypto/crct10dif_common.c) */ 507static const uint16_t t10_dif_crc_table[256] = { 508 0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B, 509 0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6, 510 0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6, 511 0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B, 512 0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1, 513 0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C, 514 0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C, 515 0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781, 516 0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8, 517 0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255, 518 0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925, 519 0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698, 520 0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472, 521 0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF, 522 0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF, 523 0xCA69, 0x41DE, 0x56B0, 
0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02, 524 0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA, 525 0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067, 526 0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17, 527 0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA, 528 0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640, 529 0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD, 530 0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D, 531 0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30, 532 0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759, 533 0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4, 534 0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394, 535 0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29, 536 0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3, 537 0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E, 538 0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E, 539 0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3 540}; 541 542uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint8_t prinfo, uint64_t slba, 543 uint32_t reftag); 544uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen, 545 uint64_t slba); 546void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len, 547 uint8_t *mbuf, size_t mlen, uint16_t apptag, 548 uint32_t *reftag); 549uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len, 550 uint8_t *mbuf, size_t mlen, uint8_t prinfo, 551 uint64_t slba, uint16_t apptag, 552 uint16_t appmask, uint32_t *reftag); 553uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req); 554 555 556#endif /* HW_NVME_INTERNAL_H */