passthru.c (17229B)
1// SPDX-License-Identifier: GPL-2.0 2/* 3 * NVMe Over Fabrics Target Passthrough command implementation. 4 * 5 * Copyright (c) 2017-2018 Western Digital Corporation or its 6 * affiliates. 7 * Copyright (c) 2019-2020, Eideticom Inc. 8 * 9 */ 10#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 11#include <linux/module.h> 12 13#include "../host/nvme.h" 14#include "nvmet.h" 15 16MODULE_IMPORT_NS(NVME_TARGET_PASSTHRU); 17 18/* 19 * xarray to maintain one passthru subsystem per nvme controller. 20 */ 21static DEFINE_XARRAY(passthru_subsystems); 22 23void nvmet_passthrough_override_cap(struct nvmet_ctrl *ctrl) 24{ 25 /* 26 * Multiple command set support can only be declared if the underlying 27 * controller actually supports it. 28 */ 29 if (!nvme_multi_css(ctrl->subsys->passthru_ctrl)) 30 ctrl->cap &= ~(1ULL << 43); 31} 32 33static u16 nvmet_passthru_override_id_descs(struct nvmet_req *req) 34{ 35 struct nvmet_ctrl *ctrl = req->sq->ctrl; 36 u16 status = NVME_SC_SUCCESS; 37 int pos, len; 38 bool csi_seen = false; 39 void *data; 40 u8 csi; 41 42 if (!ctrl->subsys->clear_ids) 43 return status; 44 45 data = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL); 46 if (!data) 47 return NVME_SC_INTERNAL; 48 49 status = nvmet_copy_from_sgl(req, 0, data, NVME_IDENTIFY_DATA_SIZE); 50 if (status) 51 goto out_free; 52 53 for (pos = 0; pos < NVME_IDENTIFY_DATA_SIZE; pos += len) { 54 struct nvme_ns_id_desc *cur = data + pos; 55 56 if (cur->nidl == 0) 57 break; 58 if (cur->nidt == NVME_NIDT_CSI) { 59 memcpy(&csi, cur + 1, NVME_NIDT_CSI_LEN); 60 csi_seen = true; 61 break; 62 } 63 len = sizeof(struct nvme_ns_id_desc) + cur->nidl; 64 } 65 66 memset(data, 0, NVME_IDENTIFY_DATA_SIZE); 67 if (csi_seen) { 68 struct nvme_ns_id_desc *cur = data; 69 70 cur->nidt = NVME_NIDT_CSI; 71 cur->nidl = NVME_NIDT_CSI_LEN; 72 memcpy(cur + 1, &csi, NVME_NIDT_CSI_LEN); 73 } 74 status = nvmet_copy_to_sgl(req, 0, data, NVME_IDENTIFY_DATA_SIZE); 75out_free: 76 kfree(data); 77 return status; 78} 79 80static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req) 81{ 82 struct nvmet_ctrl *ctrl = req->sq->ctrl; 83 struct nvme_ctrl *pctrl = ctrl->subsys->passthru_ctrl; 84 u16 status = NVME_SC_SUCCESS; 85 struct nvme_id_ctrl *id; 86 unsigned int max_hw_sectors; 87 int page_shift; 88 89 id = kzalloc(sizeof(*id), GFP_KERNEL); 90 if (!id) 91 return NVME_SC_INTERNAL; 92 93 status = nvmet_copy_from_sgl(req, 0, id, sizeof(*id)); 94 if (status) 95 goto out_free; 96 97 id->cntlid = cpu_to_le16(ctrl->cntlid); 98 id->ver = cpu_to_le32(ctrl->subsys->ver); 99 100 /* 101 * The passthru NVMe driver may have a limit on the number of segments 102 * which depends on the host's memory fragementation. To solve this, 103 * ensure mdts is limited to the pages equal to the number of segments. 104 */ 105 max_hw_sectors = min_not_zero(pctrl->max_segments << (PAGE_SHIFT - 9), 106 pctrl->max_hw_sectors); 107 108 /* 109 * nvmet_passthru_map_sg is limitted to using a single bio so limit 110 * the mdts based on BIO_MAX_VECS as well 111 */ 112 max_hw_sectors = min_not_zero(BIO_MAX_VECS << (PAGE_SHIFT - 9), 113 max_hw_sectors); 114 115 page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12; 116 117 id->mdts = ilog2(max_hw_sectors) + 9 - page_shift; 118 119 id->acl = 3; 120 /* 121 * We export aerl limit for the fabrics controller, update this when 122 * passthru based aerl support is added. 123 */ 124 id->aerl = NVMET_ASYNC_EVENTS - 1; 125 126 /* emulate kas as most of the PCIe ctrl don't have a support for kas */ 127 id->kas = cpu_to_le16(NVMET_KAS); 128 129 /* don't support host memory buffer */ 130 id->hmpre = 0; 131 id->hmmin = 0; 132 133 id->sqes = min_t(__u8, ((0x6 << 4) | 0x6), id->sqes); 134 id->cqes = min_t(__u8, ((0x4 << 4) | 0x4), id->cqes); 135 id->maxcmd = cpu_to_le16(NVMET_MAX_CMD); 136 137 /* don't support fuse commands */ 138 id->fuses = 0; 139 140 id->sgls = cpu_to_le32(1 << 0); /* we always support SGLs */ 141 if (ctrl->ops->flags & NVMF_KEYED_SGLS) 142 id->sgls |= cpu_to_le32(1 << 2); 143 if (req->port->inline_data_size) 144 id->sgls |= cpu_to_le32(1 << 20); 145 146 /* 147 * When passthru controller is setup using nvme-loop transport it will 148 * export the passthru ctrl subsysnqn (PCIe NVMe ctrl) and will fail in 149 * the nvme/host/core.c in the nvme_init_subsystem()->nvme_active_ctrl() 150 * code path with duplicate ctr subsynqn. In order to prevent that we 151 * mask the passthru-ctrl subsysnqn with the target ctrl subsysnqn. 152 */ 153 memcpy(id->subnqn, ctrl->subsysnqn, sizeof(id->subnqn)); 154 155 /* use fabric id-ctrl values */ 156 id->ioccsz = cpu_to_le32((sizeof(struct nvme_command) + 157 req->port->inline_data_size) / 16); 158 id->iorcsz = cpu_to_le32(sizeof(struct nvme_completion) / 16); 159 160 id->msdbd = ctrl->ops->msdbd; 161 162 /* Support multipath connections with fabrics */ 163 id->cmic |= 1 << 1; 164 165 /* Disable reservations, see nvmet_parse_passthru_io_cmd() */ 166 id->oncs &= cpu_to_le16(~NVME_CTRL_ONCS_RESERVATIONS); 167 168 status = nvmet_copy_to_sgl(req, 0, id, sizeof(struct nvme_id_ctrl)); 169 170out_free: 171 kfree(id); 172 return status; 173} 174 175static u16 nvmet_passthru_override_id_ns(struct nvmet_req *req) 176{ 177 u16 status = NVME_SC_SUCCESS; 178 struct nvme_id_ns *id; 179 int i; 180 181 id = kzalloc(sizeof(*id), GFP_KERNEL); 182 if (!id) 183 return NVME_SC_INTERNAL; 184 185 status = nvmet_copy_from_sgl(req, 0, id, sizeof(struct nvme_id_ns)); 186 if (status) 187 goto out_free; 188 189 for (i = 0; i < (id->nlbaf + 1); i++) 190 if (id->lbaf[i].ms) 191 memset(&id->lbaf[i], 0, sizeof(id->lbaf[i])); 192 193 id->flbas = id->flbas & ~(1 << 4); 194 195 /* 196 * Presently the NVMEof target code does not support sending 197 * metadata, so we must disable it here. This should be updated 198 * once target starts supporting metadata. 199 */ 200 id->mc = 0; 201 202 if (req->sq->ctrl->subsys->clear_ids) { 203 memset(id->nguid, 0, NVME_NIDT_NGUID_LEN); 204 memset(id->eui64, 0, NVME_NIDT_EUI64_LEN); 205 } 206 207 status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id)); 208 209out_free: 210 kfree(id); 211 return status; 212} 213 214static void nvmet_passthru_execute_cmd_work(struct work_struct *w) 215{ 216 struct nvmet_req *req = container_of(w, struct nvmet_req, p.work); 217 struct request *rq = req->p.rq; 218 int status; 219 220 status = nvme_execute_passthru_rq(rq); 221 222 if (status == NVME_SC_SUCCESS && 223 req->cmd->common.opcode == nvme_admin_identify) { 224 switch (req->cmd->identify.cns) { 225 case NVME_ID_CNS_CTRL: 226 nvmet_passthru_override_id_ctrl(req); 227 break; 228 case NVME_ID_CNS_NS: 229 nvmet_passthru_override_id_ns(req); 230 break; 231 case NVME_ID_CNS_NS_DESC_LIST: 232 nvmet_passthru_override_id_descs(req); 233 break; 234 } 235 } else if (status < 0) 236 status = NVME_SC_INTERNAL; 237 238 req->cqe->result = nvme_req(rq)->result; 239 nvmet_req_complete(req, status); 240 blk_mq_free_request(rq); 241} 242 243static void nvmet_passthru_req_done(struct request *rq, 244 blk_status_t blk_status) 245{ 246 struct nvmet_req *req = rq->end_io_data; 247 248 req->cqe->result = nvme_req(rq)->result; 249 nvmet_req_complete(req, nvme_req(rq)->status); 250 blk_mq_free_request(rq); 251} 252 253static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq) 254{ 255 struct scatterlist *sg; 256 struct bio *bio; 257 int i; 258 259 if (req->sg_cnt > BIO_MAX_VECS) 260 return -EINVAL; 261 262 if (nvmet_use_inline_bvec(req)) { 263 bio = &req->p.inline_bio; 264 bio_init(bio, NULL, req->inline_bvec, 265 ARRAY_SIZE(req->inline_bvec), req_op(rq)); 266 } else { 267 bio = bio_alloc(NULL, bio_max_segs(req->sg_cnt), req_op(rq), 268 GFP_KERNEL); 269 bio->bi_end_io = bio_put; 270 } 271 272 for_each_sg(req->sg, sg, req->sg_cnt, i) { 273 if (bio_add_pc_page(rq->q, bio, sg_page(sg), sg->length, 274 sg->offset) < sg->length) { 275 nvmet_req_bio_put(req, bio); 276 return -EINVAL; 277 } 278 } 279 280 blk_rq_bio_prep(rq, bio, req->sg_cnt); 281 282 return 0; 283} 284 285static void nvmet_passthru_execute_cmd(struct nvmet_req *req) 286{ 287 struct nvme_ctrl *ctrl = nvmet_req_subsys(req)->passthru_ctrl; 288 struct request_queue *q = ctrl->admin_q; 289 struct nvme_ns *ns = NULL; 290 struct request *rq = NULL; 291 unsigned int timeout; 292 u32 effects; 293 u16 status; 294 int ret; 295 296 if (likely(req->sq->qid != 0)) { 297 u32 nsid = le32_to_cpu(req->cmd->common.nsid); 298 299 ns = nvme_find_get_ns(ctrl, nsid); 300 if (unlikely(!ns)) { 301 pr_err("failed to get passthru ns nsid:%u\n", nsid); 302 status = NVME_SC_INVALID_NS | NVME_SC_DNR; 303 goto out; 304 } 305 306 q = ns->queue; 307 timeout = nvmet_req_subsys(req)->io_timeout; 308 } else { 309 timeout = nvmet_req_subsys(req)->admin_timeout; 310 } 311 312 rq = blk_mq_alloc_request(q, nvme_req_op(req->cmd), 0); 313 if (IS_ERR(rq)) { 314 status = NVME_SC_INTERNAL; 315 goto out_put_ns; 316 } 317 nvme_init_request(rq, req->cmd); 318 319 if (timeout) 320 rq->timeout = timeout; 321 322 if (req->sg_cnt) { 323 ret = nvmet_passthru_map_sg(req, rq); 324 if (unlikely(ret)) { 325 status = NVME_SC_INTERNAL; 326 goto out_put_req; 327 } 328 } 329 330 /* 331 * If there are effects for the command we are about to execute, or 332 * an end_req function we need to use nvme_execute_passthru_rq() 333 * synchronously in a work item seeing the end_req function and 334 * nvme_passthru_end() can't be called in the request done callback 335 * which is typically in interrupt context. 336 */ 337 effects = nvme_command_effects(ctrl, ns, req->cmd->common.opcode); 338 if (req->p.use_workqueue || effects) { 339 INIT_WORK(&req->p.work, nvmet_passthru_execute_cmd_work); 340 req->p.rq = rq; 341 queue_work(nvmet_wq, &req->p.work); 342 } else { 343 rq->end_io = nvmet_passthru_req_done; 344 rq->end_io_data = req; 345 blk_execute_rq_nowait(rq, false); 346 } 347 348 if (ns) 349 nvme_put_ns(ns); 350 351 return; 352 353out_put_req: 354 blk_mq_free_request(rq); 355out_put_ns: 356 if (ns) 357 nvme_put_ns(ns); 358out: 359 nvmet_req_complete(req, status); 360} 361 362/* 363 * We need to emulate set host behaviour to ensure that any requested 364 * behaviour of the target's host matches the requested behaviour 365 * of the device's host and fail otherwise. 366 */ 367static void nvmet_passthru_set_host_behaviour(struct nvmet_req *req) 368{ 369 struct nvme_ctrl *ctrl = nvmet_req_subsys(req)->passthru_ctrl; 370 struct nvme_feat_host_behavior *host; 371 u16 status = NVME_SC_INTERNAL; 372 int ret; 373 374 host = kzalloc(sizeof(*host) * 2, GFP_KERNEL); 375 if (!host) 376 goto out_complete_req; 377 378 ret = nvme_get_features(ctrl, NVME_FEAT_HOST_BEHAVIOR, 0, 379 host, sizeof(*host), NULL); 380 if (ret) 381 goto out_free_host; 382 383 status = nvmet_copy_from_sgl(req, 0, &host[1], sizeof(*host)); 384 if (status) 385 goto out_free_host; 386 387 if (memcmp(&host[0], &host[1], sizeof(host[0]))) { 388 pr_warn("target host has requested different behaviour from the local host\n"); 389 status = NVME_SC_INTERNAL; 390 } 391 392out_free_host: 393 kfree(host); 394out_complete_req: 395 nvmet_req_complete(req, status); 396} 397 398static u16 nvmet_setup_passthru_command(struct nvmet_req *req) 399{ 400 req->p.use_workqueue = false; 401 req->execute = nvmet_passthru_execute_cmd; 402 return NVME_SC_SUCCESS; 403} 404 405u16 nvmet_parse_passthru_io_cmd(struct nvmet_req *req) 406{ 407 /* Reject any commands with non-sgl flags set (ie. fused commands) */ 408 if (req->cmd->common.flags & ~NVME_CMD_SGL_ALL) 409 return NVME_SC_INVALID_FIELD; 410 411 switch (req->cmd->common.opcode) { 412 case nvme_cmd_resv_register: 413 case nvme_cmd_resv_report: 414 case nvme_cmd_resv_acquire: 415 case nvme_cmd_resv_release: 416 /* 417 * Reservations cannot be supported properly because the 418 * underlying device has no way of differentiating different 419 * hosts that connect via fabrics. This could potentially be 420 * emulated in the future if regular targets grow support for 421 * this feature. 422 */ 423 return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; 424 } 425 426 return nvmet_setup_passthru_command(req); 427} 428 429/* 430 * Only features that are emulated or specifically allowed in the list are 431 * passed down to the controller. This function implements the allow list for 432 * both get and set features. 433 */ 434static u16 nvmet_passthru_get_set_features(struct nvmet_req *req) 435{ 436 switch (le32_to_cpu(req->cmd->features.fid)) { 437 case NVME_FEAT_ARBITRATION: 438 case NVME_FEAT_POWER_MGMT: 439 case NVME_FEAT_LBA_RANGE: 440 case NVME_FEAT_TEMP_THRESH: 441 case NVME_FEAT_ERR_RECOVERY: 442 case NVME_FEAT_VOLATILE_WC: 443 case NVME_FEAT_WRITE_ATOMIC: 444 case NVME_FEAT_AUTO_PST: 445 case NVME_FEAT_TIMESTAMP: 446 case NVME_FEAT_HCTM: 447 case NVME_FEAT_NOPSC: 448 case NVME_FEAT_RRL: 449 case NVME_FEAT_PLM_CONFIG: 450 case NVME_FEAT_PLM_WINDOW: 451 case NVME_FEAT_HOST_BEHAVIOR: 452 case NVME_FEAT_SANITIZE: 453 case NVME_FEAT_VENDOR_START ... NVME_FEAT_VENDOR_END: 454 return nvmet_setup_passthru_command(req); 455 456 case NVME_FEAT_ASYNC_EVENT: 457 /* There is no support for forwarding ASYNC events */ 458 case NVME_FEAT_IRQ_COALESCE: 459 case NVME_FEAT_IRQ_CONFIG: 460 /* The IRQ settings will not apply to the target controller */ 461 case NVME_FEAT_HOST_MEM_BUF: 462 /* 463 * Any HMB that's set will not be passed through and will 464 * not work as expected 465 */ 466 case NVME_FEAT_SW_PROGRESS: 467 /* 468 * The Pre-Boot Software Load Count doesn't make much 469 * sense for a target to export 470 */ 471 case NVME_FEAT_RESV_MASK: 472 case NVME_FEAT_RESV_PERSIST: 473 /* No reservations, see nvmet_parse_passthru_io_cmd() */ 474 default: 475 return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; 476 } 477} 478 479u16 nvmet_parse_passthru_admin_cmd(struct nvmet_req *req) 480{ 481 /* Reject any commands with non-sgl flags set (ie. fused commands) */ 482 if (req->cmd->common.flags & ~NVME_CMD_SGL_ALL) 483 return NVME_SC_INVALID_FIELD; 484 485 /* 486 * Passthru all vendor specific commands 487 */ 488 if (req->cmd->common.opcode >= nvme_admin_vendor_start) 489 return nvmet_setup_passthru_command(req); 490 491 switch (req->cmd->common.opcode) { 492 case nvme_admin_async_event: 493 req->execute = nvmet_execute_async_event; 494 return NVME_SC_SUCCESS; 495 case nvme_admin_keep_alive: 496 /* 497 * Most PCIe ctrls don't support keep alive cmd, we route keep 498 * alive to the non-passthru mode. In future please change this 499 * code when PCIe ctrls with keep alive support available. 500 */ 501 req->execute = nvmet_execute_keep_alive; 502 return NVME_SC_SUCCESS; 503 case nvme_admin_set_features: 504 switch (le32_to_cpu(req->cmd->features.fid)) { 505 case NVME_FEAT_ASYNC_EVENT: 506 case NVME_FEAT_KATO: 507 case NVME_FEAT_NUM_QUEUES: 508 case NVME_FEAT_HOST_ID: 509 req->execute = nvmet_execute_set_features; 510 return NVME_SC_SUCCESS; 511 case NVME_FEAT_HOST_BEHAVIOR: 512 req->execute = nvmet_passthru_set_host_behaviour; 513 return NVME_SC_SUCCESS; 514 default: 515 return nvmet_passthru_get_set_features(req); 516 } 517 break; 518 case nvme_admin_get_features: 519 switch (le32_to_cpu(req->cmd->features.fid)) { 520 case NVME_FEAT_ASYNC_EVENT: 521 case NVME_FEAT_KATO: 522 case NVME_FEAT_NUM_QUEUES: 523 case NVME_FEAT_HOST_ID: 524 req->execute = nvmet_execute_get_features; 525 return NVME_SC_SUCCESS; 526 default: 527 return nvmet_passthru_get_set_features(req); 528 } 529 break; 530 case nvme_admin_identify: 531 switch (req->cmd->identify.cns) { 532 case NVME_ID_CNS_CTRL: 533 req->execute = nvmet_passthru_execute_cmd; 534 req->p.use_workqueue = true; 535 return NVME_SC_SUCCESS; 536 case NVME_ID_CNS_CS_CTRL: 537 switch (req->cmd->identify.csi) { 538 case NVME_CSI_ZNS: 539 req->execute = nvmet_passthru_execute_cmd; 540 req->p.use_workqueue = true; 541 return NVME_SC_SUCCESS; 542 } 543 return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; 544 case NVME_ID_CNS_NS: 545 req->execute = nvmet_passthru_execute_cmd; 546 req->p.use_workqueue = true; 547 return NVME_SC_SUCCESS; 548 case NVME_ID_CNS_CS_NS: 549 switch (req->cmd->identify.csi) { 550 case NVME_CSI_ZNS: 551 req->execute = nvmet_passthru_execute_cmd; 552 req->p.use_workqueue = true; 553 return NVME_SC_SUCCESS; 554 } 555 return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; 556 default: 557 return nvmet_setup_passthru_command(req); 558 } 559 case nvme_admin_get_log_page: 560 return nvmet_setup_passthru_command(req); 561 default: 562 /* Reject commands not in the allowlist above */ 563 return nvmet_report_invalid_opcode(req); 564 } 565} 566 567int nvmet_passthru_ctrl_enable(struct nvmet_subsys *subsys) 568{ 569 struct nvme_ctrl *ctrl; 570 struct file *file; 571 int ret = -EINVAL; 572 void *old; 573 574 mutex_lock(&subsys->lock); 575 if (!subsys->passthru_ctrl_path) 576 goto out_unlock; 577 if (subsys->passthru_ctrl) 578 goto out_unlock; 579 580 if (subsys->nr_namespaces) { 581 pr_info("cannot enable both passthru and regular namespaces for a single subsystem"); 582 goto out_unlock; 583 } 584 585 file = filp_open(subsys->passthru_ctrl_path, O_RDWR, 0); 586 if (IS_ERR(file)) { 587 ret = PTR_ERR(file); 588 goto out_unlock; 589 } 590 591 ctrl = nvme_ctrl_from_file(file); 592 if (!ctrl) { 593 pr_err("failed to open nvme controller %s\n", 594 subsys->passthru_ctrl_path); 595 596 goto out_put_file; 597 } 598 599 old = xa_cmpxchg(&passthru_subsystems, ctrl->cntlid, NULL, 600 subsys, GFP_KERNEL); 601 if (xa_is_err(old)) { 602 ret = xa_err(old); 603 goto out_put_file; 604 } 605 606 if (old) 607 goto out_put_file; 608 609 subsys->passthru_ctrl = ctrl; 610 subsys->ver = ctrl->vs; 611 612 if (subsys->ver < NVME_VS(1, 2, 1)) { 613 pr_warn("nvme controller version is too old: %llu.%llu.%llu, advertising 1.2.1\n", 614 NVME_MAJOR(subsys->ver), NVME_MINOR(subsys->ver), 615 NVME_TERTIARY(subsys->ver)); 616 subsys->ver = NVME_VS(1, 2, 1); 617 } 618 nvme_get_ctrl(ctrl); 619 __module_get(subsys->passthru_ctrl->ops->module); 620 ret = 0; 621 622out_put_file: 623 filp_close(file, NULL); 624out_unlock: 625 mutex_unlock(&subsys->lock); 626 return ret; 627} 628 629static void __nvmet_passthru_ctrl_disable(struct nvmet_subsys *subsys) 630{ 631 if (subsys->passthru_ctrl) { 632 xa_erase(&passthru_subsystems, subsys->passthru_ctrl->cntlid); 633 module_put(subsys->passthru_ctrl->ops->module); 634 nvme_put_ctrl(subsys->passthru_ctrl); 635 } 636 subsys->passthru_ctrl = NULL; 637 subsys->ver = NVMET_DEFAULT_VS; 638} 639 640void nvmet_passthru_ctrl_disable(struct nvmet_subsys *subsys) 641{ 642 mutex_lock(&subsys->lock); 643 __nvmet_passthru_ctrl_disable(subsys); 644 mutex_unlock(&subsys->lock); 645} 646 647void nvmet_passthru_subsys_free(struct nvmet_subsys *subsys) 648{ 649 mutex_lock(&subsys->lock); 650 __nvmet_passthru_ctrl_disable(subsys); 651 mutex_unlock(&subsys->lock); 652 kfree(subsys->passthru_ctrl_path); 653}