pvrdma_main.c (21602B)
1/* 2 * QEMU paravirtual RDMA 3 * 4 * Copyright (C) 2018 Oracle 5 * Copyright (C) 2018 Red Hat Inc 6 * 7 * Authors: 8 * Yuval Shaia <yuval.shaia@oracle.com> 9 * Marcel Apfelbaum <marcel@redhat.com> 10 * 11 * This work is licensed under the terms of the GNU GPL, version 2 or later. 12 * See the COPYING file in the top-level directory. 13 * 14 */ 15 16#include "qemu/osdep.h" 17#include "qapi/error.h" 18#include "qemu/module.h" 19#include "hw/pci/pci.h" 20#include "hw/pci/pci_ids.h" 21#include "hw/pci/msi.h" 22#include "hw/pci/msix.h" 23#include "hw/qdev-properties.h" 24#include "hw/qdev-properties-system.h" 25#include "cpu.h" 26#include "trace.h" 27#include "monitor/monitor.h" 28#include "hw/rdma/rdma.h" 29 30#include "../rdma_rm.h" 31#include "../rdma_backend.h" 32#include "../rdma_utils.h" 33 34#include <infiniband/verbs.h> 35#include "pvrdma.h" 36#include "standard-headers/rdma/vmw_pvrdma-abi.h" 37#include "sysemu/runstate.h" 38#include "standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h" 39#include "pvrdma_qp_ops.h" 40 41static Property pvrdma_dev_properties[] = { 42 DEFINE_PROP_STRING("netdev", PVRDMADev, backend_eth_device_name), 43 DEFINE_PROP_STRING("ibdev", PVRDMADev, backend_device_name), 44 DEFINE_PROP_UINT8("ibport", PVRDMADev, backend_port_num, 1), 45 DEFINE_PROP_UINT64("dev-caps-max-mr-size", PVRDMADev, dev_attr.max_mr_size, 46 MAX_MR_SIZE), 47 DEFINE_PROP_INT32("dev-caps-max-qp", PVRDMADev, dev_attr.max_qp, MAX_QP), 48 DEFINE_PROP_INT32("dev-caps-max-cq", PVRDMADev, dev_attr.max_cq, MAX_CQ), 49 DEFINE_PROP_INT32("dev-caps-max-mr", PVRDMADev, dev_attr.max_mr, MAX_MR), 50 DEFINE_PROP_INT32("dev-caps-max-pd", PVRDMADev, dev_attr.max_pd, MAX_PD), 51 DEFINE_PROP_INT32("dev-caps-qp-rd-atom", PVRDMADev, dev_attr.max_qp_rd_atom, 52 MAX_QP_RD_ATOM), 53 DEFINE_PROP_INT32("dev-caps-max-qp-init-rd-atom", PVRDMADev, 54 dev_attr.max_qp_init_rd_atom, MAX_QP_INIT_RD_ATOM), 55 DEFINE_PROP_INT32("dev-caps-max-ah", PVRDMADev, dev_attr.max_ah, MAX_AH), 56 DEFINE_PROP_INT32("dev-caps-max-srq", PVRDMADev, dev_attr.max_srq, MAX_SRQ), 57 DEFINE_PROP_CHR("mad-chardev", PVRDMADev, mad_chr), 58 DEFINE_PROP_END_OF_LIST(), 59}; 60 61static void pvrdma_print_statistics(Monitor *mon, RdmaProvider *obj) 62{ 63 PVRDMADev *dev = PVRDMA_DEV(obj); 64 PCIDevice *pdev = PCI_DEVICE(dev); 65 66 monitor_printf(mon, "%s, %x.%x\n", pdev->name, PCI_SLOT(pdev->devfn), 67 PCI_FUNC(pdev->devfn)); 68 monitor_printf(mon, "\tcommands : %" PRId64 "\n", 69 dev->stats.commands); 70 monitor_printf(mon, "\tregs_reads : %" PRId64 "\n", 71 dev->stats.regs_reads); 72 monitor_printf(mon, "\tregs_writes : %" PRId64 "\n", 73 dev->stats.regs_writes); 74 monitor_printf(mon, "\tuar_writes : %" PRId64 "\n", 75 dev->stats.uar_writes); 76 monitor_printf(mon, "\tinterrupts : %" PRId64 "\n", 77 dev->stats.interrupts); 78 rdma_dump_device_counters(mon, &dev->rdma_dev_res); 79} 80 81static void free_dev_ring(PCIDevice *pci_dev, PvrdmaRing *ring, 82 void *ring_state) 83{ 84 pvrdma_ring_free(ring); 85 rdma_pci_dma_unmap(pci_dev, ring_state, TARGET_PAGE_SIZE); 86} 87 88static int init_dev_ring(PvrdmaRing *ring, PvrdmaRingState **ring_state, 89 const char *name, PCIDevice *pci_dev, 90 dma_addr_t dir_addr, uint32_t num_pages) 91{ 92 uint64_t *dir, *tbl; 93 int rc = 0; 94 95 if (!num_pages) { 96 rdma_error_report("Ring pages count must be strictly positive"); 97 return -EINVAL; 98 } 99 100 dir = rdma_pci_dma_map(pci_dev, dir_addr, TARGET_PAGE_SIZE); 101 if (!dir) { 102 rdma_error_report("Failed to map to page directory (ring %s)", name); 103 rc = -ENOMEM; 104 goto out; 105 } 106 tbl = rdma_pci_dma_map(pci_dev, dir[0], TARGET_PAGE_SIZE); 107 if (!tbl) { 108 rdma_error_report("Failed to map to page table (ring %s)", name); 109 rc = -ENOMEM; 110 goto out_free_dir; 111 } 112 113 *ring_state = rdma_pci_dma_map(pci_dev, tbl[0], TARGET_PAGE_SIZE); 114 if (!*ring_state) { 115 rdma_error_report("Failed to map to ring state (ring %s)", name); 116 rc = -ENOMEM; 117 goto out_free_tbl; 118 } 119 /* RX ring is the second */ 120 (*ring_state)++; 121 rc = pvrdma_ring_init(ring, name, pci_dev, 122 (PvrdmaRingState *)*ring_state, 123 (num_pages - 1) * TARGET_PAGE_SIZE / 124 sizeof(struct pvrdma_cqne), 125 sizeof(struct pvrdma_cqne), 126 (dma_addr_t *)&tbl[1], (dma_addr_t)num_pages - 1); 127 if (rc) { 128 rc = -ENOMEM; 129 goto out_free_ring_state; 130 } 131 132 goto out_free_tbl; 133 134out_free_ring_state: 135 rdma_pci_dma_unmap(pci_dev, *ring_state, TARGET_PAGE_SIZE); 136 137out_free_tbl: 138 rdma_pci_dma_unmap(pci_dev, tbl, TARGET_PAGE_SIZE); 139 140out_free_dir: 141 rdma_pci_dma_unmap(pci_dev, dir, TARGET_PAGE_SIZE); 142 143out: 144 return rc; 145} 146 147static void free_dsr(PVRDMADev *dev) 148{ 149 PCIDevice *pci_dev = PCI_DEVICE(dev); 150 151 if (!dev->dsr_info.dsr) { 152 return; 153 } 154 155 free_dev_ring(pci_dev, &dev->dsr_info.async, 156 dev->dsr_info.async_ring_state); 157 158 free_dev_ring(pci_dev, &dev->dsr_info.cq, dev->dsr_info.cq_ring_state); 159 160 rdma_pci_dma_unmap(pci_dev, dev->dsr_info.req, 161 sizeof(union pvrdma_cmd_req)); 162 163 rdma_pci_dma_unmap(pci_dev, dev->dsr_info.rsp, 164 sizeof(union pvrdma_cmd_resp)); 165 166 rdma_pci_dma_unmap(pci_dev, dev->dsr_info.dsr, 167 sizeof(struct pvrdma_device_shared_region)); 168 169 dev->dsr_info.dsr = NULL; 170} 171 172static int load_dsr(PVRDMADev *dev) 173{ 174 int rc = 0; 175 PCIDevice *pci_dev = PCI_DEVICE(dev); 176 DSRInfo *dsr_info; 177 struct pvrdma_device_shared_region *dsr; 178 179 free_dsr(dev); 180 181 /* Map to DSR */ 182 dev->dsr_info.dsr = rdma_pci_dma_map(pci_dev, dev->dsr_info.dma, 183 sizeof(struct pvrdma_device_shared_region)); 184 if (!dev->dsr_info.dsr) { 185 rdma_error_report("Failed to map to DSR"); 186 rc = -ENOMEM; 187 goto out; 188 } 189 190 /* Shortcuts */ 191 dsr_info = &dev->dsr_info; 192 dsr = dsr_info->dsr; 193 194 /* Map to command slot */ 195 dsr_info->req = rdma_pci_dma_map(pci_dev, dsr->cmd_slot_dma, 196 sizeof(union pvrdma_cmd_req)); 197 if (!dsr_info->req) { 198 rdma_error_report("Failed to map to command slot address"); 199 rc = -ENOMEM; 200 goto out_free_dsr; 201 } 202 203 /* Map to response slot */ 204 dsr_info->rsp = rdma_pci_dma_map(pci_dev, dsr->resp_slot_dma, 205 sizeof(union pvrdma_cmd_resp)); 206 if (!dsr_info->rsp) { 207 rdma_error_report("Failed to map to response slot address"); 208 rc = -ENOMEM; 209 goto out_free_req; 210 } 211 212 /* Map to CQ notification ring */ 213 rc = init_dev_ring(&dsr_info->cq, &dsr_info->cq_ring_state, "dev_cq", 214 pci_dev, dsr->cq_ring_pages.pdir_dma, 215 dsr->cq_ring_pages.num_pages); 216 if (rc) { 217 rc = -ENOMEM; 218 goto out_free_rsp; 219 } 220 221 /* Map to event notification ring */ 222 rc = init_dev_ring(&dsr_info->async, &dsr_info->async_ring_state, 223 "dev_async", pci_dev, dsr->async_ring_pages.pdir_dma, 224 dsr->async_ring_pages.num_pages); 225 if (rc) { 226 rc = -ENOMEM; 227 goto out_free_rsp; 228 } 229 230 goto out; 231 232out_free_rsp: 233 rdma_pci_dma_unmap(pci_dev, dsr_info->rsp, sizeof(union pvrdma_cmd_resp)); 234 235out_free_req: 236 rdma_pci_dma_unmap(pci_dev, dsr_info->req, sizeof(union pvrdma_cmd_req)); 237 238out_free_dsr: 239 rdma_pci_dma_unmap(pci_dev, dsr_info->dsr, 240 sizeof(struct pvrdma_device_shared_region)); 241 dsr_info->dsr = NULL; 242 243out: 244 return rc; 245} 246 247static void init_dsr_dev_caps(PVRDMADev *dev) 248{ 249 struct pvrdma_device_shared_region *dsr; 250 251 if (dev->dsr_info.dsr == NULL) { 252 rdma_error_report("Can't initialized DSR"); 253 return; 254 } 255 256 dsr = dev->dsr_info.dsr; 257 dsr->caps.fw_ver = PVRDMA_FW_VERSION; 258 dsr->caps.mode = PVRDMA_DEVICE_MODE_ROCE; 259 dsr->caps.gid_types |= PVRDMA_GID_TYPE_FLAG_ROCE_V1; 260 dsr->caps.max_uar = RDMA_BAR2_UAR_SIZE; 261 dsr->caps.max_mr_size = dev->dev_attr.max_mr_size; 262 dsr->caps.max_qp = dev->dev_attr.max_qp; 263 dsr->caps.max_qp_wr = dev->dev_attr.max_qp_wr; 264 dsr->caps.max_sge = dev->dev_attr.max_sge; 265 dsr->caps.max_cq = dev->dev_attr.max_cq; 266 dsr->caps.max_cqe = dev->dev_attr.max_cqe; 267 dsr->caps.max_mr = dev->dev_attr.max_mr; 268 dsr->caps.max_pd = dev->dev_attr.max_pd; 269 dsr->caps.max_ah = dev->dev_attr.max_ah; 270 dsr->caps.max_srq = dev->dev_attr.max_srq; 271 dsr->caps.max_srq_wr = dev->dev_attr.max_srq_wr; 272 dsr->caps.max_srq_sge = dev->dev_attr.max_srq_sge; 273 dsr->caps.gid_tbl_len = MAX_GIDS; 274 dsr->caps.sys_image_guid = 0; 275 dsr->caps.node_guid = dev->node_guid; 276 dsr->caps.phys_port_cnt = MAX_PORTS; 277 dsr->caps.max_pkeys = MAX_PKEYS; 278} 279 280static void uninit_msix(PCIDevice *pdev, int used_vectors) 281{ 282 PVRDMADev *dev = PVRDMA_DEV(pdev); 283 int i; 284 285 for (i = 0; i < used_vectors; i++) { 286 msix_vector_unuse(pdev, i); 287 } 288 289 msix_uninit(pdev, &dev->msix, &dev->msix); 290} 291 292static int init_msix(PCIDevice *pdev) 293{ 294 PVRDMADev *dev = PVRDMA_DEV(pdev); 295 int i; 296 int rc; 297 298 rc = msix_init(pdev, RDMA_MAX_INTRS, &dev->msix, RDMA_MSIX_BAR_IDX, 299 RDMA_MSIX_TABLE, &dev->msix, RDMA_MSIX_BAR_IDX, 300 RDMA_MSIX_PBA, 0, NULL); 301 302 if (rc < 0) { 303 rdma_error_report("Failed to initialize MSI-X"); 304 return rc; 305 } 306 307 for (i = 0; i < RDMA_MAX_INTRS; i++) { 308 rc = msix_vector_use(PCI_DEVICE(dev), i); 309 if (rc < 0) { 310 rdma_error_report("Fail mark MSI-X vector %d", i); 311 uninit_msix(pdev, i); 312 return rc; 313 } 314 } 315 316 return 0; 317} 318 319static void pvrdma_fini(PCIDevice *pdev) 320{ 321 PVRDMADev *dev = PVRDMA_DEV(pdev); 322 323 notifier_remove(&dev->shutdown_notifier); 324 325 pvrdma_qp_ops_fini(); 326 327 rdma_backend_stop(&dev->backend_dev); 328 329 rdma_rm_fini(&dev->rdma_dev_res, &dev->backend_dev, 330 dev->backend_eth_device_name); 331 332 rdma_backend_fini(&dev->backend_dev); 333 334 free_dsr(dev); 335 336 if (msix_enabled(pdev)) { 337 uninit_msix(pdev, RDMA_MAX_INTRS); 338 } 339 340 rdma_info_report("Device %s %x.%x is down", pdev->name, 341 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); 342} 343 344static void pvrdma_stop(PVRDMADev *dev) 345{ 346 rdma_backend_stop(&dev->backend_dev); 347} 348 349static void pvrdma_start(PVRDMADev *dev) 350{ 351 rdma_backend_start(&dev->backend_dev); 352} 353 354static void activate_device(PVRDMADev *dev) 355{ 356 pvrdma_start(dev); 357 set_reg_val(dev, PVRDMA_REG_ERR, 0); 358} 359 360static int unquiesce_device(PVRDMADev *dev) 361{ 362 return 0; 363} 364 365static void reset_device(PVRDMADev *dev) 366{ 367 pvrdma_stop(dev); 368} 369 370static uint64_t pvrdma_regs_read(void *opaque, hwaddr addr, unsigned size) 371{ 372 PVRDMADev *dev = opaque; 373 uint32_t val; 374 375 dev->stats.regs_reads++; 376 377 if (get_reg_val(dev, addr, &val)) { 378 rdma_error_report("Failed to read REG value from address 0x%x", 379 (uint32_t)addr); 380 return -EINVAL; 381 } 382 383 trace_pvrdma_regs_read(addr, val); 384 385 return val; 386} 387 388static void pvrdma_regs_write(void *opaque, hwaddr addr, uint64_t val, 389 unsigned size) 390{ 391 PVRDMADev *dev = opaque; 392 393 dev->stats.regs_writes++; 394 395 if (set_reg_val(dev, addr, val)) { 396 rdma_error_report("Failed to set REG value, addr=0x%"PRIx64 ", val=0x%"PRIx64, 397 addr, val); 398 return; 399 } 400 401 switch (addr) { 402 case PVRDMA_REG_DSRLOW: 403 trace_pvrdma_regs_write(addr, val, "DSRLOW", ""); 404 dev->dsr_info.dma = val; 405 break; 406 case PVRDMA_REG_DSRHIGH: 407 trace_pvrdma_regs_write(addr, val, "DSRHIGH", ""); 408 dev->dsr_info.dma |= val << 32; 409 load_dsr(dev); 410 init_dsr_dev_caps(dev); 411 break; 412 case PVRDMA_REG_CTL: 413 switch (val) { 414 case PVRDMA_DEVICE_CTL_ACTIVATE: 415 trace_pvrdma_regs_write(addr, val, "CTL", "ACTIVATE"); 416 activate_device(dev); 417 break; 418 case PVRDMA_DEVICE_CTL_UNQUIESCE: 419 trace_pvrdma_regs_write(addr, val, "CTL", "UNQUIESCE"); 420 unquiesce_device(dev); 421 break; 422 case PVRDMA_DEVICE_CTL_RESET: 423 trace_pvrdma_regs_write(addr, val, "CTL", "URESET"); 424 reset_device(dev); 425 break; 426 } 427 break; 428 case PVRDMA_REG_IMR: 429 trace_pvrdma_regs_write(addr, val, "INTR_MASK", ""); 430 dev->interrupt_mask = val; 431 break; 432 case PVRDMA_REG_REQUEST: 433 if (val == 0) { 434 trace_pvrdma_regs_write(addr, val, "REQUEST", ""); 435 pvrdma_exec_cmd(dev); 436 } 437 break; 438 default: 439 break; 440 } 441} 442 443static const MemoryRegionOps regs_ops = { 444 .read = pvrdma_regs_read, 445 .write = pvrdma_regs_write, 446 .endianness = DEVICE_LITTLE_ENDIAN, 447 .impl = { 448 .min_access_size = sizeof(uint32_t), 449 .max_access_size = sizeof(uint32_t), 450 }, 451}; 452 453static uint64_t pvrdma_uar_read(void *opaque, hwaddr addr, unsigned size) 454{ 455 return 0xffffffff; 456} 457 458static void pvrdma_uar_write(void *opaque, hwaddr addr, uint64_t val, 459 unsigned size) 460{ 461 PVRDMADev *dev = opaque; 462 463 dev->stats.uar_writes++; 464 465 switch (addr & 0xFFF) { /* Mask with 0xFFF as each UC gets page */ 466 case PVRDMA_UAR_QP_OFFSET: 467 if (val & PVRDMA_UAR_QP_SEND) { 468 trace_pvrdma_uar_write(addr, val, "QP", "SEND", 469 val & PVRDMA_UAR_HANDLE_MASK, 0); 470 pvrdma_qp_send(dev, val & PVRDMA_UAR_HANDLE_MASK); 471 } 472 if (val & PVRDMA_UAR_QP_RECV) { 473 trace_pvrdma_uar_write(addr, val, "QP", "RECV", 474 val & PVRDMA_UAR_HANDLE_MASK, 0); 475 pvrdma_qp_recv(dev, val & PVRDMA_UAR_HANDLE_MASK); 476 } 477 break; 478 case PVRDMA_UAR_CQ_OFFSET: 479 if (val & PVRDMA_UAR_CQ_ARM) { 480 trace_pvrdma_uar_write(addr, val, "CQ", "ARM", 481 val & PVRDMA_UAR_HANDLE_MASK, 482 !!(val & PVRDMA_UAR_CQ_ARM_SOL)); 483 rdma_rm_req_notify_cq(&dev->rdma_dev_res, 484 val & PVRDMA_UAR_HANDLE_MASK, 485 !!(val & PVRDMA_UAR_CQ_ARM_SOL)); 486 } 487 if (val & PVRDMA_UAR_CQ_ARM_SOL) { 488 trace_pvrdma_uar_write(addr, val, "CQ", "ARMSOL - not supported", 0, 489 0); 490 } 491 if (val & PVRDMA_UAR_CQ_POLL) { 492 trace_pvrdma_uar_write(addr, val, "CQ", "POLL", 493 val & PVRDMA_UAR_HANDLE_MASK, 0); 494 pvrdma_cq_poll(&dev->rdma_dev_res, val & PVRDMA_UAR_HANDLE_MASK); 495 } 496 break; 497 case PVRDMA_UAR_SRQ_OFFSET: 498 if (val & PVRDMA_UAR_SRQ_RECV) { 499 trace_pvrdma_uar_write(addr, val, "QP", "SRQ", 500 val & PVRDMA_UAR_HANDLE_MASK, 0); 501 pvrdma_srq_recv(dev, val & PVRDMA_UAR_HANDLE_MASK); 502 } 503 break; 504 default: 505 rdma_error_report("Unsupported command, addr=0x%"PRIx64", val=0x%"PRIx64, 506 addr, val); 507 break; 508 } 509} 510 511static const MemoryRegionOps uar_ops = { 512 .read = pvrdma_uar_read, 513 .write = pvrdma_uar_write, 514 .endianness = DEVICE_LITTLE_ENDIAN, 515 .impl = { 516 .min_access_size = sizeof(uint32_t), 517 .max_access_size = sizeof(uint32_t), 518 }, 519}; 520 521static void init_pci_config(PCIDevice *pdev) 522{ 523 pdev->config[PCI_INTERRUPT_PIN] = 1; 524} 525 526static void init_bars(PCIDevice *pdev) 527{ 528 PVRDMADev *dev = PVRDMA_DEV(pdev); 529 530 /* BAR 0 - MSI-X */ 531 memory_region_init(&dev->msix, OBJECT(dev), "pvrdma-msix", 532 RDMA_BAR0_MSIX_SIZE); 533 pci_register_bar(pdev, RDMA_MSIX_BAR_IDX, PCI_BASE_ADDRESS_SPACE_MEMORY, 534 &dev->msix); 535 536 /* BAR 1 - Registers */ 537 memset(&dev->regs_data, 0, sizeof(dev->regs_data)); 538 memory_region_init_io(&dev->regs, OBJECT(dev), ®s_ops, dev, 539 "pvrdma-regs", sizeof(dev->regs_data)); 540 pci_register_bar(pdev, RDMA_REG_BAR_IDX, PCI_BASE_ADDRESS_SPACE_MEMORY, 541 &dev->regs); 542 543 /* BAR 2 - UAR */ 544 memset(&dev->uar_data, 0, sizeof(dev->uar_data)); 545 memory_region_init_io(&dev->uar, OBJECT(dev), &uar_ops, dev, "rdma-uar", 546 sizeof(dev->uar_data)); 547 pci_register_bar(pdev, RDMA_UAR_BAR_IDX, PCI_BASE_ADDRESS_SPACE_MEMORY, 548 &dev->uar); 549} 550 551static void init_regs(PCIDevice *pdev) 552{ 553 PVRDMADev *dev = PVRDMA_DEV(pdev); 554 555 set_reg_val(dev, PVRDMA_REG_VERSION, PVRDMA_HW_VERSION); 556 set_reg_val(dev, PVRDMA_REG_ERR, 0xFFFF); 557} 558 559static void init_dev_caps(PVRDMADev *dev) 560{ 561 size_t pg_tbl_bytes = TARGET_PAGE_SIZE * 562 (TARGET_PAGE_SIZE / sizeof(uint64_t)); 563 size_t wr_sz = MAX(sizeof(struct pvrdma_sq_wqe_hdr), 564 sizeof(struct pvrdma_rq_wqe_hdr)); 565 566 dev->dev_attr.max_qp_wr = pg_tbl_bytes / 567 (wr_sz + sizeof(struct pvrdma_sge) * 568 dev->dev_attr.max_sge) - TARGET_PAGE_SIZE; 569 /* First page is ring state ^^^^ */ 570 571 dev->dev_attr.max_cqe = pg_tbl_bytes / sizeof(struct pvrdma_cqe) - 572 TARGET_PAGE_SIZE; /* First page is ring state */ 573 574 dev->dev_attr.max_srq_wr = pg_tbl_bytes / 575 ((sizeof(struct pvrdma_rq_wqe_hdr) + 576 sizeof(struct pvrdma_sge)) * 577 dev->dev_attr.max_sge) - TARGET_PAGE_SIZE; 578} 579 580static int pvrdma_check_ram_shared(Object *obj, void *opaque) 581{ 582 bool *shared = opaque; 583 584 if (object_dynamic_cast(obj, "memory-backend-ram")) { 585 *shared = object_property_get_bool(obj, "share", NULL); 586 } 587 588 return 0; 589} 590 591static void pvrdma_shutdown_notifier(Notifier *n, void *opaque) 592{ 593 PVRDMADev *dev = container_of(n, PVRDMADev, shutdown_notifier); 594 PCIDevice *pci_dev = PCI_DEVICE(dev); 595 596 pvrdma_fini(pci_dev); 597} 598 599static void pvrdma_realize(PCIDevice *pdev, Error **errp) 600{ 601 int rc = 0; 602 PVRDMADev *dev = PVRDMA_DEV(pdev); 603 Object *memdev_root; 604 bool ram_shared = false; 605 PCIDevice *func0; 606 607 rdma_info_report("Initializing device %s %x.%x", pdev->name, 608 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); 609 610 if (TARGET_PAGE_SIZE != qemu_real_host_page_size) { 611 error_setg(errp, "Target page size must be the same as host page size"); 612 return; 613 } 614 615 func0 = pci_get_function_0(pdev); 616 /* Break if not vmxnet3 device in slot 0 */ 617 if (strcmp(object_get_typename(OBJECT(func0)), TYPE_VMXNET3)) { 618 error_setg(errp, "Device on %x.0 must be %s", PCI_SLOT(pdev->devfn), 619 TYPE_VMXNET3); 620 return; 621 } 622 dev->func0 = VMXNET3(func0); 623 624 addrconf_addr_eui48((unsigned char *)&dev->node_guid, 625 (const char *)&dev->func0->conf.macaddr.a); 626 627 memdev_root = object_resolve_path("/objects", NULL); 628 if (memdev_root) { 629 object_child_foreach(memdev_root, pvrdma_check_ram_shared, &ram_shared); 630 } 631 if (!ram_shared) { 632 error_setg(errp, "Only shared memory backed ram is supported"); 633 return; 634 } 635 636 dev->dsr_info.dsr = NULL; 637 638 init_pci_config(pdev); 639 640 init_bars(pdev); 641 642 init_regs(pdev); 643 644 rc = init_msix(pdev); 645 if (rc) { 646 goto out; 647 } 648 649 rc = rdma_backend_init(&dev->backend_dev, pdev, &dev->rdma_dev_res, 650 dev->backend_device_name, dev->backend_port_num, 651 &dev->dev_attr, &dev->mad_chr); 652 if (rc) { 653 goto out; 654 } 655 656 init_dev_caps(dev); 657 658 rc = rdma_rm_init(&dev->rdma_dev_res, &dev->dev_attr); 659 if (rc) { 660 goto out; 661 } 662 663 rc = pvrdma_qp_ops_init(); 664 if (rc) { 665 goto out; 666 } 667 668 memset(&dev->stats, 0, sizeof(dev->stats)); 669 670 dev->shutdown_notifier.notify = pvrdma_shutdown_notifier; 671 qemu_register_shutdown_notifier(&dev->shutdown_notifier); 672 673#ifdef LEGACY_RDMA_REG_MR 674 rdma_info_report("Using legacy reg_mr"); 675#else 676 rdma_info_report("Using iova reg_mr"); 677#endif 678 679out: 680 if (rc) { 681 pvrdma_fini(pdev); 682 error_append_hint(errp, "Device failed to load\n"); 683 } 684} 685 686static void pvrdma_class_init(ObjectClass *klass, void *data) 687{ 688 DeviceClass *dc = DEVICE_CLASS(klass); 689 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); 690 RdmaProviderClass *ir = RDMA_PROVIDER_CLASS(klass); 691 692 k->realize = pvrdma_realize; 693 k->vendor_id = PCI_VENDOR_ID_VMWARE; 694 k->device_id = PCI_DEVICE_ID_VMWARE_PVRDMA; 695 k->revision = 0x00; 696 k->class_id = PCI_CLASS_NETWORK_OTHER; 697 698 dc->desc = "RDMA Device"; 699 device_class_set_props(dc, pvrdma_dev_properties); 700 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); 701 702 ir->print_statistics = pvrdma_print_statistics; 703} 704 705static const TypeInfo pvrdma_info = { 706 .name = PVRDMA_HW_NAME, 707 .parent = TYPE_PCI_DEVICE, 708 .instance_size = sizeof(PVRDMADev), 709 .class_init = pvrdma_class_init, 710 .interfaces = (InterfaceInfo[]) { 711 { INTERFACE_CONVENTIONAL_PCI_DEVICE }, 712 { INTERFACE_RDMA_PROVIDER }, 713 { } 714 } 715}; 716 717static void register_types(void) 718{ 719 type_register_static(&pvrdma_info); 720} 721 722type_init(register_types)