pvrdma_main.c (30986B)
/*
 * Copyright (c) 2012-2016 VMware, Inc.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of EITHER the GNU General Public License
 * version 2 as published by the Free Software Foundation or the BSD
 * 2-Clause License. This program is distributed in the hope that it
 * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License version 2 for more details at
 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program available in the file COPYING in the main
 * directory of this source tree.
 *
 * The BSD 2-Clause License
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <linux/errno.h>
#include <linux/inetdevice.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
#include <net/addrconf.h>

#include "pvrdma.h"

#define DRV_NAME	"vmw_pvrdma"
#define DRV_VERSION	"1.0.1.0-k"

static DEFINE_MUTEX(pvrdma_device_list_lock);
static LIST_HEAD(pvrdma_device_list);
static struct workqueue_struct *event_wq;

static int pvrdma_add_gid(const struct ib_gid_attr *attr, void **context);
static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context);

static ssize_t hca_type_show(struct device *device,
			     struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "VMW_PVRDMA-%s\n", DRV_VERSION);
}
static DEVICE_ATTR_RO(hca_type);

static ssize_t hw_rev_show(struct device *device,
			   struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%d\n", PVRDMA_REV_ID);
}
static DEVICE_ATTR_RO(hw_rev);

static ssize_t board_id_show(struct device *device,
			     struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%d\n", PVRDMA_BOARD_ID);
}
static DEVICE_ATTR_RO(board_id);

static struct attribute *pvrdma_class_attributes[] = {
	&dev_attr_hw_rev.attr,
	&dev_attr_hca_type.attr,
	&dev_attr_board_id.attr,
	NULL,
};

static const struct attribute_group pvrdma_attr_group = {
	.attrs = pvrdma_class_attributes,
};

static void pvrdma_get_fw_ver_str(struct ib_device *device, char *str)
{
	struct pvrdma_dev *dev =
		container_of(device, struct pvrdma_dev, ib_dev);
	snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d\n",
		 (int) (dev->dsr->caps.fw_ver >> 32),
		 (int) (dev->dsr->caps.fw_ver >> 16) & 0xffff,
		 (int) dev->dsr->caps.fw_ver & 0xffff);
}

static int pvrdma_init_device(struct pvrdma_dev *dev)
{
	/* Initialize some device related stuff */
	spin_lock_init(&dev->cmd_lock);
	sema_init(&dev->cmd_sema, 1);
	atomic_set(&dev->num_qps, 0);
	atomic_set(&dev->num_srqs, 0);
	atomic_set(&dev->num_cqs, 0);
	atomic_set(&dev->num_pds, 0);
	atomic_set(&dev->num_ahs, 0);

	return 0;
}

static int pvrdma_port_immutable(struct ib_device *ibdev, u32 port_num,
				 struct ib_port_immutable *immutable)
{
	struct pvrdma_dev *dev = to_vdev(ibdev);
	struct ib_port_attr attr;
	int err;

	if (dev->dsr->caps.gid_types == PVRDMA_GID_TYPE_FLAG_ROCE_V1)
		immutable->core_cap_flags |= RDMA_CORE_PORT_IBA_ROCE;
	else if (dev->dsr->caps.gid_types == PVRDMA_GID_TYPE_FLAG_ROCE_V2)
		immutable->core_cap_flags |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;

	err = ib_query_port(ibdev, port_num, &attr);
	if (err)
		return err;

	immutable->pkey_tbl_len = attr.pkey_tbl_len;
	immutable->gid_tbl_len = attr.gid_tbl_len;
	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
	return 0;
}

static const struct ib_device_ops pvrdma_dev_ops = {
	.owner = THIS_MODULE,
	.driver_id = RDMA_DRIVER_VMW_PVRDMA,
	.uverbs_abi_ver = PVRDMA_UVERBS_ABI_VERSION,

	.add_gid = pvrdma_add_gid,
	.alloc_mr = pvrdma_alloc_mr,
	.alloc_pd = pvrdma_alloc_pd,
	.alloc_ucontext = pvrdma_alloc_ucontext,
	.create_ah = pvrdma_create_ah,
	.create_cq = pvrdma_create_cq,
	.create_qp = pvrdma_create_qp,
	.dealloc_pd = pvrdma_dealloc_pd,
	.dealloc_ucontext = pvrdma_dealloc_ucontext,
	.del_gid = pvrdma_del_gid,
	.dereg_mr = pvrdma_dereg_mr,
	.destroy_ah = pvrdma_destroy_ah,
	.destroy_cq = pvrdma_destroy_cq,
	.destroy_qp = pvrdma_destroy_qp,
	.device_group = &pvrdma_attr_group,
	.get_dev_fw_str = pvrdma_get_fw_ver_str,
	.get_dma_mr = pvrdma_get_dma_mr,
	.get_link_layer = pvrdma_port_link_layer,
	.get_port_immutable = pvrdma_port_immutable,
	.map_mr_sg = pvrdma_map_mr_sg,
	.mmap = pvrdma_mmap,
	.modify_port = pvrdma_modify_port,
	.modify_qp = pvrdma_modify_qp,
	.poll_cq = pvrdma_poll_cq,
	.post_recv = pvrdma_post_recv,
	.post_send = pvrdma_post_send,
	.query_device = pvrdma_query_device,
	.query_gid = pvrdma_query_gid,
	.query_pkey = pvrdma_query_pkey,
	.query_port = pvrdma_query_port,
	.query_qp = pvrdma_query_qp,
	.reg_user_mr = pvrdma_reg_user_mr,
	.req_notify_cq = pvrdma_req_notify_cq,

	INIT_RDMA_OBJ_SIZE(ib_ah, pvrdma_ah, ibah),
	INIT_RDMA_OBJ_SIZE(ib_cq, pvrdma_cq, ibcq),
	INIT_RDMA_OBJ_SIZE(ib_pd, pvrdma_pd, ibpd),
	INIT_RDMA_OBJ_SIZE(ib_qp, pvrdma_qp, ibqp),
	INIT_RDMA_OBJ_SIZE(ib_ucontext, pvrdma_ucontext, ibucontext),
};

static const struct ib_device_ops pvrdma_dev_srq_ops = {
	.create_srq = pvrdma_create_srq,
	.destroy_srq = pvrdma_destroy_srq,
	.modify_srq = pvrdma_modify_srq,
	.query_srq = pvrdma_query_srq,

	INIT_RDMA_OBJ_SIZE(ib_srq, pvrdma_srq, ibsrq),
};

static int pvrdma_register_device(struct pvrdma_dev *dev)
{
	int ret = -1;

	dev->ib_dev.node_guid = dev->dsr->caps.node_guid;
	dev->sys_image_guid = dev->dsr->caps.sys_image_guid;
	dev->flags = 0;
	dev->ib_dev.num_comp_vectors = 1;
	dev->ib_dev.dev.parent = &dev->pdev->dev;

	dev->ib_dev.node_type = RDMA_NODE_IB_CA;
	dev->ib_dev.phys_port_cnt = dev->dsr->caps.phys_port_cnt;

	ib_set_device_ops(&dev->ib_dev, &pvrdma_dev_ops);

	mutex_init(&dev->port_mutex);
	spin_lock_init(&dev->desc_lock);

	dev->cq_tbl = kcalloc(dev->dsr->caps.max_cq, sizeof(struct pvrdma_cq *),
			      GFP_KERNEL);
	if (!dev->cq_tbl)
		return ret;
	spin_lock_init(&dev->cq_tbl_lock);

	dev->qp_tbl = kcalloc(dev->dsr->caps.max_qp, sizeof(struct pvrdma_qp *),
			      GFP_KERNEL);
	if (!dev->qp_tbl)
		goto err_cq_free;
	spin_lock_init(&dev->qp_tbl_lock);

	/* Check if SRQ is supported by backend */
	if (dev->dsr->caps.max_srq) {
		ib_set_device_ops(&dev->ib_dev, &pvrdma_dev_srq_ops);

		dev->srq_tbl = kcalloc(dev->dsr->caps.max_srq,
				       sizeof(struct pvrdma_srq *),
				       GFP_KERNEL);
		if (!dev->srq_tbl)
			goto err_qp_free;
	}
	ret = ib_device_set_netdev(&dev->ib_dev, dev->netdev, 1);
	if (ret)
		goto err_srq_free;
	spin_lock_init(&dev->srq_tbl_lock);

	ret = ib_register_device(&dev->ib_dev, "vmw_pvrdma%d", &dev->pdev->dev);
	if (ret)
		goto err_srq_free;

	dev->ib_active = true;

	return 0;

err_srq_free:
	kfree(dev->srq_tbl);
err_qp_free:
	kfree(dev->qp_tbl);
err_cq_free:
	kfree(dev->cq_tbl);

	return ret;
}

static irqreturn_t pvrdma_intr0_handler(int irq, void *dev_id)
{
	u32 icr = PVRDMA_INTR_CAUSE_RESPONSE;
	struct pvrdma_dev *dev = dev_id;

	dev_dbg(&dev->pdev->dev, "interrupt 0 (response) handler\n");

	if (!dev->pdev->msix_enabled) {
		/* Legacy intr */
		icr = pvrdma_read_reg(dev, PVRDMA_REG_ICR);
		if (icr == 0)
			return IRQ_NONE;
	}

	if (icr == PVRDMA_INTR_CAUSE_RESPONSE)
		complete(&dev->cmd_done);

	return IRQ_HANDLED;
}

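/*
 * Async object event helpers. Each one looks up the object in its table
 * under the table lock, takes a reference so the object cannot be freed
 * while its event handler runs, and completes ->free when the last
 * reference is dropped.
 */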
static void pvrdma_qp_event(struct pvrdma_dev *dev, u32 qpn, int type)
{
	struct pvrdma_qp *qp;
	unsigned long flags;

	spin_lock_irqsave(&dev->qp_tbl_lock, flags);
	qp = dev->qp_tbl[qpn % dev->dsr->caps.max_qp];
	if (qp)
		refcount_inc(&qp->refcnt);
	spin_unlock_irqrestore(&dev->qp_tbl_lock, flags);

	if (qp && qp->ibqp.event_handler) {
		struct ib_qp *ibqp = &qp->ibqp;
		struct ib_event e;

		e.device = ibqp->device;
		e.element.qp = ibqp;
		e.event = type; /* 1:1 mapping for now. */
		ibqp->event_handler(&e, ibqp->qp_context);
	}
	if (qp) {
		if (refcount_dec_and_test(&qp->refcnt))
			complete(&qp->free);
	}
}

static void pvrdma_cq_event(struct pvrdma_dev *dev, u32 cqn, int type)
{
	struct pvrdma_cq *cq;
	unsigned long flags;

	spin_lock_irqsave(&dev->cq_tbl_lock, flags);
	cq = dev->cq_tbl[cqn % dev->dsr->caps.max_cq];
	if (cq)
		refcount_inc(&cq->refcnt);
	spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);

	if (cq && cq->ibcq.event_handler) {
		struct ib_cq *ibcq = &cq->ibcq;
		struct ib_event e;

		e.device = ibcq->device;
		e.element.cq = ibcq;
		e.event = type; /* 1:1 mapping for now. */
		ibcq->event_handler(&e, ibcq->cq_context);
	}
	if (cq) {
		if (refcount_dec_and_test(&cq->refcnt))
			complete(&cq->free);
	}
}

static void pvrdma_srq_event(struct pvrdma_dev *dev, u32 srqn, int type)
{
	struct pvrdma_srq *srq;
	unsigned long flags;

	spin_lock_irqsave(&dev->srq_tbl_lock, flags);
	if (dev->srq_tbl)
		srq = dev->srq_tbl[srqn % dev->dsr->caps.max_srq];
	else
		srq = NULL;
	if (srq)
		refcount_inc(&srq->refcnt);
	spin_unlock_irqrestore(&dev->srq_tbl_lock, flags);

	if (srq && srq->ibsrq.event_handler) {
		struct ib_srq *ibsrq = &srq->ibsrq;
		struct ib_event e;

		e.device = ibsrq->device;
		e.element.srq = ibsrq;
		e.event = type; /* 1:1 mapping for now. */
		ibsrq->event_handler(&e, ibsrq->srq_context);
	}
	if (srq) {
		if (refcount_dec_and_test(&srq->refcnt))
			complete(&srq->free);
	}
}

static void pvrdma_dispatch_event(struct pvrdma_dev *dev, int port,
				  enum ib_event_type event)
{
	struct ib_event ib_event;

	memset(&ib_event, 0, sizeof(ib_event));
	ib_event.device = &dev->ib_dev;
	ib_event.element.port_num = port;
	ib_event.event = event;
	ib_dispatch_event(&ib_event);
}

static void pvrdma_dev_event(struct pvrdma_dev *dev, u8 port, int type)
{
	if (port < 1 || port > dev->dsr->caps.phys_port_cnt) {
		dev_warn(&dev->pdev->dev, "event on port %d\n", port);
		return;
	}

	pvrdma_dispatch_event(dev, port, type);
}

static inline struct pvrdma_eqe *get_eqe(struct pvrdma_dev *dev, unsigned int i)
{
	return (struct pvrdma_eqe *)pvrdma_page_dir_get_ptr(
					&dev->async_pdir,
					PAGE_SIZE +
					sizeof(struct pvrdma_eqe) * i);
}

static irqreturn_t pvrdma_intr1_handler(int irq, void *dev_id)
{
	struct pvrdma_dev *dev = dev_id;
	struct pvrdma_ring *ring = &dev->async_ring_state->rx;
	int ring_slots = (dev->dsr->async_ring_pages.num_pages - 1) *
			 PAGE_SIZE / sizeof(struct pvrdma_eqe);
	unsigned int head;

	dev_dbg(&dev->pdev->dev, "interrupt 1 (async event) handler\n");

	/*
	 * Don't process events until the IB device is registered. Otherwise
	 * we'll try to ib_dispatch_event() on an invalid device.
	 */
	if (!dev->ib_active)
		return IRQ_HANDLED;

	while (pvrdma_idx_ring_has_data(ring, ring_slots, &head) > 0) {
		struct pvrdma_eqe *eqe;

		eqe = get_eqe(dev, head);

		switch (eqe->type) {
		case PVRDMA_EVENT_QP_FATAL:
		case PVRDMA_EVENT_QP_REQ_ERR:
		case PVRDMA_EVENT_QP_ACCESS_ERR:
		case PVRDMA_EVENT_COMM_EST:
		case PVRDMA_EVENT_SQ_DRAINED:
		case PVRDMA_EVENT_PATH_MIG:
		case PVRDMA_EVENT_PATH_MIG_ERR:
		case PVRDMA_EVENT_QP_LAST_WQE_REACHED:
			pvrdma_qp_event(dev, eqe->info, eqe->type);
			break;

		case PVRDMA_EVENT_CQ_ERR:
			pvrdma_cq_event(dev, eqe->info, eqe->type);
			break;

		case PVRDMA_EVENT_SRQ_ERR:
		case PVRDMA_EVENT_SRQ_LIMIT_REACHED:
			pvrdma_srq_event(dev, eqe->info, eqe->type);
			break;

		case PVRDMA_EVENT_PORT_ACTIVE:
		case PVRDMA_EVENT_PORT_ERR:
		case PVRDMA_EVENT_LID_CHANGE:
		case PVRDMA_EVENT_PKEY_CHANGE:
		case PVRDMA_EVENT_SM_CHANGE:
		case PVRDMA_EVENT_CLIENT_REREGISTER:
		case PVRDMA_EVENT_GID_CHANGE:
			pvrdma_dev_event(dev, eqe->info, eqe->type);
			break;

		case PVRDMA_EVENT_DEVICE_FATAL:
			pvrdma_dev_event(dev, 1, eqe->type);
			break;

		default:
			break;
		}

		pvrdma_idx_ring_inc(&ring->cons_head, ring_slots);
	}

	return IRQ_HANDLED;
}

static inline struct pvrdma_cqne *get_cqne(struct pvrdma_dev *dev,
					   unsigned int i)
{
	return (struct pvrdma_cqne *)pvrdma_page_dir_get_ptr(
					&dev->cq_pdir,
					PAGE_SIZE +
					sizeof(struct pvrdma_cqne) * i);
}

static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id)
{
	struct pvrdma_dev *dev = dev_id;
	struct pvrdma_ring *ring = &dev->cq_ring_state->rx;
	int ring_slots = (dev->dsr->cq_ring_pages.num_pages - 1) * PAGE_SIZE /
			 sizeof(struct pvrdma_cqne);
	unsigned int head;

	dev_dbg(&dev->pdev->dev, "interrupt x (completion) handler\n");

	while (pvrdma_idx_ring_has_data(ring, ring_slots, &head) > 0) {
		struct pvrdma_cqne *cqne;
		struct pvrdma_cq *cq;

		cqne = get_cqne(dev, head);
		spin_lock(&dev->cq_tbl_lock);
		cq = dev->cq_tbl[cqne->info % dev->dsr->caps.max_cq];
		if (cq)
			refcount_inc(&cq->refcnt);
		spin_unlock(&dev->cq_tbl_lock);

		if (cq && cq->ibcq.comp_handler)
			cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
		if (cq) {
			if (refcount_dec_and_test(&cq->refcnt))
				complete(&cq->free);
		}
		pvrdma_idx_ring_inc(&ring->cons_head, ring_slots);
	}

	return IRQ_HANDLED;
}

static void pvrdma_free_irq(struct pvrdma_dev *dev)
{
	int i;

	dev_dbg(&dev->pdev->dev, "freeing interrupts\n");
	for (i = 0; i < dev->nr_vectors; i++)
		free_irq(pci_irq_vector(dev->pdev, i), dev);
}

static void pvrdma_enable_intrs(struct pvrdma_dev *dev)
{
	dev_dbg(&dev->pdev->dev, "enable interrupts\n");
	pvrdma_write_reg(dev, PVRDMA_REG_IMR, 0);
}

static void pvrdma_disable_intrs(struct pvrdma_dev *dev)
{
	dev_dbg(&dev->pdev->dev, "disable interrupts\n");
	pvrdma_write_reg(dev, PVRDMA_REG_IMR, ~0);
}

static int pvrdma_alloc_intrs(struct pvrdma_dev *dev)
{
	struct pci_dev *pdev = dev->pdev;
	int ret = 0, i;

	ret = pci_alloc_irq_vectors(pdev, 1, PVRDMA_MAX_INTERRUPTS,
				    PCI_IRQ_MSIX);
	if (ret < 0) {
		ret = pci_alloc_irq_vectors(pdev, 1, 1,
					    PCI_IRQ_MSI | PCI_IRQ_LEGACY);
		if (ret < 0)
			return ret;
	}
	dev->nr_vectors = ret;

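	/*
	 * Vector 0 services command-channel responses (pvrdma_intr0_handler),
	 * vector 1 services async events (pvrdma_intr1_handler), and any
	 * remaining vectors service CQ notifications (pvrdma_intrx_handler).
	 */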
	ret = request_irq(pci_irq_vector(dev->pdev, 0), pvrdma_intr0_handler,
			  pdev->msix_enabled ? 0 : IRQF_SHARED, DRV_NAME, dev);
	if (ret) {
		dev_err(&dev->pdev->dev,
			"failed to request interrupt 0\n");
		goto out_free_vectors;
	}

	for (i = 1; i < dev->nr_vectors; i++) {
		ret = request_irq(pci_irq_vector(dev->pdev, i),
				  i == 1 ? pvrdma_intr1_handler :
					   pvrdma_intrx_handler,
				  0, DRV_NAME, dev);
		if (ret) {
			dev_err(&dev->pdev->dev,
				"failed to request interrupt %d\n", i);
			goto free_irqs;
		}
	}

	return 0;

free_irqs:
	while (--i >= 0)
		free_irq(pci_irq_vector(dev->pdev, i), dev);
out_free_vectors:
	pci_free_irq_vectors(pdev);
	return ret;
}

static void pvrdma_free_slots(struct pvrdma_dev *dev)
{
	struct pci_dev *pdev = dev->pdev;

	if (dev->resp_slot)
		dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->resp_slot,
				  dev->dsr->resp_slot_dma);
	if (dev->cmd_slot)
		dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->cmd_slot,
				  dev->dsr->cmd_slot_dma);
}

static int pvrdma_add_gid_at_index(struct pvrdma_dev *dev,
				   const union ib_gid *gid,
				   u8 gid_type,
				   int index)
{
	int ret;
	union pvrdma_cmd_req req;
	struct pvrdma_cmd_create_bind *cmd_bind = &req.create_bind;

	if (!dev->sgid_tbl) {
		dev_warn(&dev->pdev->dev, "sgid table not initialized\n");
		return -EINVAL;
	}

	memset(cmd_bind, 0, sizeof(*cmd_bind));
	cmd_bind->hdr.cmd = PVRDMA_CMD_CREATE_BIND;
	memcpy(cmd_bind->new_gid, gid->raw, 16);
	cmd_bind->mtu = ib_mtu_enum_to_int(IB_MTU_1024);
	cmd_bind->vlan = 0xfff;
	cmd_bind->index = index;
	cmd_bind->gid_type = gid_type;

	ret = pvrdma_cmd_post(dev, &req, NULL, 0);
	if (ret < 0) {
		dev_warn(&dev->pdev->dev,
			 "could not create binding, error: %d\n", ret);
		return -EFAULT;
	}
	memcpy(&dev->sgid_tbl[index], gid, sizeof(*gid));
	return 0;
}

static int pvrdma_add_gid(const struct ib_gid_attr *attr, void **context)
{
	struct pvrdma_dev *dev = to_vdev(attr->device);

	return pvrdma_add_gid_at_index(dev, &attr->gid,
				       ib_gid_type_to_pvrdma(attr->gid_type),
				       attr->index);
}

static int pvrdma_del_gid_at_index(struct pvrdma_dev *dev, int index)
{
	int ret;
	union pvrdma_cmd_req req;
	struct pvrdma_cmd_destroy_bind *cmd_dest = &req.destroy_bind;

	/* Update sgid table. */
	if (!dev->sgid_tbl) {
		dev_warn(&dev->pdev->dev, "sgid table not initialized\n");
		return -EINVAL;
	}

	memset(cmd_dest, 0, sizeof(*cmd_dest));
	cmd_dest->hdr.cmd = PVRDMA_CMD_DESTROY_BIND;
	memcpy(cmd_dest->dest_gid, &dev->sgid_tbl[index], 16);
	cmd_dest->index = index;

	ret = pvrdma_cmd_post(dev, &req, NULL, 0);
	if (ret < 0) {
		dev_warn(&dev->pdev->dev,
			 "could not destroy binding, error: %d\n", ret);
		return ret;
	}
	memset(&dev->sgid_tbl[index], 0, 16);
	return 0;
}

static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context)
{
	struct pvrdma_dev *dev = to_vdev(attr->device);

	dev_dbg(&dev->pdev->dev, "removing gid at index %u from %s",
		attr->index, dev->netdev->name);

	return pvrdma_del_gid_at_index(dev, attr->index);
}

static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
					  struct net_device *ndev,
					  unsigned long event)
{
	struct pci_dev *pdev_net;
	unsigned int slot;

	switch (event) {
	case NETDEV_REBOOT:
	case NETDEV_DOWN:
		pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ERR);
		break;
	case NETDEV_UP:
		pvrdma_write_reg(dev, PVRDMA_REG_CTL,
				 PVRDMA_DEVICE_CTL_UNQUIESCE);

		mb();

		if (pvrdma_read_reg(dev, PVRDMA_REG_ERR))
			dev_err(&dev->pdev->dev,
				"failed to activate device during link up\n");
		else
			pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
		break;
	case NETDEV_UNREGISTER:
		ib_device_set_netdev(&dev->ib_dev, NULL, 1);
		dev_put(dev->netdev);
		dev->netdev = NULL;
		break;
	case NETDEV_REGISTER:
		/* vmxnet3 will have same bus, slot. But func will be 0 */
		slot = PCI_SLOT(dev->pdev->devfn);
		pdev_net = pci_get_slot(dev->pdev->bus,
					PCI_DEVFN(slot, 0));
		if ((dev->netdev == NULL) &&
		    (pci_get_drvdata(pdev_net) == ndev)) {
			/* this is our netdev */
			ib_device_set_netdev(&dev->ib_dev, ndev, 1);
			dev->netdev = ndev;
			dev_hold(ndev);
		}
		pci_dev_put(pdev_net);
		break;

	default:
		dev_dbg(&dev->pdev->dev, "ignore netdevice event %ld on %s\n",
			event, dev_name(&dev->ib_dev.dev));
		break;
	}
}

static void pvrdma_netdevice_event_work(struct work_struct *work)
{
	struct pvrdma_netdevice_work *netdev_work;
	struct pvrdma_dev *dev;

	netdev_work = container_of(work, struct pvrdma_netdevice_work, work);

	mutex_lock(&pvrdma_device_list_lock);
	list_for_each_entry(dev, &pvrdma_device_list, device_link) {
		if ((netdev_work->event == NETDEV_REGISTER) ||
		    (dev->netdev == netdev_work->event_netdev)) {
			pvrdma_netdevice_event_handle(dev,
						      netdev_work->event_netdev,
						      netdev_work->event);
			break;
		}
	}
	mutex_unlock(&pvrdma_device_list_lock);

	kfree(netdev_work);
}

static int pvrdma_netdevice_event(struct notifier_block *this,
				  unsigned long event, void *ptr)
{
	struct net_device *event_netdev = netdev_notifier_info_to_dev(ptr);
	struct pvrdma_netdevice_work *netdev_work;

	netdev_work = kmalloc(sizeof(*netdev_work), GFP_ATOMIC);
	if (!netdev_work)
		return NOTIFY_BAD;

	INIT_WORK(&netdev_work->work, pvrdma_netdevice_event_work);
	netdev_work->event_netdev = event_netdev;
	netdev_work->event = event;
	queue_work(event_wq, &netdev_work->work);

	return NOTIFY_DONE;
}

static int pvrdma_pci_probe(struct pci_dev *pdev,
			    const struct pci_device_id *id)
{
	struct pci_dev *pdev_net;
	struct pvrdma_dev *dev;
	int ret;
	unsigned long start;
	unsigned long len;
	dma_addr_t slot_dma = 0;

	dev_dbg(&pdev->dev, "initializing driver %s\n", pci_name(pdev));

	/* Allocate zero-out device */
	dev = ib_alloc_device(pvrdma_dev, ib_dev);
	if (!dev) {
		dev_err(&pdev->dev, "failed to allocate IB device\n");
		return -ENOMEM;
	}

	mutex_lock(&pvrdma_device_list_lock);
	list_add(&dev->device_link, &pvrdma_device_list);
	mutex_unlock(&pvrdma_device_list_lock);

	ret = pvrdma_init_device(dev);
	if (ret)
		goto err_free_device;

	dev->pdev = pdev;
	pci_set_drvdata(pdev, dev);

	ret = pci_enable_device(pdev);
	if (ret) {
		dev_err(&pdev->dev, "cannot enable PCI device\n");
		goto err_free_device;
	}

	dev_dbg(&pdev->dev, "PCI resource flags BAR0 %#lx\n",
		pci_resource_flags(pdev, 0));
	dev_dbg(&pdev->dev, "PCI resource len %#llx\n",
		(unsigned long long)pci_resource_len(pdev, 0));
	dev_dbg(&pdev->dev, "PCI resource start %#llx\n",
		(unsigned long long)pci_resource_start(pdev, 0));
	dev_dbg(&pdev->dev, "PCI resource flags BAR1 %#lx\n",
		pci_resource_flags(pdev, 1));
	dev_dbg(&pdev->dev, "PCI resource len %#llx\n",
		(unsigned long long)pci_resource_len(pdev, 1));
	dev_dbg(&pdev->dev, "PCI resource start %#llx\n",
		(unsigned long long)pci_resource_start(pdev, 1));

	if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) ||
	    !(pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) {
		dev_err(&pdev->dev, "PCI BAR region not MMIO\n");
		ret = -ENOMEM;
		goto err_disable_pdev;
	}

	ret = pci_request_regions(pdev, DRV_NAME);
	if (ret) {
		dev_err(&pdev->dev, "cannot request PCI resources\n");
		goto err_disable_pdev;
	}

	/* Enable 64-Bit DMA */
	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
	if (ret) {
		dev_err(&pdev->dev, "dma_set_mask failed\n");
		goto err_free_resource;
	}
	dma_set_max_seg_size(&pdev->dev, UINT_MAX);
	pci_set_master(pdev);

	/* Map register space */
	start = pci_resource_start(dev->pdev, PVRDMA_PCI_RESOURCE_REG);
	len = pci_resource_len(dev->pdev, PVRDMA_PCI_RESOURCE_REG);
	dev->regs = ioremap(start, len);
	if (!dev->regs) {
		dev_err(&pdev->dev, "register mapping failed\n");
		ret = -ENOMEM;
		goto err_free_resource;
	}

	/* Setup per-device UAR. */
	dev->driver_uar.index = 0;
	dev->driver_uar.pfn =
		pci_resource_start(dev->pdev, PVRDMA_PCI_RESOURCE_UAR) >>
		PAGE_SHIFT;
	dev->driver_uar.map =
		ioremap(dev->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
	if (!dev->driver_uar.map) {
		dev_err(&pdev->dev, "failed to remap UAR pages\n");
		ret = -ENOMEM;
		goto err_unmap_regs;
	}

	dev->dsr_version = pvrdma_read_reg(dev, PVRDMA_REG_VERSION);
	dev_info(&pdev->dev, "device version %d, driver version %d\n",
		 dev->dsr_version, PVRDMA_VERSION);

	dev->dsr = dma_alloc_coherent(&pdev->dev, sizeof(*dev->dsr),
				      &dev->dsrbase, GFP_KERNEL);
	if (!dev->dsr) {
		dev_err(&pdev->dev, "failed to allocate shared region\n");
		ret = -ENOMEM;
		goto err_uar_unmap;
	}

	/* Setup the shared region */
	dev->dsr->driver_version = PVRDMA_VERSION;
	dev->dsr->gos_info.gos_bits = sizeof(void *) == 4 ?
		PVRDMA_GOS_BITS_32 :
		PVRDMA_GOS_BITS_64;
	dev->dsr->gos_info.gos_type = PVRDMA_GOS_TYPE_LINUX;
	dev->dsr->gos_info.gos_ver = 1;

	if (dev->dsr_version < PVRDMA_PPN64_VERSION)
		dev->dsr->uar_pfn = dev->driver_uar.pfn;
	else
		dev->dsr->uar_pfn64 = dev->driver_uar.pfn;

	/* Command slot. */
	dev->cmd_slot = dma_alloc_coherent(&pdev->dev, PAGE_SIZE,
					   &slot_dma, GFP_KERNEL);
	if (!dev->cmd_slot) {
		ret = -ENOMEM;
		goto err_free_dsr;
	}

	dev->dsr->cmd_slot_dma = (u64)slot_dma;

	/* Response slot. */
	dev->resp_slot = dma_alloc_coherent(&pdev->dev, PAGE_SIZE,
					    &slot_dma, GFP_KERNEL);
	if (!dev->resp_slot) {
		ret = -ENOMEM;
		goto err_free_slots;
	}

	dev->dsr->resp_slot_dma = (u64)slot_dma;

	/* Async event ring */
	dev->dsr->async_ring_pages.num_pages = PVRDMA_NUM_RING_PAGES;
	ret = pvrdma_page_dir_init(dev, &dev->async_pdir,
				   dev->dsr->async_ring_pages.num_pages, true);
	if (ret)
		goto err_free_slots;
	dev->async_ring_state = dev->async_pdir.pages[0];
	dev->dsr->async_ring_pages.pdir_dma = dev->async_pdir.dir_dma;

	/* CQ notification ring */
	dev->dsr->cq_ring_pages.num_pages = PVRDMA_NUM_RING_PAGES;
	ret = pvrdma_page_dir_init(dev, &dev->cq_pdir,
				   dev->dsr->cq_ring_pages.num_pages, true);
	if (ret)
		goto err_free_async_ring;
	dev->cq_ring_state = dev->cq_pdir.pages[0];
	dev->dsr->cq_ring_pages.pdir_dma = dev->cq_pdir.dir_dma;

	/*
	 * Write the PA of the shared region to the device. The writes must be
	 * ordered such that the high bits are written last. When the writes
	 * complete, the device will have filled out the capabilities.
	 */

	pvrdma_write_reg(dev, PVRDMA_REG_DSRLOW, (u32)dev->dsrbase);
	pvrdma_write_reg(dev, PVRDMA_REG_DSRHIGH,
			 (u32)((u64)(dev->dsrbase) >> 32));

	/* Make sure the write is complete before reading status. */
	mb();

	/* The driver supports RoCE V1 and V2. */
	if (!PVRDMA_SUPPORTED(dev)) {
		dev_err(&pdev->dev, "driver needs RoCE v1 or v2 support\n");
		ret = -EFAULT;
		goto err_free_cq_ring;
	}

	/* Paired vmxnet3 will have same bus, slot. But func will be 0 */
	pdev_net = pci_get_slot(pdev->bus, PCI_DEVFN(PCI_SLOT(pdev->devfn), 0));
	if (!pdev_net) {
		dev_err(&pdev->dev, "failed to find paired net device\n");
		ret = -ENODEV;
		goto err_free_cq_ring;
	}

	if (pdev_net->vendor != PCI_VENDOR_ID_VMWARE ||
	    pdev_net->device != PCI_DEVICE_ID_VMWARE_VMXNET3) {
		dev_err(&pdev->dev, "failed to find paired vmxnet3 device\n");
		pci_dev_put(pdev_net);
		ret = -ENODEV;
		goto err_free_cq_ring;
	}

	dev->netdev = pci_get_drvdata(pdev_net);
	pci_dev_put(pdev_net);
	if (!dev->netdev) {
		dev_err(&pdev->dev, "failed to get vmxnet3 device\n");
		ret = -ENODEV;
		goto err_free_cq_ring;
	}
	dev_hold(dev->netdev);

	dev_info(&pdev->dev, "paired device to %s\n", dev->netdev->name);

	/* Interrupt setup */
	ret = pvrdma_alloc_intrs(dev);
	if (ret) {
		dev_err(&pdev->dev, "failed to allocate interrupts\n");
		ret = -ENOMEM;
		goto err_free_cq_ring;
	}

	/* Allocate UAR table. */
	ret = pvrdma_uar_table_init(dev);
	if (ret) {
		dev_err(&pdev->dev, "failed to allocate UAR table\n");
		ret = -ENOMEM;
		goto err_free_intrs;
	}

	/* Allocate GID table */
	dev->sgid_tbl = kcalloc(dev->dsr->caps.gid_tbl_len,
				sizeof(union ib_gid), GFP_KERNEL);
	if (!dev->sgid_tbl) {
		ret = -ENOMEM;
		goto err_free_uar_table;
	}
	dev_dbg(&pdev->dev, "gid table len %d\n", dev->dsr->caps.gid_tbl_len);

	pvrdma_enable_intrs(dev);

	/* Activate pvrdma device */
	pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_ACTIVATE);

	/* Make sure the write is complete before reading status. */
	mb();

	/* Check if device was successfully activated */
	ret = pvrdma_read_reg(dev, PVRDMA_REG_ERR);
	if (ret != 0) {
		dev_err(&pdev->dev, "failed to activate device\n");
		ret = -EFAULT;
		goto err_disable_intr;
	}

	/* Register IB device */
	ret = pvrdma_register_device(dev);
	if (ret) {
		dev_err(&pdev->dev, "failed to register IB device\n");
		goto err_disable_intr;
	}

	dev->nb_netdev.notifier_call = pvrdma_netdevice_event;
	ret = register_netdevice_notifier(&dev->nb_netdev);
	if (ret) {
		dev_err(&pdev->dev, "failed to register netdevice events\n");
		goto err_unreg_ibdev;
	}

	dev_info(&pdev->dev, "attached to device\n");
	return 0;

err_unreg_ibdev:
	ib_unregister_device(&dev->ib_dev);
err_disable_intr:
	pvrdma_disable_intrs(dev);
	kfree(dev->sgid_tbl);
err_free_uar_table:
	pvrdma_uar_table_cleanup(dev);
err_free_intrs:
	pvrdma_free_irq(dev);
	pci_free_irq_vectors(pdev);
err_free_cq_ring:
	if (dev->netdev) {
		dev_put(dev->netdev);
		dev->netdev = NULL;
	}
	pvrdma_page_dir_cleanup(dev, &dev->cq_pdir);
err_free_async_ring:
	pvrdma_page_dir_cleanup(dev, &dev->async_pdir);
err_free_slots:
	pvrdma_free_slots(dev);
err_free_dsr:
	dma_free_coherent(&pdev->dev, sizeof(*dev->dsr), dev->dsr,
			  dev->dsrbase);
err_uar_unmap:
	iounmap(dev->driver_uar.map);
err_unmap_regs:
	iounmap(dev->regs);
err_free_resource:
	pci_release_regions(pdev);
err_disable_pdev:
	pci_disable_device(pdev);
	pci_set_drvdata(pdev, NULL);
err_free_device:
	mutex_lock(&pvrdma_device_list_lock);
	list_del(&dev->device_link);
	mutex_unlock(&pvrdma_device_list_lock);
	ib_dealloc_device(&dev->ib_dev);
	return ret;
}

static void pvrdma_pci_remove(struct pci_dev *pdev)
{
	struct pvrdma_dev *dev = pci_get_drvdata(pdev);

	if (!dev)
		return;

	dev_info(&pdev->dev, "detaching from device\n");

	unregister_netdevice_notifier(&dev->nb_netdev);
	dev->nb_netdev.notifier_call = NULL;

	flush_workqueue(event_wq);

	if (dev->netdev) {
		dev_put(dev->netdev);
		dev->netdev = NULL;
	}

	/* Unregister ib device */
	ib_unregister_device(&dev->ib_dev);

	mutex_lock(&pvrdma_device_list_lock);
	list_del(&dev->device_link);
	mutex_unlock(&pvrdma_device_list_lock);

	pvrdma_disable_intrs(dev);
	pvrdma_free_irq(dev);
	pci_free_irq_vectors(pdev);

	/* Deactivate pvrdma device */
	pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_RESET);
	pvrdma_page_dir_cleanup(dev, &dev->cq_pdir);
	pvrdma_page_dir_cleanup(dev, &dev->async_pdir);
	pvrdma_free_slots(dev);
	dma_free_coherent(&pdev->dev, sizeof(*dev->dsr), dev->dsr,
			  dev->dsrbase);

	iounmap(dev->regs);
	kfree(dev->sgid_tbl);
	kfree(dev->cq_tbl);
	kfree(dev->srq_tbl);
	kfree(dev->qp_tbl);
	pvrdma_uar_table_cleanup(dev);
	iounmap(dev->driver_uar.map);

	ib_dealloc_device(&dev->ib_dev);

	/* Free pci resources */
	pci_release_regions(pdev);
	pci_disable_device(pdev);
	pci_set_drvdata(pdev, NULL);
}

static const struct pci_device_id pvrdma_pci_table[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_PVRDMA), },
	{ 0 },
};

MODULE_DEVICE_TABLE(pci, pvrdma_pci_table);

static struct pci_driver pvrdma_driver = {
	.name		= DRV_NAME,
	.id_table	= pvrdma_pci_table,
	.probe		= pvrdma_pci_probe,
	.remove		= pvrdma_pci_remove,
};

static int __init pvrdma_init(void)
{
	int err;

	event_wq = alloc_ordered_workqueue("pvrdma_event_wq", WQ_MEM_RECLAIM);
	if (!event_wq)
		return -ENOMEM;

	err = pci_register_driver(&pvrdma_driver);
	if (err)
		destroy_workqueue(event_wq);

	return err;
}

static void __exit pvrdma_cleanup(void)
{
	pci_unregister_driver(&pvrdma_driver);

	destroy_workqueue(event_wq);
}

module_init(pvrdma_init);
module_exit(pvrdma_cleanup);

MODULE_AUTHOR("VMware, Inc");
MODULE_DESCRIPTION("VMware Paravirtual RDMA driver");
MODULE_LICENSE("Dual BSD/GPL");