// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2022 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#define pr_fmt(fmt)			"habanalabs: " fmt

#include <uapi/misc/habanalabs.h>
#include "habanalabs.h"

#include <linux/pci.h>
#include <linux/hwmon.h>

#define HL_RESET_DELAY_USEC		10000	/* 10ms */

/*
 * hl_set_dram_bar - sets the bar to allow later access to address
 *
 * @hdev: pointer to habanalabs device structure
 * @addr: the address the caller wants to access.
 *
 * @return: the old BAR base address on success, U64_MAX for failure.
 *	    The caller should set it back to the old address after use.
 *
 * In case the bar space does not cover the whole address space,
 * the bar base address should be set to allow access to a given address.
 * This function can be called also if the bar doesn't need to be set,
 * in that case it just won't change the base.
 */
static uint64_t hl_set_dram_bar(struct hl_device *hdev, u64 addr)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 bar_base_addr;

	bar_base_addr = addr & ~(prop->dram_pci_bar_size - 0x1ull);

	return hdev->asic_funcs->set_dram_bar_base(hdev, bar_base_addr);
}


static int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val,
	enum debugfs_access_type acc_type, enum pci_region region_type)
{
	struct pci_mem_region *region = &hdev->pci_mem_region[region_type];
	u64 old_base, rc;

	if (region_type == PCI_REGION_DRAM) {
		old_base = hl_set_dram_bar(hdev, addr);
		if (old_base == U64_MAX)
			return -EIO;
	}

	switch (acc_type) {
	case DEBUGFS_READ8:
		*val = readb(hdev->pcie_bar[region->bar_id] +
			addr - region->region_base + region->offset_in_bar);
		break;
	case DEBUGFS_WRITE8:
		writeb(*val, hdev->pcie_bar[region->bar_id] +
			addr - region->region_base + region->offset_in_bar);
		break;
	case DEBUGFS_READ32:
		*val = readl(hdev->pcie_bar[region->bar_id] +
			addr - region->region_base + region->offset_in_bar);
		break;
	case DEBUGFS_WRITE32:
		writel(*val, hdev->pcie_bar[region->bar_id] +
			addr - region->region_base + region->offset_in_bar);
		break;
	case DEBUGFS_READ64:
		*val = readq(hdev->pcie_bar[region->bar_id] +
			addr - region->region_base + region->offset_in_bar);
		break;
	case DEBUGFS_WRITE64:
		writeq(*val, hdev->pcie_bar[region->bar_id] +
			addr - region->region_base + region->offset_in_bar);
		break;
	}

	if (region_type == PCI_REGION_DRAM) {
		rc = hl_set_dram_bar(hdev, old_base);
		if (rc == U64_MAX)
			return -EIO;
	}

	return 0;
}
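
/*
 * hl_dma_map_sgtable - DMA map the scatter-gather table for device access
 *
 * @hdev: pointer to habanalabs device structure
 * @sgt: scatter-gather table to map
 * @dir: direction of the DMA transfer
 *
 * Map the table using the PCI DMA API and, if this ASIC accesses host memory
 * through a dedicated offset, shift every resulting DMA address by the
 * device's base physical address of host memory.
 */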
int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct scatterlist *sg;
	int rc, i;

	rc = dma_map_sgtable(&hdev->pdev->dev, sgt, dir, 0);
	if (rc)
		return rc;

	/* Shift to the device's base physical address of host memory if necessary */
	if (prop->device_dma_offset_for_host_access)
		for_each_sgtable_dma_sg(sgt, sg, i)
			sg->dma_address += prop->device_dma_offset_for_host_access;

	return 0;
}

void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct scatterlist *sg;
	int i;

	/* Cancel the device's base physical address of host memory if necessary */
	if (prop->device_dma_offset_for_host_access)
		for_each_sgtable_dma_sg(sgt, sg, i)
			sg->dma_address -= prop->device_dma_offset_for_host_access;

	dma_unmap_sgtable(&hdev->pdev->dev, sgt, dir, 0);
}

/*
 * hl_access_cfg_region - access the config region
 *
 * @hdev: pointer to habanalabs device structure
 * @addr: the address to access
 * @val: the value to write from or read to
 * @acc_type: the type of access (read/write 64/32)
 */
int hl_access_cfg_region(struct hl_device *hdev, u64 addr, u64 *val,
	enum debugfs_access_type acc_type)
{
	struct pci_mem_region *cfg_region = &hdev->pci_mem_region[PCI_REGION_CFG];
	u32 val_h, val_l;

	if (!IS_ALIGNED(addr, sizeof(u32))) {
		dev_err(hdev->dev, "address %#llx not a multiple of %zu\n", addr, sizeof(u32));
		return -EINVAL;
	}

	switch (acc_type) {
	case DEBUGFS_READ32:
		*val = RREG32(addr - cfg_region->region_base);
		break;
	case DEBUGFS_WRITE32:
		WREG32(addr - cfg_region->region_base, *val);
		break;
	case DEBUGFS_READ64:
		val_l = RREG32(addr - cfg_region->region_base);
		val_h = RREG32(addr + sizeof(u32) - cfg_region->region_base);

		*val = (((u64) val_h) << 32) | val_l;
		break;
	case DEBUGFS_WRITE64:
		WREG32(addr - cfg_region->region_base, lower_32_bits(*val));
		WREG32(addr + sizeof(u32) - cfg_region->region_base, upper_32_bits(*val));
		break;
	default:
		dev_err(hdev->dev, "access type %d is not supported\n", acc_type);
		return -EOPNOTSUPP;
	}

	return 0;
}

/*
 * hl_access_dev_mem - access device memory
 *
 * @hdev: pointer to habanalabs device structure
 * @region: the memory region the address belongs to
 * @region_type: the type of the region the address belongs to
 * @addr: the address to access
 * @val: the value to write from or read to
 * @acc_type: the type of access (r/w, 32/64)
 */
int hl_access_dev_mem(struct hl_device *hdev, struct pci_mem_region *region,
	enum pci_region region_type, u64 addr, u64 *val, enum debugfs_access_type acc_type)
{
	switch (region_type) {
	case PCI_REGION_CFG:
		return hl_access_cfg_region(hdev, addr, val, acc_type);
	case PCI_REGION_SRAM:
	case PCI_REGION_DRAM:
		return hl_access_sram_dram_region(hdev, addr, val, acc_type,
			region_type);
	default:
		return -EFAULT;
	}

	return 0;
}
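
/*
 * hl_device_status - get the current status of the device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Derive the externally visible status from the reset/disabled/init state,
 * in order of precedence: in-reset, needs-reset, malfunction (disabled),
 * in-device-creation, operational.
 */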
enum hl_device_status hl_device_status(struct hl_device *hdev)
{
	enum hl_device_status status;

	if (hdev->reset_info.in_reset)
		status = HL_DEVICE_STATUS_IN_RESET;
	else if (hdev->reset_info.needs_reset)
		status = HL_DEVICE_STATUS_NEEDS_RESET;
	else if (hdev->disabled)
		status = HL_DEVICE_STATUS_MALFUNCTION;
	else if (!hdev->init_done)
		status = HL_DEVICE_STATUS_IN_DEVICE_CREATION;
	else
		status = HL_DEVICE_STATUS_OPERATIONAL;

	return status;
}

bool hl_device_operational(struct hl_device *hdev,
		enum hl_device_status *status)
{
	enum hl_device_status current_status;

	current_status = hl_device_status(hdev);
	if (status)
		*status = current_status;

	switch (current_status) {
	case HL_DEVICE_STATUS_IN_RESET:
	case HL_DEVICE_STATUS_MALFUNCTION:
	case HL_DEVICE_STATUS_NEEDS_RESET:
		return false;
	case HL_DEVICE_STATUS_OPERATIONAL:
	case HL_DEVICE_STATUS_IN_DEVICE_CREATION:
	default:
		return true;
	}
}
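
/*
 * hpriv_release - release function for the per-process private data
 *
 * @ref: kref embedded in the hl_fpriv structure
 *
 * Called by kref_put() when the last reference to the file private data is
 * dropped. Verifies the device is idle, removes the process from the
 * open-files list and, if configured to do so, resets the device on release.
 */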
static void hpriv_release(struct kref *ref)
{
	u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
	bool device_is_idle = true;
	struct hl_fpriv *hpriv;
	struct hl_device *hdev;

	hpriv = container_of(ref, struct hl_fpriv, refcount);

	hdev = hpriv->hdev;

	put_pid(hpriv->taskpid);

	hl_debugfs_remove_file(hpriv);

	mutex_destroy(&hpriv->restore_phase_mutex);

	if ((!hdev->pldm) && (hdev->pdev) &&
			(!hdev->asic_funcs->is_device_idle(hdev,
				idle_mask,
				HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL))) {
		dev_err(hdev->dev,
			"device not idle after user context is closed (0x%llx_%llx)\n",
			idle_mask[1], idle_mask[0]);

		device_is_idle = false;
	}

	/* We need to remove the user from the list to make sure the reset process won't
	 * try to kill the user process. Because, if we got here, it means there are no
	 * more driver/device resources that the user process is occupying so there is
	 * no need to kill it
	 *
	 * However, we can't set the compute_ctx to NULL at this stage. This is to prevent
	 * a race between the release and opening the device again. We don't want to let
	 * a user open the device while a reset is about to happen.
	 */
	mutex_lock(&hdev->fpriv_list_lock);
	list_del(&hpriv->dev_node);
	mutex_unlock(&hdev->fpriv_list_lock);

	if ((hdev->reset_if_device_not_idle && !device_is_idle)
			|| hdev->reset_upon_device_release)
		hl_device_reset(hdev, HL_DRV_RESET_DEV_RELEASE);

	/* Now we can mark the compute_ctx as not active. Even if a reset is running in a different
	 * thread, we don't care because the in_reset is marked so if a user tries to open
	 * the device it will fail on that, even if compute_ctx is false.
	 */
	mutex_lock(&hdev->fpriv_list_lock);
	hdev->is_compute_ctx_active = false;
	mutex_unlock(&hdev->fpriv_list_lock);

	hdev->compute_ctx_in_release = 0;

	/* release the eventfd */
	if (hpriv->notifier_event.eventfd)
		eventfd_ctx_put(hpriv->notifier_event.eventfd);

	mutex_destroy(&hpriv->notifier_event.lock);

	kfree(hpriv);
}
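
/*
 * hl_hpriv_get/hl_hpriv_put - reference counting for the per-process private
 * data. The last hl_hpriv_put() releases the structure via hpriv_release().
 * hl_hpriv_put() returns 1 if the object was released, 0 otherwise.
 */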
void hl_hpriv_get(struct hl_fpriv *hpriv)
{
	kref_get(&hpriv->refcount);
}

int hl_hpriv_put(struct hl_fpriv *hpriv)
{
	return kref_put(&hpriv->refcount, hpriv_release);
}

/*
 * hl_device_release - release function for habanalabs device
 *
 * @inode: pointer to inode structure
 * @filp: pointer to file structure
 *
 * Called when process closes a habanalabs device
 */
static int hl_device_release(struct inode *inode, struct file *filp)
{
	struct hl_fpriv *hpriv = filp->private_data;
	struct hl_device *hdev = hpriv->hdev;

	filp->private_data = NULL;

	if (!hdev) {
		pr_crit("Closing FD after device was removed. Memory leak will occur and it is advised to reboot.\n");
		put_pid(hpriv->taskpid);
		return 0;
	}

	/* Each pending user interrupt holds the user's context, hence we
	 * must release them all before calling hl_ctx_mgr_fini().
	 */
	hl_release_pending_user_interrupts(hpriv->hdev);

	hl_mem_mgr_fini(&hpriv->mem_mgr);
	hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr);

	hdev->compute_ctx_in_release = 1;

	if (!hl_hpriv_put(hpriv))
		dev_notice(hdev->dev,
			"User process closed FD but device still in use\n");

	hdev->last_open_session_duration_jif =
		jiffies - hdev->last_successful_open_jif;

	return 0;
}

static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
{
	struct hl_fpriv *hpriv = filp->private_data;
	struct hl_device *hdev = hpriv->hdev;

	filp->private_data = NULL;

	if (!hdev) {
		pr_err("Closing FD after device was removed\n");
		goto out;
	}

	mutex_lock(&hdev->fpriv_ctrl_list_lock);
	list_del(&hpriv->dev_node);
	mutex_unlock(&hdev->fpriv_ctrl_list_lock);
out:
	/* release the eventfd */
	if (hpriv->notifier_event.eventfd)
		eventfd_ctx_put(hpriv->notifier_event.eventfd);

	mutex_destroy(&hpriv->notifier_event.lock);
	put_pid(hpriv->taskpid);

	kfree(hpriv);

	return 0;
}

/*
 * hl_mmap - mmap function for habanalabs device
 *
 * @*filp: pointer to file structure
 * @*vma: pointer to vm_area_struct of the process
 *
 * Called when process does an mmap on habanalabs device. Call the device's mmap
 * function at the end of the common code.
 */
static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct hl_fpriv *hpriv = filp->private_data;
	struct hl_device *hdev = hpriv->hdev;
	unsigned long vm_pgoff;

	if (!hdev) {
		pr_err_ratelimited("Trying to mmap after device was removed! Please close FD\n");
		return -ENODEV;
	}

	vm_pgoff = vma->vm_pgoff;

	switch (vm_pgoff & HL_MMAP_TYPE_MASK) {
	case HL_MMAP_TYPE_BLOCK:
		vma->vm_pgoff = HL_MMAP_OFFSET_VALUE_GET(vm_pgoff);
		return hl_hw_block_mmap(hpriv, vma);

	case HL_MMAP_TYPE_CB:
	case HL_MMAP_TYPE_TS_BUFF:
		return hl_mem_mgr_mmap(&hpriv->mem_mgr, vma, NULL);
	}

	return -EINVAL;
}

static const struct file_operations hl_ops = {
	.owner = THIS_MODULE,
	.open = hl_device_open,
	.release = hl_device_release,
	.mmap = hl_mmap,
	.unlocked_ioctl = hl_ioctl,
	.compat_ioctl = hl_ioctl
};

static const struct file_operations hl_ctrl_ops = {
	.owner = THIS_MODULE,
	.open = hl_device_open_ctrl,
	.release = hl_device_release_ctrl,
	.unlocked_ioctl = hl_ioctl_control,
	.compat_ioctl = hl_ioctl_control
};

static void device_release_func(struct device *dev)
{
	kfree(dev);
}

/*
 * device_init_cdev - Initialize cdev and device for habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 * @hclass: pointer to the class object of the device
 * @minor: minor number of the specific device
 * @fops: file operations to install for this device
 * @name: name of the device as it will appear in the filesystem
 * @cdev: pointer to the char device object that will be initialized
 * @dev: pointer to the device object that will be initialized
 *
 * Initialize a cdev and a Linux device for the habanalabs device.
 */
static int device_init_cdev(struct hl_device *hdev, struct class *hclass,
				int minor, const struct file_operations *fops,
				char *name, struct cdev *cdev,
				struct device **dev)
{
	cdev_init(cdev, fops);
	cdev->owner = THIS_MODULE;

	*dev = kzalloc(sizeof(**dev), GFP_KERNEL);
	if (!*dev)
		return -ENOMEM;

	device_initialize(*dev);
	(*dev)->devt = MKDEV(hdev->major, minor);
	(*dev)->class = hclass;
	(*dev)->release = device_release_func;
	dev_set_drvdata(*dev, hdev);
	dev_set_name(*dev, "%s", name);

	return 0;
}

static int device_cdev_sysfs_add(struct hl_device *hdev)
{
	int rc;

	rc = cdev_device_add(&hdev->cdev, hdev->dev);
	if (rc) {
		dev_err(hdev->dev,
			"failed to add a char device to the system\n");
		return rc;
	}

	rc = cdev_device_add(&hdev->cdev_ctrl, hdev->dev_ctrl);
	if (rc) {
		dev_err(hdev->dev,
			"failed to add a control char device to the system\n");
		goto delete_cdev_device;
	}

	/* hl_sysfs_init() must be done after adding the device to the system */
	rc = hl_sysfs_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize sysfs\n");
		goto delete_ctrl_cdev_device;
	}

	hdev->cdev_sysfs_created = true;

	return 0;

delete_ctrl_cdev_device:
	cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
delete_cdev_device:
	cdev_device_del(&hdev->cdev, hdev->dev);
	return rc;
}

static void device_cdev_sysfs_del(struct hl_device *hdev)
{
	if (!hdev->cdev_sysfs_created)
		goto put_devices;

	hl_sysfs_fini(hdev);
	cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
	cdev_device_del(&hdev->cdev, hdev->dev);

put_devices:
	put_device(hdev->dev);
	put_device(hdev->dev_ctrl);
}
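
/*
 * device_hard_reset_pending - handle a deferred hard-reset request
 *
 * @work: the delayed work associated with the reset request
 *
 * Runs from the dedicated reset workqueue. If the reset fails with -EBUSY
 * (open processes are still being killed) and device removal is not in
 * progress, the work re-queues itself and retries.
 */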
static void device_hard_reset_pending(struct work_struct *work)
{
	struct hl_device_reset_work *device_reset_work =
		container_of(work, struct hl_device_reset_work, reset_work.work);
	struct hl_device *hdev = device_reset_work->hdev;
	u32 flags;
	int rc;

	flags = device_reset_work->flags | HL_DRV_RESET_FROM_RESET_THR;

	rc = hl_device_reset(hdev, flags);
	if ((rc == -EBUSY) && !hdev->device_fini_pending) {
		dev_info(hdev->dev,
			"Could not reset device. will try again in %u seconds",
			HL_PENDING_RESET_PER_SEC);

		queue_delayed_work(device_reset_work->wq,
			&device_reset_work->reset_work,
			msecs_to_jiffies(HL_PENDING_RESET_PER_SEC * 1000));
	}
}

/*
 * device_early_init - do some early initialization for the habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Install the relevant function pointers and call the early_init function,
 * if such a function exists
 */
static int device_early_init(struct hl_device *hdev)
{
	int i, rc;
	char workq_name[32];

	switch (hdev->asic_type) {
	case ASIC_GOYA:
		goya_set_asic_funcs(hdev);
		strscpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name));
		break;
	case ASIC_GAUDI:
		gaudi_set_asic_funcs(hdev);
		strscpy(hdev->asic_name, "GAUDI", sizeof(hdev->asic_name));
		break;
	case ASIC_GAUDI_SEC:
		gaudi_set_asic_funcs(hdev);
		strscpy(hdev->asic_name, "GAUDI SEC", sizeof(hdev->asic_name));
		break;
	default:
		dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
			hdev->asic_type);
		return -EINVAL;
	}

	rc = hdev->asic_funcs->early_init(hdev);
	if (rc)
		return rc;

	rc = hl_asid_init(hdev);
	if (rc)
		goto early_fini;

	if (hdev->asic_prop.completion_queues_count) {
		hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count,
				sizeof(*hdev->cq_wq),
				GFP_KERNEL);
		if (!hdev->cq_wq) {
			rc = -ENOMEM;
			goto asid_fini;
		}
	}

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
		snprintf(workq_name, 32, "hl-free-jobs-%u", (u32) i);
		hdev->cq_wq[i] = create_singlethread_workqueue(workq_name);
		if (hdev->cq_wq[i] == NULL) {
			dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
			rc = -ENOMEM;
			goto free_cq_wq;
		}
	}

	hdev->eq_wq = alloc_workqueue("hl-events", WQ_UNBOUND, 0);
	if (hdev->eq_wq == NULL) {
		dev_err(hdev->dev, "Failed to allocate EQ workqueue\n");
		rc = -ENOMEM;
		goto free_cq_wq;
	}

	hdev->ts_free_obj_wq = alloc_workqueue("hl-ts-free-obj", WQ_UNBOUND, 0);
	if (!hdev->ts_free_obj_wq) {
		dev_err(hdev->dev,
			"Failed to allocate Timestamp registration free workqueue\n");
		rc = -ENOMEM;
		goto free_eq_wq;
	}

	hdev->pf_wq = alloc_workqueue("hl-prefetch", WQ_UNBOUND, 0);
	if (!hdev->pf_wq) {
		dev_err(hdev->dev, "Failed to allocate MMU prefetch workqueue\n");
		rc = -ENOMEM;
		goto free_ts_free_wq;
	}

	hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info),
					GFP_KERNEL);
	if (!hdev->hl_chip_info) {
		rc = -ENOMEM;
		goto free_pf_wq;
	}

	rc = hl_mmu_if_set_funcs(hdev);
	if (rc)
		goto free_chip_info;

	hl_mem_mgr_init(hdev->dev, &hdev->kernel_mem_mgr);

	hdev->device_reset_work.wq =
			create_singlethread_workqueue("hl_device_reset");
	if (!hdev->device_reset_work.wq) {
		rc = -ENOMEM;
		dev_err(hdev->dev, "Failed to create device reset WQ\n");
		goto free_cb_mgr;
	}

	INIT_DELAYED_WORK(&hdev->device_reset_work.reset_work,
			device_hard_reset_pending);
	hdev->device_reset_work.hdev = hdev;
	hdev->device_fini_pending = 0;

	mutex_init(&hdev->send_cpu_message_lock);
	mutex_init(&hdev->debug_lock);
	INIT_LIST_HEAD(&hdev->cs_mirror_list);
	spin_lock_init(&hdev->cs_mirror_lock);
	spin_lock_init(&hdev->reset_info.lock);
	INIT_LIST_HEAD(&hdev->fpriv_list);
	INIT_LIST_HEAD(&hdev->fpriv_ctrl_list);
	mutex_init(&hdev->fpriv_list_lock);
	mutex_init(&hdev->fpriv_ctrl_list_lock);
	mutex_init(&hdev->clk_throttling.lock);

	return 0;

free_cb_mgr:
	hl_mem_mgr_fini(&hdev->kernel_mem_mgr);
free_chip_info:
	kfree(hdev->hl_chip_info);
free_pf_wq:
	destroy_workqueue(hdev->pf_wq);
free_ts_free_wq:
	destroy_workqueue(hdev->ts_free_obj_wq);
free_eq_wq:
	destroy_workqueue(hdev->eq_wq);
free_cq_wq:
	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		if (hdev->cq_wq[i])
			destroy_workqueue(hdev->cq_wq[i]);
	kfree(hdev->cq_wq);
asid_fini:
	hl_asid_fini(hdev);
early_fini:
	if (hdev->asic_funcs->early_fini)
		hdev->asic_funcs->early_fini(hdev);

	return rc;
}

/*
 * device_early_fini - finalize all that was done in device_early_init
 *
 * @hdev: pointer to habanalabs device structure
 *
 */
static void device_early_fini(struct hl_device *hdev)
{
	int i;

	mutex_destroy(&hdev->debug_lock);
	mutex_destroy(&hdev->send_cpu_message_lock);

	mutex_destroy(&hdev->fpriv_list_lock);
	mutex_destroy(&hdev->fpriv_ctrl_list_lock);

	mutex_destroy(&hdev->clk_throttling.lock);

	hl_mem_mgr_fini(&hdev->kernel_mem_mgr);

	kfree(hdev->hl_chip_info);

	destroy_workqueue(hdev->pf_wq);
	destroy_workqueue(hdev->ts_free_obj_wq);
	destroy_workqueue(hdev->eq_wq);
	destroy_workqueue(hdev->device_reset_work.wq);

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		destroy_workqueue(hdev->cq_wq[i]);
	kfree(hdev->cq_wq);

	hl_asid_fini(hdev);

	if (hdev->asic_funcs->early_fini)
		hdev->asic_funcs->early_fini(hdev);
}
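
/*
 * hl_device_heartbeat - periodic liveness check of the device CPU
 *
 * @work: the delayed heartbeat work
 *
 * Sends a heartbeat packet to the device firmware. On failure while the
 * device is operational, triggers a hard reset with the heartbeat flag set;
 * otherwise reschedules itself.
 */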
static void hl_device_heartbeat(struct work_struct *work)
{
	struct hl_device *hdev = container_of(work, struct hl_device,
						work_heartbeat.work);

	if (!hl_device_operational(hdev, NULL))
		goto reschedule;

	if (!hdev->asic_funcs->send_heartbeat(hdev))
		goto reschedule;

	if (hl_device_operational(hdev, NULL))
		dev_err(hdev->dev, "Device heartbeat failed!\n");

	hl_device_reset(hdev, HL_DRV_RESET_HARD | HL_DRV_RESET_HEARTBEAT);

	return;

reschedule:
	/*
	 * prev_reset_trigger tracks consecutive fatal h/w errors until first
	 * heartbeat immediately post reset.
	 * If control reached here, then at least one heartbeat work has been
	 * scheduled since last reset/init cycle.
	 * So if the device is not already in reset cycle, reset the flag
	 * prev_reset_trigger as no reset occurred with HL_DRV_RESET_FW_FATAL_ERR
	 * status for at least one heartbeat. From this point driver restarts
	 * tracking future consecutive fatal errors.
	 */
	if (!hdev->reset_info.in_reset)
		hdev->reset_info.prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT;

	schedule_delayed_work(&hdev->work_heartbeat,
			usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
}

/*
 * device_late_init - do late initialization for the habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Do stuff that either needs the device H/W queues to be active or needs
 * to happen after all the rest of the initialization is finished
 */
static int device_late_init(struct hl_device *hdev)
{
	int rc;

	if (hdev->asic_funcs->late_init) {
		rc = hdev->asic_funcs->late_init(hdev);
		if (rc) {
			dev_err(hdev->dev,
				"failed late initialization for the H/W\n");
			return rc;
		}
	}

	hdev->high_pll = hdev->asic_prop.high_pll;

	if (hdev->heartbeat) {
		INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat);
		schedule_delayed_work(&hdev->work_heartbeat,
				usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
	}

	hdev->late_init_done = true;

	return 0;
}

/*
 * device_late_fini - finalize all that was done in device_late_init
 *
 * @hdev: pointer to habanalabs device structure
 *
 */
static void device_late_fini(struct hl_device *hdev)
{
	if (!hdev->late_init_done)
		return;

	if (hdev->heartbeat)
		cancel_delayed_work_sync(&hdev->work_heartbeat);

	if (hdev->asic_funcs->late_fini)
		hdev->asic_funcs->late_fini(hdev);

	hdev->late_init_done = false;
}
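
/*
 * hl_device_utilization - compute device utilization as a percentage
 *
 * @hdev: pointer to habanalabs device structure
 * @utilization: result, in the range 0-100
 *
 * Utilization is the current power draw scaled linearly between the idle
 * (DC) power and the maximum power:
 *
 *	utilization = (curr_power - dc_power) * 100 / (max_power - dc_power)
 *
 * e.g. dc_power = 100W, max_power = 300W, curr_power = 200W -> 50%.
 */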
int hl_device_utilization(struct hl_device *hdev, u32 *utilization)
{
	u64 max_power, curr_power, dc_power, dividend;
	int rc;

	max_power = hdev->max_power;
	dc_power = hdev->asic_prop.dc_power_default;
	rc = hl_fw_cpucp_power_get(hdev, &curr_power);

	if (rc)
		return rc;

	curr_power = clamp(curr_power, dc_power, max_power);

	dividend = (curr_power - dc_power) * 100;
	*utilization = (u32) div_u64(dividend, (max_power - dc_power));

	return 0;
}

int hl_device_set_debug_mode(struct hl_device *hdev, struct hl_ctx *ctx, bool enable)
{
	int rc = 0;

	mutex_lock(&hdev->debug_lock);

	if (!enable) {
		if (!hdev->in_debug) {
			dev_err(hdev->dev,
				"Failed to disable debug mode because device was not in debug mode\n");
			rc = -EFAULT;
			goto out;
		}

		if (!hdev->reset_info.hard_reset_pending)
			hdev->asic_funcs->halt_coresight(hdev, ctx);

		hdev->in_debug = 0;

		goto out;
	}

	if (hdev->in_debug) {
		dev_err(hdev->dev,
			"Failed to enable debug mode because device is already in debug mode\n");
		rc = -EFAULT;
		goto out;
	}

	hdev->in_debug = 1;

out:
	mutex_unlock(&hdev->debug_lock);

	return rc;
}

static void take_release_locks(struct hl_device *hdev)
{
	/* Flush anyone that is inside the critical section of enqueue
	 * jobs to the H/W
	 */
	hdev->asic_funcs->hw_queues_lock(hdev);
	hdev->asic_funcs->hw_queues_unlock(hdev);

	/* Flush processes that are sending message to CPU */
	mutex_lock(&hdev->send_cpu_message_lock);
	mutex_unlock(&hdev->send_cpu_message_lock);

	/* Flush anyone that is inside device open */
	mutex_lock(&hdev->fpriv_list_lock);
	mutex_unlock(&hdev->fpriv_list_lock);
	mutex_lock(&hdev->fpriv_ctrl_list_lock);
	mutex_unlock(&hdev->fpriv_ctrl_list_lock);
}

static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset,
				bool skip_wq_flush)
{
	if (hard_reset)
		device_late_fini(hdev);

	/*
	 * Halt the engines and disable interrupts so we won't get any more
	 * completions from H/W and we won't have any accesses from the
	 * H/W to the host machine
	 */
	hdev->asic_funcs->halt_engines(hdev, hard_reset, fw_reset);

	/* Go over all the queues, release all CS and their jobs */
	hl_cs_rollback_all(hdev, skip_wq_flush);

	/* flush the MMU prefetch workqueue */
	flush_workqueue(hdev->pf_wq);

	/* Release all pending user interrupts, each pending user interrupt
	 * holds a reference to user context
	 */
	hl_release_pending_user_interrupts(hdev);
}

/*
 * hl_device_suspend - initiate device suspend
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Puts the hw in the suspend state (all asics).
 * Returns 0 for success or an error on failure.
 * Called at driver suspend.
 */
int hl_device_suspend(struct hl_device *hdev)
{
	int rc;

	pci_save_state(hdev->pdev);

	/* Block future CS/VM/JOB completion operations */
	spin_lock(&hdev->reset_info.lock);
	if (hdev->reset_info.in_reset) {
		spin_unlock(&hdev->reset_info.lock);
		dev_err(hdev->dev, "Can't suspend while in reset\n");
		return -EIO;
	}
	hdev->reset_info.in_reset = 1;
	spin_unlock(&hdev->reset_info.lock);

	/* This blocks all other stuff that is not blocked by in_reset */
	hdev->disabled = true;

	take_release_locks(hdev);

	rc = hdev->asic_funcs->suspend(hdev);
	if (rc)
		dev_err(hdev->dev,
			"Failed to disable PCI access of device CPU\n");

	/* Shut down the device */
	pci_disable_device(hdev->pdev);
	pci_set_power_state(hdev->pdev, PCI_D3hot);

	return 0;
}

/*
 * hl_device_resume - initiate device resume
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Bring the hw back to operating state (all asics).
 * Returns 0 for success or an error on failure.
 * Called at driver resume.
 */
int hl_device_resume(struct hl_device *hdev)
{
	int rc;

	pci_set_power_state(hdev->pdev, PCI_D0);
	pci_restore_state(hdev->pdev);
	rc = pci_enable_device_mem(hdev->pdev);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to enable PCI device in resume\n");
		return rc;
	}

	pci_set_master(hdev->pdev);

	rc = hdev->asic_funcs->resume(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to resume device after suspend\n");
		goto disable_device;
	}


	/* 'in_reset' was set to true during suspend, now we must clear it in order
	 * for hard reset to be performed
	 */
	hdev->reset_info.in_reset = 0;

	rc = hl_device_reset(hdev, HL_DRV_RESET_HARD);
	if (rc) {
		dev_err(hdev->dev, "Failed to reset device during resume\n");
		goto disable_device;
	}

	return 0;

disable_device:
	pci_clear_master(hdev->pdev);
	pci_disable_device(hdev->pdev);

	return rc;
}
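
/*
 * device_kill_open_processes - kill processes that still hold the device open
 *
 * @hdev: pointer to habanalabs device structure
 * @timeout: seconds to wait for processes to exit (0 means use the default
 *           reset-pending wait)
 * @control_dev: operate on the control device's open-files list
 *
 * Sends SIGKILL to every user process in the open-files list and waits for
 * the list to empty. Returns 0 when all processes exited, -EBUSY to request
 * another trial, or -ETIME after the last trial.
 */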
static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool control_dev)
{
	struct task_struct *task = NULL;
	struct list_head *fd_list;
	struct hl_fpriv *hpriv;
	struct mutex *fd_lock;
	u32 pending_cnt;

	fd_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock;
	fd_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list;

	/* Giving time for user to close FD, and for processes that are inside
	 * hl_device_open to finish
	 */
	if (!list_empty(fd_list))
		ssleep(1);

	if (timeout) {
		pending_cnt = timeout;
	} else {
		if (hdev->process_kill_trial_cnt) {
			/* Processes have been already killed */
			pending_cnt = 1;
			goto wait_for_processes;
		} else {
			/* Wait a small period after process kill */
			pending_cnt = HL_PENDING_RESET_PER_SEC;
		}
	}

	mutex_lock(fd_lock);

	/* This section must be protected because we are dereferencing
	 * pointers that are freed if the process exits
	 */
	list_for_each_entry(hpriv, fd_list, dev_node) {
		task = get_pid_task(hpriv->taskpid, PIDTYPE_PID);
		if (task) {
			dev_info(hdev->dev, "Killing user process pid=%d\n",
				task_pid_nr(task));
			send_sig(SIGKILL, task, 1);
			usleep_range(1000, 10000);

			put_task_struct(task);
		} else {
			/*
			 * If we got here, it means that process was killed from outside the driver
			 * right after it started looping on fd_list and before get_pid_task, thus
			 * we don't need to kill it.
			 */
			dev_dbg(hdev->dev,
				"Can't get task struct for user process, assuming process was killed from outside the driver\n");
		}
	}

	mutex_unlock(fd_lock);

	/*
	 * We killed the open users, but that doesn't mean they are closed.
	 * It could be that they are running a long cleanup phase in the driver
	 * e.g. MMU unmappings, or running other long teardown flow even before
	 * our cleanup.
	 * Therefore we need to wait again to make sure they are closed before
	 * continuing with the reset.
	 */

wait_for_processes:
	while ((!list_empty(fd_list)) && (pending_cnt)) {
		dev_dbg(hdev->dev,
			"Waiting for all unmap operations to finish before hard reset\n");

		pending_cnt--;

		ssleep(1);
	}

	/* All processes exited successfully */
	if (list_empty(fd_list))
		return 0;

	/* Give up waiting for processes to exit */
	if (hdev->process_kill_trial_cnt == HL_PENDING_RESET_MAX_TRIALS)
		return -ETIME;

	hdev->process_kill_trial_cnt++;

	return -EBUSY;
}

static void device_disable_open_processes(struct hl_device *hdev, bool control_dev)
{
	struct list_head *fd_list;
	struct hl_fpriv *hpriv;
	struct mutex *fd_lock;

	fd_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock;
	fd_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list;

	mutex_lock(fd_lock);
	list_for_each_entry(hpriv, fd_list, dev_node)
		hpriv->hdev = NULL;
	mutex_unlock(fd_lock);
}
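
/*
 * handle_reset_trigger - record the reset cause and detect repeated triggers
 *
 * @hdev: pointer to habanalabs device structure
 * @flags: reset flags
 *
 * Latches the first reset cause of this cycle, flags a repeated trigger if
 * the same cause fires twice in a row, and asks the firmware to stop PCI
 * access when the hard reset is driver-initiated.
 */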
static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
{
	u32 cur_reset_trigger = HL_RESET_TRIGGER_DEFAULT;

	/*
	 * 'reset cause' is being updated here, because getting here
	 * means that it's the 1st time and the last time we're here
	 * ('in_reset' makes sure of it). This makes sure that
	 * 'reset_cause' will continue holding its 1st recorded reason!
	 */
	if (flags & HL_DRV_RESET_HEARTBEAT) {
		hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_HEARTBEAT;
		cur_reset_trigger = HL_DRV_RESET_HEARTBEAT;
	} else if (flags & HL_DRV_RESET_TDR) {
		hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_TDR;
		cur_reset_trigger = HL_DRV_RESET_TDR;
	} else if (flags & HL_DRV_RESET_FW_FATAL_ERR) {
		hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
		cur_reset_trigger = HL_DRV_RESET_FW_FATAL_ERR;
	} else {
		hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
	}

	/*
	 * If reset cause is same twice, then reset_trigger_repeated
	 * is set and if this reset is due to a fatal FW error
	 * device is set to an unstable state.
	 */
	if (hdev->reset_info.prev_reset_trigger != cur_reset_trigger) {
		hdev->reset_info.prev_reset_trigger = cur_reset_trigger;
		hdev->reset_info.reset_trigger_repeated = 0;
	} else {
		hdev->reset_info.reset_trigger_repeated = 1;
	}

	/* If the reset is due to a heartbeat failure, the device CPU is not
	 * responsive, in which case there is no point sending it a PCI
	 * disable message.
	 *
	 * If F/W is performing the reset, no need to send it a message to disable
	 * PCI access
	 */
	if ((flags & HL_DRV_RESET_HARD) &&
			!(flags & (HL_DRV_RESET_HEARTBEAT | HL_DRV_RESET_BYPASS_REQ_TO_FW))) {
		/* Disable PCI access from device F/W so it won't send
		 * us additional interrupts. We disable MSI/MSI-X at
		 * the halt_engines function and we can't have the F/W
		 * sending us interrupts after that. We need to disable
		 * the access here because if the device is marked
		 * disabled, the message won't be sent. Also, in case
		 * of heartbeat, the device CPU is marked as disabled
		 * so this message won't be sent
		 */
		if (hl_fw_send_pci_access_msg(hdev,
				CPUCP_PACKET_DISABLE_PCI_ACCESS))
			dev_warn(hdev->dev,
				"Failed to disable PCI access by F/W\n");
	}
}

/*
 * hl_device_reset - reset the device
 *
 * @hdev: pointer to habanalabs device structure
 * @flags: reset flags.
 *
 * Block future CS and wait for pending CS to be enqueued
 * Call ASIC H/W fini
 * Flush all completions
 * Re-initialize all internal data structures
 * Call ASIC H/W init, late_init
 * Test queues
 * Enable device
 *
 * Returns 0 for success or an error on failure.
 */
int hl_device_reset(struct hl_device *hdev, u32 flags)
{
	bool hard_reset, from_hard_reset_thread, fw_reset, hard_instead_soft = false,
			reset_upon_device_release = false, schedule_hard_reset = false,
			skip_wq_flush, delay_reset;
	u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
	struct hl_ctx *ctx;
	int i, rc;

	if (!hdev->init_done) {
		dev_err(hdev->dev, "Can't reset before initialization is done\n");
		return 0;
	}

	hard_reset = !!(flags & HL_DRV_RESET_HARD);
	from_hard_reset_thread = !!(flags & HL_DRV_RESET_FROM_RESET_THR);
	fw_reset = !!(flags & HL_DRV_RESET_BYPASS_REQ_TO_FW);
	skip_wq_flush = !!(flags & HL_DRV_RESET_DEV_RELEASE);
	delay_reset = !!(flags & HL_DRV_RESET_DELAY);

	if (!hard_reset && !hdev->asic_prop.supports_soft_reset) {
		hard_instead_soft = true;
		hard_reset = true;
	}

	if (hdev->reset_upon_device_release && (flags & HL_DRV_RESET_DEV_RELEASE)) {
		if (hard_reset) {
			dev_crit(hdev->dev,
				"Aborting reset because hard-reset is mutually exclusive with reset-on-device-release\n");
			return -EINVAL;
		}

		reset_upon_device_release = true;

		goto do_reset;
	}

	if (!hard_reset && !hdev->asic_prop.allow_inference_soft_reset) {
		hard_instead_soft = true;
		hard_reset = true;
	}

	if (hard_instead_soft)
		dev_dbg(hdev->dev, "Doing hard-reset instead of soft-reset\n");

do_reset:
	/* Re-entry of reset thread */
	if (from_hard_reset_thread && hdev->process_kill_trial_cnt)
		goto kill_processes;

	/*
	 * Prevent concurrency in this function - only one reset should be
	 * done at any given time. Only need to perform this if we didn't
	 * get from the dedicated hard reset thread
	 */
	if (!from_hard_reset_thread) {
		/* Block future CS/VM/JOB completion operations */
		spin_lock(&hdev->reset_info.lock);
		if (hdev->reset_info.in_reset) {
			/* We only allow scheduling of a hard reset during soft reset */
			if (hard_reset && hdev->reset_info.is_in_soft_reset)
				hdev->reset_info.hard_reset_schedule_flags = flags;
			spin_unlock(&hdev->reset_info.lock);
			return 0;
		}
		hdev->reset_info.in_reset = 1;
		spin_unlock(&hdev->reset_info.lock);

		if (delay_reset)
			usleep_range(HL_RESET_DELAY_USEC, HL_RESET_DELAY_USEC << 1);

		handle_reset_trigger(hdev, flags);

		/* This still allows the completion of some KDMA ops */
		hdev->reset_info.is_in_soft_reset = !hard_reset;

		/* This also blocks future CS/VM/JOB completion operations */
		hdev->disabled = true;

		take_release_locks(hdev);

		if (hard_reset)
			dev_info(hdev->dev, "Going to reset device\n");
		else if (reset_upon_device_release)
			dev_dbg(hdev->dev, "Going to reset device after release by user\n");
		else
			dev_dbg(hdev->dev, "Going to reset engines of inference device\n");
	}
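
/* 'again' is the re-entry point for escalation: a failed soft reset, or a
 * hard reset that was scheduled while a soft reset was running, jumps back
 * here with the hard-reset flags set.
 */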
again:
	if ((hard_reset) && (!from_hard_reset_thread)) {
		hdev->reset_info.hard_reset_pending = true;

		hdev->process_kill_trial_cnt = 0;

		hdev->device_reset_work.flags = flags;

		/*
		 * Because the reset function can't run from heartbeat work,
		 * we need to call the reset function from a dedicated work.
		 */
		queue_delayed_work(hdev->device_reset_work.wq,
			&hdev->device_reset_work.reset_work, 0);

		return 0;
	}

	cleanup_resources(hdev, hard_reset, fw_reset, skip_wq_flush);

kill_processes:
	if (hard_reset) {
		/* Kill processes here after CS rollback. This is because the
		 * process can't really exit until all its CSs are done, which
		 * is what we do in cs rollback
		 */
		rc = device_kill_open_processes(hdev, 0, false);

		if (rc == -EBUSY) {
			if (hdev->device_fini_pending) {
				dev_crit(hdev->dev,
					"Failed to kill all open processes, stopping hard reset\n");
				goto out_err;
			}

			/* signal reset thread to reschedule */
			return rc;
		}

		if (rc) {
			dev_crit(hdev->dev,
				"Failed to kill all open processes, stopping hard reset\n");
			goto out_err;
		}

		/* Flush the Event queue workers to make sure no other thread is
		 * reading or writing to registers during the reset
		 */
		flush_workqueue(hdev->eq_wq);
	}

	/* Reset the H/W. It will be in idle state after this returns */
	hdev->asic_funcs->hw_fini(hdev, hard_reset, fw_reset);

	if (hard_reset) {
		hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE;

		/* Release kernel context */
		if (hdev->kernel_ctx && hl_ctx_put(hdev->kernel_ctx) == 1)
			hdev->kernel_ctx = NULL;

		hl_vm_fini(hdev);
		hl_mmu_fini(hdev);
		hl_eq_reset(hdev, &hdev->event_queue);
	}

	/* Re-initialize PI,CI to 0 in all queues (hw queue, cq) */
	hl_hw_queue_reset(hdev, hard_reset);
	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		hl_cq_reset(hdev, &hdev->completion_queue[i]);

	/* Make sure the context switch phase will run again */
	ctx = hl_get_compute_ctx(hdev);
	if (ctx) {
		atomic_set(&ctx->thread_ctx_switch_token, 1);
		ctx->thread_ctx_switch_wait_token = 0;
		hl_ctx_put(ctx);
	}

	/* Finished tear-down, starting to re-initialize */

	if (hard_reset) {
		hdev->device_cpu_disabled = false;
		hdev->reset_info.hard_reset_pending = false;

		if (hdev->reset_info.reset_trigger_repeated &&
				(hdev->reset_info.prev_reset_trigger ==
						HL_DRV_RESET_FW_FATAL_ERR)) {
			/* if there are 2 back-to-back resets from FW,
			 * ensure the driver puts the device in an unusable state
			 */
			dev_crit(hdev->dev,
				"Consecutive FW fatal errors received, stopping hard reset\n");
			rc = -EIO;
			goto out_err;
		}

		if (hdev->kernel_ctx) {
			dev_crit(hdev->dev,
				"kernel ctx was alive during hard reset, something is terribly wrong\n");
			rc = -EBUSY;
			goto out_err;
		}

		rc = hl_mmu_init(hdev);
		if (rc) {
			dev_err(hdev->dev,
				"Failed to initialize MMU S/W after hard reset\n");
			goto out_err;
		}

		/* Allocate the kernel context */
		hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx),
						GFP_KERNEL);
		if (!hdev->kernel_ctx) {
			rc = -ENOMEM;
			hl_mmu_fini(hdev);
			goto out_err;
		}

		hdev->is_compute_ctx_active = false;

		rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
		if (rc) {
			dev_err(hdev->dev,
				"failed to init kernel ctx in hard reset\n");
			kfree(hdev->kernel_ctx);
			hdev->kernel_ctx = NULL;
			hl_mmu_fini(hdev);
			goto out_err;
		}
	}

	/* Device is now enabled as part of the initialization requires
	 * communication with the device firmware to get information that
	 * is required for the initialization itself
	 */
	hdev->disabled = false;

	rc = hdev->asic_funcs->hw_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize the H/W after reset\n");
		goto out_err;
	}

	/* If device is not idle fail the reset process */
	if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask,
			HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) {
		dev_err(hdev->dev, "device is not idle (mask 0x%llx_%llx) after reset\n",
			idle_mask[1], idle_mask[0]);
		rc = -EIO;
		goto out_err;
	}

	/* Check that the communication with the device is working */
	rc = hdev->asic_funcs->test_queues(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to detect if device is alive after reset\n");
		goto out_err;
	}

	if (hard_reset) {
		rc = device_late_init(hdev);
		if (rc) {
			dev_err(hdev->dev, "Failed late init after hard reset\n");
			goto out_err;
		}

		rc = hl_vm_init(hdev);
		if (rc) {
			dev_err(hdev->dev, "Failed to init memory module after hard reset\n");
			goto out_err;
		}

		hl_fw_set_max_power(hdev);
	} else {
		rc = hdev->asic_funcs->non_hard_reset_late_init(hdev);
		if (rc) {
			if (reset_upon_device_release)
				dev_err(hdev->dev,
					"Failed late init in reset after device release\n");
			else
				dev_err(hdev->dev, "Failed late init after soft reset\n");
			goto out_err;
		}
	}

	spin_lock(&hdev->reset_info.lock);
	hdev->reset_info.is_in_soft_reset = false;

	/* Schedule hard reset only if requested and if not already in hard reset.
	 * We keep 'in_reset' enabled, so no other reset can go in during the hard
	 * reset schedule
	 */
	if (!hard_reset && hdev->reset_info.hard_reset_schedule_flags)
		schedule_hard_reset = true;
	else
		hdev->reset_info.in_reset = 0;

	spin_unlock(&hdev->reset_info.lock);

	hdev->reset_info.needs_reset = false;

	if (hard_reset)
		dev_info(hdev->dev, "Successfully finished resetting the device\n");
	else
		dev_dbg(hdev->dev, "Successfully finished resetting the device\n");

	if (hard_reset) {
		hdev->reset_info.hard_reset_cnt++;

		/* After reset is done, we are ready to receive events from
		 * the F/W. We can't do it before because we will ignore events
		 * and if those events are fatal, we won't know about it and
		 * the device will be operational although it shouldn't be
		 */
		hdev->asic_funcs->enable_events_from_fw(hdev);
	} else if (!reset_upon_device_release) {
		hdev->reset_info.soft_reset_cnt++;
	}

	if (schedule_hard_reset) {
		dev_info(hdev->dev, "Performing hard reset scheduled during soft reset\n");
		flags = hdev->reset_info.hard_reset_schedule_flags;
		hdev->reset_info.hard_reset_schedule_flags = 0;
		hdev->disabled = true;
		hard_reset = true;
		handle_reset_trigger(hdev, flags);
		goto again;
	}

	return 0;
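
/* Failure path: a failed soft reset or a failed reset-upon-device-release
 * escalates to a hard reset; a failed hard reset leaves the device unusable.
 */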
out_err:
	hdev->disabled = true;
	hdev->reset_info.is_in_soft_reset = false;

	if (hard_reset) {
		dev_err(hdev->dev, "Failed to reset! Device is NOT usable\n");
		hdev->reset_info.hard_reset_cnt++;
	} else if (reset_upon_device_release) {
		dev_err(hdev->dev, "Failed to reset device after user release\n");
		flags |= HL_DRV_RESET_HARD;
		flags &= ~HL_DRV_RESET_DEV_RELEASE;
		hard_reset = true;
		goto again;
	} else {
		dev_err(hdev->dev, "Failed to do soft-reset\n");
		hdev->reset_info.soft_reset_cnt++;
		flags |= HL_DRV_RESET_HARD;
		hard_reset = true;
		goto again;
	}

	hdev->reset_info.in_reset = 0;

	return rc;
}
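
/*
 * hl_notifier_event_send - notify a single user process via its eventfd
 *
 * @notifier_event: the process' notifier-event state
 * @event: the occurred event
 *
 * Records the event in the process' events mask and signals the eventfd,
 * if one was registered.
 */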
static void hl_notifier_event_send(struct hl_notifier_event *notifier_event, u64 event)
{
	mutex_lock(&notifier_event->lock);
	notifier_event->events_mask |= event;
	if (notifier_event->eventfd)
		eventfd_signal(notifier_event->eventfd, 1);

	mutex_unlock(&notifier_event->lock);
}

/*
 * hl_notifier_event_send_all - notify all user processes via eventfd
 *
 * @hdev: pointer to habanalabs device structure
 * @event: the occurred event
 */
void hl_notifier_event_send_all(struct hl_device *hdev, u64 event)
{
	struct hl_fpriv *hpriv;

	mutex_lock(&hdev->fpriv_list_lock);

	list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node)
		hl_notifier_event_send(&hpriv->notifier_event, event);

	mutex_unlock(&hdev->fpriv_list_lock);

	/* control device */
	mutex_lock(&hdev->fpriv_ctrl_list_lock);

	list_for_each_entry(hpriv, &hdev->fpriv_ctrl_list, dev_node)
		hl_notifier_event_send(&hpriv->notifier_event, event);

	mutex_unlock(&hdev->fpriv_ctrl_list_lock);
}

/*
 * hl_device_init - main initialization function for habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Allocate an id for the device, do early initialization and then call the
 * ASIC specific initialization functions. Finally, create the cdev and the
 * Linux device to expose it to the user
 */
int hl_device_init(struct hl_device *hdev, struct class *hclass)
{
	int i, rc, cq_cnt, user_interrupt_cnt, cq_ready_cnt;
	char *name;
	bool add_cdev_sysfs_on_err = false;

	name = kasprintf(GFP_KERNEL, "hl%d", hdev->id / 2);
	if (!name) {
		rc = -ENOMEM;
		goto out_disabled;
	}

	/* Initialize cdev and device structures */
	rc = device_init_cdev(hdev, hclass, hdev->id, &hl_ops, name,
				&hdev->cdev, &hdev->dev);

	kfree(name);

	if (rc)
		goto out_disabled;

	name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->id / 2);
	if (!name) {
		rc = -ENOMEM;
		goto free_dev;
	}

	/* Initialize cdev and device structures for control device */
	rc = device_init_cdev(hdev, hclass, hdev->id_control, &hl_ctrl_ops,
				name, &hdev->cdev_ctrl, &hdev->dev_ctrl);

	kfree(name);

	if (rc)
		goto free_dev;

	/* Initialize ASIC function pointers and perform early init */
	rc = device_early_init(hdev);
	if (rc)
		goto free_dev_ctrl;

	user_interrupt_cnt = hdev->asic_prop.user_interrupt_count;

	if (user_interrupt_cnt) {
		hdev->user_interrupt = kcalloc(user_interrupt_cnt,
				sizeof(*hdev->user_interrupt),
				GFP_KERNEL);

		if (!hdev->user_interrupt) {
			rc = -ENOMEM;
			goto early_fini;
		}
	}

	/*
	 * Start calling ASIC initialization. First S/W then H/W and finally
	 * late init
	 */
	rc = hdev->asic_funcs->sw_init(hdev);
	if (rc)
		goto user_interrupts_fini;


	/* initialize completion structure for multi CS wait */
	hl_multi_cs_completion_init(hdev);

	/*
	 * Initialize the H/W queues. Must be done before hw_init, because
	 * there the addresses of the kernel queue are being written to the
	 * registers of the device
	 */
	rc = hl_hw_queues_create(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize kernel queues\n");
		goto sw_fini;
	}

	cq_cnt = hdev->asic_prop.completion_queues_count;

	/*
	 * Initialize the completion queues. Must be done before hw_init,
	 * because there the addresses of the completion queues are being
	 * passed as arguments to request_irq
	 */
	if (cq_cnt) {
		hdev->completion_queue = kcalloc(cq_cnt,
				sizeof(*hdev->completion_queue),
				GFP_KERNEL);

		if (!hdev->completion_queue) {
			dev_err(hdev->dev,
				"failed to allocate completion queues\n");
			rc = -ENOMEM;
			goto hw_queues_destroy;
		}
	}

	for (i = 0, cq_ready_cnt = 0 ; i < cq_cnt ; i++, cq_ready_cnt++) {
		rc = hl_cq_init(hdev, &hdev->completion_queue[i],
				hdev->asic_funcs->get_queue_id_for_cq(hdev, i));
		if (rc) {
			dev_err(hdev->dev,
				"failed to initialize completion queue\n");
			goto cq_fini;
		}
		hdev->completion_queue[i].cq_idx = i;
	}

	/*
	 * Initialize the event queue. Must be done before hw_init,
	 * because there the address of the event queue is being
	 * passed as argument to request_irq
	 */
	rc = hl_eq_init(hdev, &hdev->event_queue);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize event queue\n");
		goto cq_fini;
	}

	/* MMU S/W must be initialized before kernel context is created */
	rc = hl_mmu_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize MMU S/W structures\n");
		goto eq_fini;
	}

	/* Allocate the kernel context */
	hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL);
	if (!hdev->kernel_ctx) {
		rc = -ENOMEM;
		goto mmu_fini;
	}

	hdev->is_compute_ctx_active = false;

	hdev->asic_funcs->state_dump_init(hdev);

	hl_debugfs_add_device(hdev);

	/* debugfs nodes are created in hl_ctx_init so it must be called after
	 * hl_debugfs_add_device.
	 */
	rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize kernel context\n");
		kfree(hdev->kernel_ctx);
		goto remove_device_from_debugfs;
	}

	rc = hl_cb_pool_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize CB pool\n");
		goto release_ctx;
	}

	/*
	 * From this point, override rc (=0) in case of an error to allow
	 * debugging (by adding char devices and create sysfs nodes as part of
	 * the error flow).
	 */
	add_cdev_sysfs_on_err = true;

	/* Device is now enabled as part of the initialization requires
	 * communication with the device firmware to get information that
	 * is required for the initialization itself
	 */
	hdev->disabled = false;

	rc = hdev->asic_funcs->hw_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize the H/W\n");
		rc = 0;
		goto out_disabled;
	}

	/* Check that the communication with the device is working */
	rc = hdev->asic_funcs->test_queues(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to detect if device is alive\n");
		rc = 0;
		goto out_disabled;
	}

	rc = device_late_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed late initialization\n");
		rc = 0;
		goto out_disabled;
	}

	dev_info(hdev->dev, "Found %s device with %lluGB DRAM\n",
		hdev->asic_name,
		hdev->asic_prop.dram_size / SZ_1G);

	rc = hl_vm_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize memory module\n");
		rc = 0;
		goto out_disabled;
	}

	/*
	 * Expose devices and sysfs nodes to user.
	 * From here there is no need to add char devices and create sysfs nodes
	 * in case of an error.
	 */
	add_cdev_sysfs_on_err = false;
	rc = device_cdev_sysfs_add(hdev);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add char devices and sysfs nodes\n");
		rc = 0;
		goto out_disabled;
	}

	/* Need to call this again because the max power might change,
	 * depending on card type for certain ASICs
	 */
	if (hdev->asic_prop.set_max_power_on_device_init)
		hl_fw_set_max_power(hdev);

	/*
	 * hl_hwmon_init() must be called after device_late_init(), because only
	 * there we get the information from the device about which
	 * hwmon-related sensors the device supports.
	 * Furthermore, it must be done after adding the device to the system.
	 */
	rc = hl_hwmon_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize hwmon\n");
		rc = 0;
		goto out_disabled;
	}

	dev_notice(hdev->dev,
		"Successfully added device to habanalabs driver\n");

	hdev->init_done = true;

	/* After initialization is done, we are ready to receive events from
	 * the F/W. We can't do it before because we will ignore events and if
	 * those events are fatal, we won't know about it and the device will
	 * be operational although it shouldn't be
	 */
	hdev->asic_funcs->enable_events_from_fw(hdev);

	return 0;

release_ctx:
	if (hl_ctx_put(hdev->kernel_ctx) != 1)
		dev_err(hdev->dev,
			"kernel ctx is still alive on initialization failure\n");
remove_device_from_debugfs:
	hl_debugfs_remove_device(hdev);
mmu_fini:
	hl_mmu_fini(hdev);
eq_fini:
	hl_eq_fini(hdev, &hdev->event_queue);
cq_fini:
	for (i = 0 ; i < cq_ready_cnt ; i++)
		hl_cq_fini(hdev, &hdev->completion_queue[i]);
	kfree(hdev->completion_queue);
hw_queues_destroy:
	hl_hw_queues_destroy(hdev);
sw_fini:
	hdev->asic_funcs->sw_fini(hdev);
user_interrupts_fini:
	kfree(hdev->user_interrupt);
early_fini:
	device_early_fini(hdev);
free_dev_ctrl:
	put_device(hdev->dev_ctrl);
free_dev:
	put_device(hdev->dev);
out_disabled:
	hdev->disabled = true;
	if (add_cdev_sysfs_on_err)
		device_cdev_sysfs_add(hdev);
	if (hdev->pdev)
		dev_err(&hdev->pdev->dev,
			"Failed to initialize hl%d. Device is NOT usable !\n",
			hdev->id / 2);
	else
		pr_err("Failed to initialize hl%d. Device is NOT usable !\n",
			hdev->id / 2);

	return rc;
}

/*
 * hl_device_fini - main tear-down function for habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Destroy the device, call ASIC fini functions and release the id
 */
void hl_device_fini(struct hl_device *hdev)
{
	bool device_in_reset;
	ktime_t timeout;
	u64 reset_sec;
	int i, rc;

	dev_info(hdev->dev, "Removing device\n");

	hdev->device_fini_pending = 1;
	flush_delayed_work(&hdev->device_reset_work.reset_work);

	if (hdev->pldm)
		reset_sec = HL_PLDM_HARD_RESET_MAX_TIMEOUT;
	else
		reset_sec = HL_HARD_RESET_MAX_TIMEOUT;

	/*
	 * This function is competing with the reset function, so try to
	 * take the reset atomic and if we are already in middle of reset,
	 * wait until reset function is finished. Reset function is designed
	 * to always finish. However, in Gaudi, because of all the network
	 * ports, the hard reset could take between 10-30 seconds
	 */

	timeout = ktime_add_us(ktime_get(), reset_sec * 1000 * 1000);

	spin_lock(&hdev->reset_info.lock);
	device_in_reset = !!hdev->reset_info.in_reset;
	if (!device_in_reset)
		hdev->reset_info.in_reset = 1;
	spin_unlock(&hdev->reset_info.lock);

	while (device_in_reset) {
		usleep_range(50, 200);

		spin_lock(&hdev->reset_info.lock);
		device_in_reset = !!hdev->reset_info.in_reset;
		if (!device_in_reset)
			hdev->reset_info.in_reset = 1;
		spin_unlock(&hdev->reset_info.lock);

		if (ktime_compare(ktime_get(), timeout) > 0) {
			dev_crit(hdev->dev,
				"Failed to remove device because reset function did not finish\n");
			return;
		}
	}

	/* Disable PCI access from device F/W so it won't send us additional
	 * interrupts. We disable MSI/MSI-X at the halt_engines function and we
	 * can't have the F/W sending us interrupts after that. We need to
	 * disable the access here because if the device is marked disabled,
	 * the message won't be sent. Also, in case of heartbeat, the device
	 * CPU is marked as disabled so this message won't be sent
	 */
	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);

	/* Mark device as disabled */
	hdev->disabled = true;

	take_release_locks(hdev);

	hdev->reset_info.hard_reset_pending = true;

	hl_hwmon_fini(hdev);

	cleanup_resources(hdev, true, false, false);

	/* Kill processes here after CS rollback. This is because the process
	 * can't really exit until all its CSs are done, which is what we
	 * do in cs rollback
	 */
	dev_info(hdev->dev,
		"Waiting for all processes to exit (timeout of %u seconds)",
		HL_PENDING_RESET_LONG_SEC);

	rc = device_kill_open_processes(hdev, HL_PENDING_RESET_LONG_SEC, false);
	if (rc) {
		dev_crit(hdev->dev, "Failed to kill all open processes\n");
		device_disable_open_processes(hdev, false);
	}

	rc = device_kill_open_processes(hdev, 0, true);
	if (rc) {
		dev_crit(hdev->dev, "Failed to kill all control device open processes\n");
		device_disable_open_processes(hdev, true);
	}

	hl_cb_pool_fini(hdev);

	/* Reset the H/W. It will be in idle state after this returns */
	hdev->asic_funcs->hw_fini(hdev, true, false);

	hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE;

	/* Release kernel context */
	if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1))
		dev_err(hdev->dev, "kernel ctx is still alive\n");

	hl_debugfs_remove_device(hdev);

	hl_vm_fini(hdev);

	hl_mmu_fini(hdev);

	hl_eq_fini(hdev, &hdev->event_queue);

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		hl_cq_fini(hdev, &hdev->completion_queue[i]);
	kfree(hdev->completion_queue);
	kfree(hdev->user_interrupt);

	hl_hw_queues_destroy(hdev);

	/* Call ASIC S/W finalize function */
	hdev->asic_funcs->sw_fini(hdev);

	device_early_fini(hdev);

	/* Hide devices and sysfs nodes from user */
	device_cdev_sysfs_del(hdev);

	pr_info("removed device successfully\n");
}

/*
 * MMIO register access helper functions.
 */

/*
 * hl_rreg - Read an MMIO register
 *
 * @hdev: pointer to habanalabs device structure
 * @reg: MMIO register offset (in bytes)
 *
 * Returns the value of the MMIO register we are asked to read
 *
 */
inline u32 hl_rreg(struct hl_device *hdev, u32 reg)
{
	return readl(hdev->rmmio + reg);
}

/*
 * hl_wreg - Write to an MMIO register
 *
 * @hdev: pointer to habanalabs device structure
 * @reg: MMIO register offset (in bytes)
 * @val: 32-bit value
 *
 * Writes the 32-bit value into the MMIO register
 *
 */
inline void hl_wreg(struct hl_device *hdev, u32 reg, u32 val)
{
	writel(val, hdev->rmmio + reg);
}