// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2022 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#define pr_fmt(fmt)			"habanalabs: " fmt

#include <uapi/misc/habanalabs.h>
#include "habanalabs.h"

#include <linux/pci.h>
#include <linux/hwmon.h>

#define HL_RESET_DELAY_USEC		10000	/* 10ms */

/*
 * hl_set_dram_bar - sets the bar to allow later access to address
 *
 * @hdev: pointer to habanalabs device structure
 * @addr: the address the caller wants to access.
 *
 * @return: the old BAR base address on success, U64_MAX for failure.
 *	    The caller should set it back to the old address after use.
 *
 * In case the bar space does not cover the whole address space,
 * the bar base address should be set to allow access to a given address.
 * This function can be called also if the bar doesn't need to be set,
 * in that case it just won't change the base.
 */
static uint64_t hl_set_dram_bar(struct hl_device *hdev, u64 addr)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 bar_base_addr;

	bar_base_addr = addr & ~(prop->dram_pci_bar_size - 0x1ull);

	return hdev->asic_funcs->set_dram_bar_base(hdev, bar_base_addr);
}


static int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val,
	enum debugfs_access_type acc_type, enum pci_region region_type)
{
	struct pci_mem_region *region = &hdev->pci_mem_region[region_type];
	u64 old_base, rc;

	if (region_type == PCI_REGION_DRAM) {
		old_base = hl_set_dram_bar(hdev, addr);
		if (old_base == U64_MAX)
			return -EIO;
	}

	switch (acc_type) {
	case DEBUGFS_READ8:
		*val = readb(hdev->pcie_bar[region->bar_id] +
			addr - region->region_base + region->offset_in_bar);
		break;
	case DEBUGFS_WRITE8:
		writeb(*val, hdev->pcie_bar[region->bar_id] +
			addr - region->region_base + region->offset_in_bar);
		break;
	case DEBUGFS_READ32:
		*val = readl(hdev->pcie_bar[region->bar_id] +
			addr - region->region_base + region->offset_in_bar);
		break;
	case DEBUGFS_WRITE32:
		writel(*val, hdev->pcie_bar[region->bar_id] +
			addr - region->region_base + region->offset_in_bar);
		break;
	case DEBUGFS_READ64:
		*val = readq(hdev->pcie_bar[region->bar_id] +
			addr - region->region_base + region->offset_in_bar);
		break;
	case DEBUGFS_WRITE64:
		writeq(*val, hdev->pcie_bar[region->bar_id] +
			addr - region->region_base + region->offset_in_bar);
		break;
	}

	if (region_type == PCI_REGION_DRAM) {
		rc = hl_set_dram_bar(hdev, old_base);
		if (rc == U64_MAX)
			return -EIO;
	}

	return 0;
}
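
/*
 * hl_dma_map_sgtable - DMA map the scatter-gather table for device access
 *
 * @hdev: pointer to habanalabs device structure
 * @sgt: scatter-gather table to map
 * @dir: direction of the DMA transfer
 *
 * Map the table using the PCI DMA API and, if this ASIC accesses host memory
 * through a dedicated offset, shift every resulting DMA address by the
 * device's base physical address of host memory.
 */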
int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct scatterlist *sg;
	int rc, i;

	rc = dma_map_sgtable(&hdev->pdev->dev, sgt, dir, 0);
	if (rc)
		return rc;

	/* Shift to the device's base physical address of host memory if necessary */
	if (prop->device_dma_offset_for_host_access)
		for_each_sgtable_dma_sg(sgt, sg, i)
			sg->dma_address += prop->device_dma_offset_for_host_access;

	return 0;
}

void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct scatterlist *sg;
	int i;

	/* Cancel the device's base physical address of host memory if necessary */
	if (prop->device_dma_offset_for_host_access)
		for_each_sgtable_dma_sg(sgt, sg, i)
			sg->dma_address -= prop->device_dma_offset_for_host_access;

	dma_unmap_sgtable(&hdev->pdev->dev, sgt, dir, 0);
}

/*
 * hl_access_cfg_region - access the config region
 *
 * @hdev: pointer to habanalabs device structure
 * @addr: the address to access
 * @val: the value to write from or read to
 * @acc_type: the type of access (read/write 64/32)
 */
int hl_access_cfg_region(struct hl_device *hdev, u64 addr, u64 *val,
	enum debugfs_access_type acc_type)
{
	struct pci_mem_region *cfg_region = &hdev->pci_mem_region[PCI_REGION_CFG];
	u32 val_h, val_l;

	if (!IS_ALIGNED(addr, sizeof(u32))) {
		dev_err(hdev->dev, "address %#llx not a multiple of %zu\n", addr, sizeof(u32));
		return -EINVAL;
	}

	switch (acc_type) {
	case DEBUGFS_READ32:
		*val = RREG32(addr - cfg_region->region_base);
		break;
	case DEBUGFS_WRITE32:
		WREG32(addr - cfg_region->region_base, *val);
		break;
	case DEBUGFS_READ64:
		val_l = RREG32(addr - cfg_region->region_base);
		val_h = RREG32(addr + sizeof(u32) - cfg_region->region_base);

		*val = (((u64) val_h) << 32) | val_l;
		break;
	case DEBUGFS_WRITE64:
		WREG32(addr - cfg_region->region_base, lower_32_bits(*val));
		WREG32(addr + sizeof(u32) - cfg_region->region_base, upper_32_bits(*val));
		break;
	default:
		dev_err(hdev->dev, "access type %d is not supported\n", acc_type);
		return -EOPNOTSUPP;
	}

	return 0;
}

/*
 * hl_access_dev_mem - access device memory
 *
 * @hdev: pointer to habanalabs device structure
 * @region: the memory region the address belongs to
 * @region_type: the type of the region the address belongs to
 * @addr: the address to access
 * @val: the value to write from or read to
 * @acc_type: the type of access (r/w, 32/64)
 */
int hl_access_dev_mem(struct hl_device *hdev, struct pci_mem_region *region,
	enum pci_region region_type, u64 addr, u64 *val, enum debugfs_access_type acc_type)
{
	switch (region_type) {
	case PCI_REGION_CFG:
		return hl_access_cfg_region(hdev, addr, val, acc_type);
	case PCI_REGION_SRAM:
	case PCI_REGION_DRAM:
		return hl_access_sram_dram_region(hdev, addr, val, acc_type,
			region_type);
	default:
		return -EFAULT;
	}

	return 0;
}
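
/*
 * hl_device_status - get the current status of the device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Derive the externally visible status from the reset/disabled/init state,
 * in order of precedence: in-reset, needs-reset, malfunction (disabled),
 * in-device-creation, operational.
 */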
enum hl_device_status hl_device_status(struct hl_device *hdev)
{
	enum hl_device_status status;

	if (hdev->reset_info.in_reset)
		status = HL_DEVICE_STATUS_IN_RESET;
	else if (hdev->reset_info.needs_reset)
		status = HL_DEVICE_STATUS_NEEDS_RESET;
	else if (hdev->disabled)
		status = HL_DEVICE_STATUS_MALFUNCTION;
	else if (!hdev->init_done)
		status = HL_DEVICE_STATUS_IN_DEVICE_CREATION;
	else
		status = HL_DEVICE_STATUS_OPERATIONAL;

	return status;
}

bool hl_device_operational(struct hl_device *hdev,
		enum hl_device_status *status)
{
	enum hl_device_status current_status;

	current_status = hl_device_status(hdev);
	if (status)
		*status = current_status;

	switch (current_status) {
	case HL_DEVICE_STATUS_IN_RESET:
	case HL_DEVICE_STATUS_MALFUNCTION:
	case HL_DEVICE_STATUS_NEEDS_RESET:
		return false;
	case HL_DEVICE_STATUS_OPERATIONAL:
	case HL_DEVICE_STATUS_IN_DEVICE_CREATION:
	default:
		return true;
	}
}
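
/*
 * hpriv_release - release function for the per-process private data
 *
 * @ref: kref embedded in the hl_fpriv structure
 *
 * Called by kref_put() when the last reference to the file private data is
 * dropped. Verifies the device is idle, removes the process from the
 * open-files list and, if configured to do so, resets the device on release.
 */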
static void hpriv_release(struct kref *ref)
{
	u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
	bool device_is_idle = true;
	struct hl_fpriv *hpriv;
	struct hl_device *hdev;

	hpriv = container_of(ref, struct hl_fpriv, refcount);

	hdev = hpriv->hdev;

	put_pid(hpriv->taskpid);

	hl_debugfs_remove_file(hpriv);

	mutex_destroy(&hpriv->restore_phase_mutex);

	if ((!hdev->pldm) && (hdev->pdev) &&
			(!hdev->asic_funcs->is_device_idle(hdev,
				idle_mask,
				HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL))) {
		dev_err(hdev->dev,
			"device not idle after user context is closed (0x%llx_%llx)\n",
			idle_mask[1], idle_mask[0]);

		device_is_idle = false;
	}

	/* We need to remove the user from the list to make sure the reset process won't
	 * try to kill the user process. Because, if we got here, it means there are no
	 * more driver/device resources that the user process is occupying so there is
	 * no need to kill it
	 *
	 * However, we can't set the compute_ctx to NULL at this stage. This is to prevent
	 * a race between the release and opening the device again. We don't want to let
	 * a user open the device while a reset is about to happen.
	 */
	mutex_lock(&hdev->fpriv_list_lock);
	list_del(&hpriv->dev_node);
	mutex_unlock(&hdev->fpriv_list_lock);

	if ((hdev->reset_if_device_not_idle && !device_is_idle)
			|| hdev->reset_upon_device_release)
		hl_device_reset(hdev, HL_DRV_RESET_DEV_RELEASE);

	/* Now we can mark the compute_ctx as not active. Even if a reset is running in a different
	 * thread, we don't care because the in_reset is marked so if a user tries to open
	 * the device it will fail on that, even if compute_ctx is false.
	 */
	mutex_lock(&hdev->fpriv_list_lock);
	hdev->is_compute_ctx_active = false;
	mutex_unlock(&hdev->fpriv_list_lock);

	hdev->compute_ctx_in_release = 0;

	/* release the eventfd */
	if (hpriv->notifier_event.eventfd)
		eventfd_ctx_put(hpriv->notifier_event.eventfd);

	mutex_destroy(&hpriv->notifier_event.lock);

	kfree(hpriv);
}
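
/*
 * hl_hpriv_get/hl_hpriv_put - reference counting for the per-process private
 * data. The last hl_hpriv_put() releases the structure via hpriv_release().
 * hl_hpriv_put() returns 1 if the object was released, 0 otherwise.
 */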
void hl_hpriv_get(struct hl_fpriv *hpriv)
{
	kref_get(&hpriv->refcount);
}

int hl_hpriv_put(struct hl_fpriv *hpriv)
{
	return kref_put(&hpriv->refcount, hpriv_release);
}

/*
 * hl_device_release - release function for habanalabs device
 *
 * @inode: pointer to inode structure
 * @filp: pointer to file structure
 *
 * Called when process closes a habanalabs device
 */
static int hl_device_release(struct inode *inode, struct file *filp)
{
	struct hl_fpriv *hpriv = filp->private_data;
	struct hl_device *hdev = hpriv->hdev;

	filp->private_data = NULL;

	if (!hdev) {
		pr_crit("Closing FD after device was removed. Memory leak will occur and it is advised to reboot.\n");
		put_pid(hpriv->taskpid);
		return 0;
	}

	/* Each pending user interrupt holds the user's context, hence we
	 * must release them all before calling hl_ctx_mgr_fini().
	 */
	hl_release_pending_user_interrupts(hpriv->hdev);

	hl_mem_mgr_fini(&hpriv->mem_mgr);
	hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr);

	hdev->compute_ctx_in_release = 1;

	if (!hl_hpriv_put(hpriv))
		dev_notice(hdev->dev,
			"User process closed FD but device still in use\n");

	hdev->last_open_session_duration_jif =
		jiffies - hdev->last_successful_open_jif;

	return 0;
}

static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
{
	struct hl_fpriv *hpriv = filp->private_data;
	struct hl_device *hdev = hpriv->hdev;

	filp->private_data = NULL;

	if (!hdev) {
		pr_err("Closing FD after device was removed\n");
		goto out;
	}

	mutex_lock(&hdev->fpriv_ctrl_list_lock);
	list_del(&hpriv->dev_node);
	mutex_unlock(&hdev->fpriv_ctrl_list_lock);
out:
	/* release the eventfd */
	if (hpriv->notifier_event.eventfd)
		eventfd_ctx_put(hpriv->notifier_event.eventfd);

	mutex_destroy(&hpriv->notifier_event.lock);
	put_pid(hpriv->taskpid);

	kfree(hpriv);

	return 0;
}

/*
 * hl_mmap - mmap function for habanalabs device
 *
 * @*filp: pointer to file structure
 * @*vma: pointer to vm_area_struct of the process
 *
 * Called when process does an mmap on habanalabs device. Call the device's mmap
 * function at the end of the common code.
 */
static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct hl_fpriv *hpriv = filp->private_data;
	struct hl_device *hdev = hpriv->hdev;
	unsigned long vm_pgoff;

	if (!hdev) {
		pr_err_ratelimited("Trying to mmap after device was removed! Please close FD\n");
		return -ENODEV;
	}

	vm_pgoff = vma->vm_pgoff;

	switch (vm_pgoff & HL_MMAP_TYPE_MASK) {
	case HL_MMAP_TYPE_BLOCK:
		vma->vm_pgoff = HL_MMAP_OFFSET_VALUE_GET(vm_pgoff);
		return hl_hw_block_mmap(hpriv, vma);

	case HL_MMAP_TYPE_CB:
	case HL_MMAP_TYPE_TS_BUFF:
		return hl_mem_mgr_mmap(&hpriv->mem_mgr, vma, NULL);
	}

	return -EINVAL;
}

static const struct file_operations hl_ops = {
	.owner = THIS_MODULE,
	.open = hl_device_open,
	.release = hl_device_release,
	.mmap = hl_mmap,
	.unlocked_ioctl = hl_ioctl,
	.compat_ioctl = hl_ioctl
};

static const struct file_operations hl_ctrl_ops = {
	.owner = THIS_MODULE,
	.open = hl_device_open_ctrl,
	.release = hl_device_release_ctrl,
	.unlocked_ioctl = hl_ioctl_control,
	.compat_ioctl = hl_ioctl_control
};

static void device_release_func(struct device *dev)
{
	kfree(dev);
}

/*
 * device_init_cdev - Initialize cdev and device for habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 * @hclass: pointer to the class object of the device
 * @minor: minor number of the specific device
 * @fops: file operations to install for this device
 * @name: name of the device as it will appear in the filesystem
 * @cdev: pointer to the char device object that will be initialized
 * @dev: pointer to the device object that will be initialized
 *
 * Initialize a cdev and a Linux device for the habanalabs device.
 */
static int device_init_cdev(struct hl_device *hdev, struct class *hclass,
				int minor, const struct file_operations *fops,
				char *name, struct cdev *cdev,
				struct device **dev)
{
	cdev_init(cdev, fops);
	cdev->owner = THIS_MODULE;

	*dev = kzalloc(sizeof(**dev), GFP_KERNEL);
	if (!*dev)
		return -ENOMEM;

	device_initialize(*dev);
	(*dev)->devt = MKDEV(hdev->major, minor);
	(*dev)->class = hclass;
	(*dev)->release = device_release_func;
	dev_set_drvdata(*dev, hdev);
	dev_set_name(*dev, "%s", name);

	return 0;
}

static int device_cdev_sysfs_add(struct hl_device *hdev)
{
	int rc;

	rc = cdev_device_add(&hdev->cdev, hdev->dev);
	if (rc) {
		dev_err(hdev->dev,
			"failed to add a char device to the system\n");
		return rc;
	}

	rc = cdev_device_add(&hdev->cdev_ctrl, hdev->dev_ctrl);
	if (rc) {
		dev_err(hdev->dev,
			"failed to add a control char device to the system\n");
		goto delete_cdev_device;
	}

	/* hl_sysfs_init() must be done after adding the device to the system */
	rc = hl_sysfs_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize sysfs\n");
		goto delete_ctrl_cdev_device;
	}

	hdev->cdev_sysfs_created = true;

	return 0;

delete_ctrl_cdev_device:
	cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
delete_cdev_device:
	cdev_device_del(&hdev->cdev, hdev->dev);
	return rc;
}

static void device_cdev_sysfs_del(struct hl_device *hdev)
{
	if (!hdev->cdev_sysfs_created)
		goto put_devices;

	hl_sysfs_fini(hdev);
	cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
	cdev_device_del(&hdev->cdev, hdev->dev);

put_devices:
	put_device(hdev->dev);
	put_device(hdev->dev_ctrl);
}
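
/*
 * device_hard_reset_pending - handle a deferred hard-reset request
 *
 * @work: the delayed work associated with the reset request
 *
 * Runs from the dedicated reset workqueue. If the reset fails with -EBUSY
 * (open processes are still being killed) and device removal is not in
 * progress, the work re-queues itself and retries.
 */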
static void device_hard_reset_pending(struct work_struct *work)
{
	struct hl_device_reset_work *device_reset_work =
		container_of(work, struct hl_device_reset_work, reset_work.work);
	struct hl_device *hdev = device_reset_work->hdev;
	u32 flags;
	int rc;

	flags = device_reset_work->flags | HL_DRV_RESET_FROM_RESET_THR;

	rc = hl_device_reset(hdev, flags);
	if ((rc == -EBUSY) && !hdev->device_fini_pending) {
		dev_info(hdev->dev,
			"Could not reset device. will try again in %u seconds",
			HL_PENDING_RESET_PER_SEC);

		queue_delayed_work(device_reset_work->wq,
			&device_reset_work->reset_work,
			msecs_to_jiffies(HL_PENDING_RESET_PER_SEC * 1000));
	}
}

/*
 * device_early_init - do some early initialization for the habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Install the relevant function pointers and call the early_init function,
 * if such a function exists
 */
static int device_early_init(struct hl_device *hdev)
{
	int i, rc;
	char workq_name[32];

	switch (hdev->asic_type) {
	case ASIC_GOYA:
		goya_set_asic_funcs(hdev);
		strscpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name));
		break;
	case ASIC_GAUDI:
		gaudi_set_asic_funcs(hdev);
		strscpy(hdev->asic_name, "GAUDI", sizeof(hdev->asic_name));
		break;
	case ASIC_GAUDI_SEC:
		gaudi_set_asic_funcs(hdev);
		strscpy(hdev->asic_name, "GAUDI SEC", sizeof(hdev->asic_name));
		break;
	default:
		dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
			hdev->asic_type);
		return -EINVAL;
	}

	rc = hdev->asic_funcs->early_init(hdev);
	if (rc)
		return rc;

	rc = hl_asid_init(hdev);
	if (rc)
		goto early_fini;

	if (hdev->asic_prop.completion_queues_count) {
		hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count,
				sizeof(*hdev->cq_wq),
				GFP_KERNEL);
		if (!hdev->cq_wq) {
			rc = -ENOMEM;
			goto asid_fini;
		}
	}

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
		snprintf(workq_name, 32, "hl-free-jobs-%u", (u32) i);
		hdev->cq_wq[i] = create_singlethread_workqueue(workq_name);
		if (hdev->cq_wq[i] == NULL) {
			dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
			rc = -ENOMEM;
			goto free_cq_wq;
		}
	}

	hdev->eq_wq = alloc_workqueue("hl-events", WQ_UNBOUND, 0);
	if (hdev->eq_wq == NULL) {
		dev_err(hdev->dev, "Failed to allocate EQ workqueue\n");
		rc = -ENOMEM;
		goto free_cq_wq;
	}

	hdev->ts_free_obj_wq = alloc_workqueue("hl-ts-free-obj", WQ_UNBOUND, 0);
	if (!hdev->ts_free_obj_wq) {
		dev_err(hdev->dev,
			"Failed to allocate Timestamp registration free workqueue\n");
		rc = -ENOMEM;
		goto free_eq_wq;
	}

	hdev->pf_wq = alloc_workqueue("hl-prefetch", WQ_UNBOUND, 0);
	if (!hdev->pf_wq) {
		dev_err(hdev->dev, "Failed to allocate MMU prefetch workqueue\n");
		rc = -ENOMEM;
		goto free_ts_free_wq;
	}

	hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info),
					GFP_KERNEL);
	if (!hdev->hl_chip_info) {
		rc = -ENOMEM;
		goto free_pf_wq;
	}

	rc = hl_mmu_if_set_funcs(hdev);
	if (rc)
		goto free_chip_info;

	hl_mem_mgr_init(hdev->dev, &hdev->kernel_mem_mgr);

	hdev->device_reset_work.wq =
			create_singlethread_workqueue("hl_device_reset");
	if (!hdev->device_reset_work.wq) {
		rc = -ENOMEM;
		dev_err(hdev->dev, "Failed to create device reset WQ\n");
		goto free_cb_mgr;
	}

	INIT_DELAYED_WORK(&hdev->device_reset_work.reset_work,
			device_hard_reset_pending);
	hdev->device_reset_work.hdev = hdev;
	hdev->device_fini_pending = 0;

	mutex_init(&hdev->send_cpu_message_lock);
	mutex_init(&hdev->debug_lock);
	INIT_LIST_HEAD(&hdev->cs_mirror_list);
	spin_lock_init(&hdev->cs_mirror_lock);
	spin_lock_init(&hdev->reset_info.lock);
	INIT_LIST_HEAD(&hdev->fpriv_list);
	INIT_LIST_HEAD(&hdev->fpriv_ctrl_list);
	mutex_init(&hdev->fpriv_list_lock);
	mutex_init(&hdev->fpriv_ctrl_list_lock);
	mutex_init(&hdev->clk_throttling.lock);

	return 0;

free_cb_mgr:
	hl_mem_mgr_fini(&hdev->kernel_mem_mgr);
free_chip_info:
	kfree(hdev->hl_chip_info);
free_pf_wq:
	destroy_workqueue(hdev->pf_wq);
free_ts_free_wq:
	destroy_workqueue(hdev->ts_free_obj_wq);
free_eq_wq:
	destroy_workqueue(hdev->eq_wq);
free_cq_wq:
	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		if (hdev->cq_wq[i])
			destroy_workqueue(hdev->cq_wq[i]);
	kfree(hdev->cq_wq);
asid_fini:
	hl_asid_fini(hdev);
early_fini:
	if (hdev->asic_funcs->early_fini)
		hdev->asic_funcs->early_fini(hdev);

	return rc;
}

/*
 * device_early_fini - finalize all that was done in device_early_init
 *
 * @hdev: pointer to habanalabs device structure
 *
 */
static void device_early_fini(struct hl_device *hdev)
{
	int i;

	mutex_destroy(&hdev->debug_lock);
	mutex_destroy(&hdev->send_cpu_message_lock);

	mutex_destroy(&hdev->fpriv_list_lock);
	mutex_destroy(&hdev->fpriv_ctrl_list_lock);

	mutex_destroy(&hdev->clk_throttling.lock);

	hl_mem_mgr_fini(&hdev->kernel_mem_mgr);

	kfree(hdev->hl_chip_info);

	destroy_workqueue(hdev->pf_wq);
	destroy_workqueue(hdev->ts_free_obj_wq);
	destroy_workqueue(hdev->eq_wq);
	destroy_workqueue(hdev->device_reset_work.wq);

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		destroy_workqueue(hdev->cq_wq[i]);
	kfree(hdev->cq_wq);

	hl_asid_fini(hdev);

	if (hdev->asic_funcs->early_fini)
		hdev->asic_funcs->early_fini(hdev);
}
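
/*
 * hl_device_heartbeat - periodic liveness check of the device CPU
 *
 * @work: the delayed heartbeat work
 *
 * Sends a heartbeat packet to the device firmware. On failure while the
 * device is operational, triggers a hard reset with the heartbeat flag set;
 * otherwise reschedules itself.
 */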
static void hl_device_heartbeat(struct work_struct *work)
{
	struct hl_device *hdev = container_of(work, struct hl_device,
						work_heartbeat.work);

	if (!hl_device_operational(hdev, NULL))
		goto reschedule;

	if (!hdev->asic_funcs->send_heartbeat(hdev))
		goto reschedule;

	if (hl_device_operational(hdev, NULL))
		dev_err(hdev->dev, "Device heartbeat failed!\n");

	hl_device_reset(hdev, HL_DRV_RESET_HARD | HL_DRV_RESET_HEARTBEAT);

	return;

reschedule:
	/*
	 * prev_reset_trigger tracks consecutive fatal h/w errors until first
	 * heartbeat immediately post reset.
	 * If control reached here, then at least one heartbeat work has been
	 * scheduled since last reset/init cycle.
	 * So if the device is not already in reset cycle, reset the flag
	 * prev_reset_trigger as no reset occurred with HL_DRV_RESET_FW_FATAL_ERR
	 * status for at least one heartbeat. From this point driver restarts
	 * tracking future consecutive fatal errors.
	 */
	if (!hdev->reset_info.in_reset)
		hdev->reset_info.prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT;

	schedule_delayed_work(&hdev->work_heartbeat,
			usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
}

/*
 * device_late_init - do late initialization for the habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Do stuff that either needs the device H/W queues to be active or needs
 * to happen after all the rest of the initialization is finished
 */
static int device_late_init(struct hl_device *hdev)
{
	int rc;

	if (hdev->asic_funcs->late_init) {
		rc = hdev->asic_funcs->late_init(hdev);
		if (rc) {
			dev_err(hdev->dev,
				"failed late initialization for the H/W\n");
			return rc;
		}
	}

	hdev->high_pll = hdev->asic_prop.high_pll;

	if (hdev->heartbeat) {
		INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat);
		schedule_delayed_work(&hdev->work_heartbeat,
				usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
	}

	hdev->late_init_done = true;

	return 0;
}

/*
 * device_late_fini - finalize all that was done in device_late_init
 *
 * @hdev: pointer to habanalabs device structure
 *
 */
static void device_late_fini(struct hl_device *hdev)
{
	if (!hdev->late_init_done)
		return;

	if (hdev->heartbeat)
		cancel_delayed_work_sync(&hdev->work_heartbeat);

	if (hdev->asic_funcs->late_fini)
		hdev->asic_funcs->late_fini(hdev);

	hdev->late_init_done = false;
}
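
/*
 * hl_device_utilization - compute device utilization as a percentage
 *
 * @hdev: pointer to habanalabs device structure
 * @utilization: result, in the range 0-100
 *
 * Utilization is the current power draw scaled linearly between the idle
 * (DC) power and the maximum power:
 *
 *	utilization = (curr_power - dc_power) * 100 / (max_power - dc_power)
 *
 * e.g. dc_power = 100W, max_power = 300W, curr_power = 200W -> 50%.
 */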
int hl_device_utilization(struct hl_device *hdev, u32 *utilization)
{
	u64 max_power, curr_power, dc_power, dividend;
	int rc;

	max_power = hdev->max_power;
	dc_power = hdev->asic_prop.dc_power_default;
	rc = hl_fw_cpucp_power_get(hdev, &curr_power);

	if (rc)
		return rc;

	curr_power = clamp(curr_power, dc_power, max_power);

	dividend = (curr_power - dc_power) * 100;
	*utilization = (u32) div_u64(dividend, (max_power - dc_power));

	return 0;
}

int hl_device_set_debug_mode(struct hl_device *hdev, struct hl_ctx *ctx, bool enable)
{
	int rc = 0;

	mutex_lock(&hdev->debug_lock);

	if (!enable) {
		if (!hdev->in_debug) {
			dev_err(hdev->dev,
				"Failed to disable debug mode because device was not in debug mode\n");
			rc = -EFAULT;
			goto out;
		}

		if (!hdev->reset_info.hard_reset_pending)
			hdev->asic_funcs->halt_coresight(hdev, ctx);

		hdev->in_debug = 0;

		goto out;
	}

	if (hdev->in_debug) {
		dev_err(hdev->dev,
			"Failed to enable debug mode because device is already in debug mode\n");
		rc = -EFAULT;
		goto out;
	}

	hdev->in_debug = 1;

out:
	mutex_unlock(&hdev->debug_lock);

	return rc;
}

static void take_release_locks(struct hl_device *hdev)
{
	/* Flush anyone that is inside the critical section of enqueue
	 * jobs to the H/W
	 */
	hdev->asic_funcs->hw_queues_lock(hdev);
	hdev->asic_funcs->hw_queues_unlock(hdev);

	/* Flush processes that are sending message to CPU */
	mutex_lock(&hdev->send_cpu_message_lock);
	mutex_unlock(&hdev->send_cpu_message_lock);

	/* Flush anyone that is inside device open */
	mutex_lock(&hdev->fpriv_list_lock);
	mutex_unlock(&hdev->fpriv_list_lock);
	mutex_lock(&hdev->fpriv_ctrl_list_lock);
	mutex_unlock(&hdev->fpriv_ctrl_list_lock);
}

static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset,
				bool skip_wq_flush)
{
	if (hard_reset)
		device_late_fini(hdev);

	/*
	 * Halt the engines and disable interrupts so we won't get any more
	 * completions from H/W and we won't have any accesses from the
	 * H/W to the host machine
	 */
	hdev->asic_funcs->halt_engines(hdev, hard_reset, fw_reset);

	/* Go over all the queues, release all CS and their jobs */
	hl_cs_rollback_all(hdev, skip_wq_flush);

	/* flush the MMU prefetch workqueue */
	flush_workqueue(hdev->pf_wq);

	/* Release all pending user interrupts, each pending user interrupt
	 * holds a reference to user context
	 */
	hl_release_pending_user_interrupts(hdev);
}

/*
 * hl_device_suspend - initiate device suspend
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Puts the hw in the suspend state (all asics).
 * Returns 0 for success or an error on failure.
 * Called at driver suspend.
 */
int hl_device_suspend(struct hl_device *hdev)
{
	int rc;

	pci_save_state(hdev->pdev);

	/* Block future CS/VM/JOB completion operations */
	spin_lock(&hdev->reset_info.lock);
	if (hdev->reset_info.in_reset) {
		spin_unlock(&hdev->reset_info.lock);
		dev_err(hdev->dev, "Can't suspend while in reset\n");
		return -EIO;
	}
	hdev->reset_info.in_reset = 1;
	spin_unlock(&hdev->reset_info.lock);

	/* This blocks all other stuff that is not blocked by in_reset */
	hdev->disabled = true;

	take_release_locks(hdev);

	rc = hdev->asic_funcs->suspend(hdev);
	if (rc)
		dev_err(hdev->dev,
			"Failed to disable PCI access of device CPU\n");

	/* Shut down the device */
	pci_disable_device(hdev->pdev);
	pci_set_power_state(hdev->pdev, PCI_D3hot);

	return 0;
}

/*
 * hl_device_resume - initiate device resume
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Bring the hw back to operating state (all asics).
 * Returns 0 for success or an error on failure.
 * Called at driver resume.
 */
int hl_device_resume(struct hl_device *hdev)
{
	int rc;

	pci_set_power_state(hdev->pdev, PCI_D0);
	pci_restore_state(hdev->pdev);
	rc = pci_enable_device_mem(hdev->pdev);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to enable PCI device in resume\n");
		return rc;
	}

	pci_set_master(hdev->pdev);

	rc = hdev->asic_funcs->resume(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to resume device after suspend\n");
		goto disable_device;
	}


	/* 'in_reset' was set to true during suspend, now we must clear it in order
	 * for hard reset to be performed
	 */
	hdev->reset_info.in_reset = 0;

	rc = hl_device_reset(hdev, HL_DRV_RESET_HARD);
	if (rc) {
		dev_err(hdev->dev, "Failed to reset device during resume\n");
		goto disable_device;
	}

	return 0;

disable_device:
	pci_clear_master(hdev->pdev);
	pci_disable_device(hdev->pdev);

	return rc;
}
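
/*
 * device_kill_open_processes - kill processes that still hold the device open
 *
 * @hdev: pointer to habanalabs device structure
 * @timeout: seconds to wait for processes to exit (0 means use the default
 *           reset-pending wait)
 * @control_dev: operate on the control device's open-files list
 *
 * Sends SIGKILL to every user process in the open-files list and waits for
 * the list to empty. Returns 0 when all processes exited, -EBUSY to request
 * another trial, or -ETIME after the last trial.
 */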
static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool control_dev)
{
	struct task_struct *task = NULL;
	struct list_head *fd_list;
	struct hl_fpriv *hpriv;
	struct mutex *fd_lock;
	u32 pending_cnt;

	fd_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock;
	fd_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list;

	/* Giving time for user to close FD, and for processes that are inside
	 * hl_device_open to finish
	 */
	if (!list_empty(fd_list))
		ssleep(1);

	if (timeout) {
		pending_cnt = timeout;
	} else {
		if (hdev->process_kill_trial_cnt) {
			/* Processes have been already killed */
			pending_cnt = 1;
			goto wait_for_processes;
		} else {
			/* Wait a small period after process kill */
			pending_cnt = HL_PENDING_RESET_PER_SEC;
		}
	}

	mutex_lock(fd_lock);

	/* This section must be protected because we are dereferencing
	 * pointers that are freed if the process exits
	 */
	list_for_each_entry(hpriv, fd_list, dev_node) {
		task = get_pid_task(hpriv->taskpid, PIDTYPE_PID);
		if (task) {
			dev_info(hdev->dev, "Killing user process pid=%d\n",
				task_pid_nr(task));
			send_sig(SIGKILL, task, 1);
			usleep_range(1000, 10000);

			put_task_struct(task);
		} else {
			/*
			 * If we got here, it means that process was killed from outside the driver
			 * right after it started looping on fd_list and before get_pid_task, thus
			 * we don't need to kill it.
			 */
			dev_dbg(hdev->dev,
				"Can't get task struct for user process, assuming process was killed from outside the driver\n");
		}
	}

	mutex_unlock(fd_lock);

	/*
	 * We killed the open users, but that doesn't mean they are closed.
	 * It could be that they are running a long cleanup phase in the driver
	 * e.g. MMU unmappings, or running other long teardown flow even before
	 * our cleanup.
	 * Therefore we need to wait again to make sure they are closed before
	 * continuing with the reset.
	 */

wait_for_processes:
	while ((!list_empty(fd_list)) && (pending_cnt)) {
		dev_dbg(hdev->dev,
			"Waiting for all unmap operations to finish before hard reset\n");

		pending_cnt--;

		ssleep(1);
	}

	/* All processes exited successfully */
	if (list_empty(fd_list))
		return 0;

	/* Give up waiting for processes to exit */
	if (hdev->process_kill_trial_cnt == HL_PENDING_RESET_MAX_TRIALS)
		return -ETIME;

	hdev->process_kill_trial_cnt++;

	return -EBUSY;
}

static void device_disable_open_processes(struct hl_device *hdev, bool control_dev)
{
	struct list_head *fd_list;
	struct hl_fpriv *hpriv;
	struct mutex *fd_lock;

	fd_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock;
	fd_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list;

	mutex_lock(fd_lock);
	list_for_each_entry(hpriv, fd_list, dev_node)
		hpriv->hdev = NULL;
	mutex_unlock(fd_lock);
}
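
/*
 * handle_reset_trigger - record the reset cause and detect repeated triggers
 *
 * @hdev: pointer to habanalabs device structure
 * @flags: reset flags
 *
 * Latches the first reset cause of this cycle, flags a repeated trigger if
 * the same cause fires twice in a row, and asks the firmware to stop PCI
 * access when the hard reset is driver-initiated.
 */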
static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
{
	u32 cur_reset_trigger = HL_RESET_TRIGGER_DEFAULT;

	/*
	 * 'reset cause' is being updated here, because getting here
	 * means that it's the 1st time and the last time we're here
	 * ('in_reset' makes sure of it). This makes sure that
	 * 'reset_cause' will continue holding its 1st recorded reason!
	 */
	if (flags & HL_DRV_RESET_HEARTBEAT) {
		hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_HEARTBEAT;
		cur_reset_trigger = HL_DRV_RESET_HEARTBEAT;
	} else if (flags & HL_DRV_RESET_TDR) {
		hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_TDR;
		cur_reset_trigger = HL_DRV_RESET_TDR;
	} else if (flags & HL_DRV_RESET_FW_FATAL_ERR) {
		hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
		cur_reset_trigger = HL_DRV_RESET_FW_FATAL_ERR;
	} else {
		hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
	}

	/*
	 * If reset cause is same twice, then reset_trigger_repeated
	 * is set and if this reset is due to a fatal FW error
	 * device is set to an unstable state.
	 */
	if (hdev->reset_info.prev_reset_trigger != cur_reset_trigger) {
		hdev->reset_info.prev_reset_trigger = cur_reset_trigger;
		hdev->reset_info.reset_trigger_repeated = 0;
	} else {
		hdev->reset_info.reset_trigger_repeated = 1;
	}

	/* If the reset is due to a heartbeat failure, the device CPU is not
	 * responsive, in which case there is no point sending it a PCI
	 * disable message.
	 *
	 * If F/W is performing the reset, no need to send it a message to disable
	 * PCI access
	 */
	if ((flags & HL_DRV_RESET_HARD) &&
			!(flags & (HL_DRV_RESET_HEARTBEAT | HL_DRV_RESET_BYPASS_REQ_TO_FW))) {
		/* Disable PCI access from device F/W so it won't send
		 * us additional interrupts. We disable MSI/MSI-X at
		 * the halt_engines function and we can't have the F/W
		 * sending us interrupts after that. We need to disable
		 * the access here because if the device is marked
		 * disabled, the message won't be sent. Also, in case
		 * of heartbeat, the device CPU is marked as disabled
		 * so this message won't be sent
		 */
		if (hl_fw_send_pci_access_msg(hdev,
				CPUCP_PACKET_DISABLE_PCI_ACCESS))
			dev_warn(hdev->dev,
				"Failed to disable PCI access by F/W\n");
	}
}

/*
 * hl_device_reset - reset the device
 *
 * @hdev: pointer to habanalabs device structure
 * @flags: reset flags.
 *
 * Block future CS and wait for pending CS to be enqueued
 * Call ASIC H/W fini
 * Flush all completions
 * Re-initialize all internal data structures
 * Call ASIC H/W init, late_init
 * Test queues
 * Enable device
 *
 * Returns 0 for success or an error on failure.
 */
int hl_device_reset(struct hl_device *hdev, u32 flags)
{
	bool hard_reset, from_hard_reset_thread, fw_reset, hard_instead_soft = false,
			reset_upon_device_release = false, schedule_hard_reset = false,
			skip_wq_flush, delay_reset;
	u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
	struct hl_ctx *ctx;
	int i, rc;

	if (!hdev->init_done) {
		dev_err(hdev->dev, "Can't reset before initialization is done\n");
		return 0;
	}

	hard_reset = !!(flags & HL_DRV_RESET_HARD);
	from_hard_reset_thread = !!(flags & HL_DRV_RESET_FROM_RESET_THR);
	fw_reset = !!(flags & HL_DRV_RESET_BYPASS_REQ_TO_FW);
	skip_wq_flush = !!(flags & HL_DRV_RESET_DEV_RELEASE);
	delay_reset = !!(flags & HL_DRV_RESET_DELAY);

	if (!hard_reset && !hdev->asic_prop.supports_soft_reset) {
		hard_instead_soft = true;
		hard_reset = true;
	}

	if (hdev->reset_upon_device_release && (flags & HL_DRV_RESET_DEV_RELEASE)) {
		if (hard_reset) {
			dev_crit(hdev->dev,
				"Aborting reset because hard-reset is mutually exclusive with reset-on-device-release\n");
			return -EINVAL;
		}

		reset_upon_device_release = true;

		goto do_reset;
	}

	if (!hard_reset && !hdev->asic_prop.allow_inference_soft_reset) {
		hard_instead_soft = true;
		hard_reset = true;
	}

	if (hard_instead_soft)
		dev_dbg(hdev->dev, "Doing hard-reset instead of soft-reset\n");

do_reset:
	/* Re-entry of reset thread */
	if (from_hard_reset_thread && hdev->process_kill_trial_cnt)
		goto kill_processes;

	/*
	 * Prevent concurrency in this function - only one reset should be
	 * done at any given time. Only need to perform this if we didn't
	 * get from the dedicated hard reset thread
	 */
	if (!from_hard_reset_thread) {
		/* Block future CS/VM/JOB completion operations */
		spin_lock(&hdev->reset_info.lock);
		if (hdev->reset_info.in_reset) {
			/* We only allow scheduling of a hard reset during soft reset */
			if (hard_reset && hdev->reset_info.is_in_soft_reset)
				hdev->reset_info.hard_reset_schedule_flags = flags;
			spin_unlock(&hdev->reset_info.lock);
			return 0;
		}
		hdev->reset_info.in_reset = 1;
		spin_unlock(&hdev->reset_info.lock);

		if (delay_reset)
			usleep_range(HL_RESET_DELAY_USEC, HL_RESET_DELAY_USEC << 1);

		handle_reset_trigger(hdev, flags);

		/* This still allows the completion of some KDMA ops */
		hdev->reset_info.is_in_soft_reset = !hard_reset;

		/* This also blocks future CS/VM/JOB completion operations */
		hdev->disabled = true;

		take_release_locks(hdev);

		if (hard_reset)
			dev_info(hdev->dev, "Going to reset device\n");
		else if (reset_upon_device_release)
			dev_dbg(hdev->dev, "Going to reset device after release by user\n");
		else
			dev_dbg(hdev->dev, "Going to reset engines of inference device\n");
	}
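
/* 'again' is the re-entry point for escalation: a failed soft reset, or a
 * hard reset that was scheduled while a soft reset was running, jumps back
 * here with the hard-reset flags set.
 */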
again:
	if ((hard_reset) && (!from_hard_reset_thread)) {
		hdev->reset_info.hard_reset_pending = true;

		hdev->process_kill_trial_cnt = 0;

		hdev->device_reset_work.flags = flags;

		/*
		 * Because the reset function can't run from heartbeat work,
		 * we need to call the reset function from a dedicated work.
		 */
		queue_delayed_work(hdev->device_reset_work.wq,
			&hdev->device_reset_work.reset_work, 0);

		return 0;
	}

	cleanup_resources(hdev, hard_reset, fw_reset, skip_wq_flush);

kill_processes:
	if (hard_reset) {
		/* Kill processes here after CS rollback. This is because the
		 * process can't really exit until all its CSs are done, which
		 * is what we do in cs rollback
		 */
		rc = device_kill_open_processes(hdev, 0, false);

		if (rc == -EBUSY) {
			if (hdev->device_fini_pending) {
				dev_crit(hdev->dev,
					"Failed to kill all open processes, stopping hard reset\n");
				goto out_err;
			}

			/* signal reset thread to reschedule */
			return rc;
		}

		if (rc) {
			dev_crit(hdev->dev,
				"Failed to kill all open processes, stopping hard reset\n");
			goto out_err;
		}

		/* Flush the Event queue workers to make sure no other thread is
		 * reading or writing to registers during the reset
		 */
		flush_workqueue(hdev->eq_wq);
	}

	/* Reset the H/W. It will be in idle state after this returns */
	hdev->asic_funcs->hw_fini(hdev, hard_reset, fw_reset);

	if (hard_reset) {
		hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE;

		/* Release kernel context */
		if (hdev->kernel_ctx && hl_ctx_put(hdev->kernel_ctx) == 1)
			hdev->kernel_ctx = NULL;

		hl_vm_fini(hdev);
		hl_mmu_fini(hdev);
		hl_eq_reset(hdev, &hdev->event_queue);
	}

	/* Re-initialize PI,CI to 0 in all queues (hw queue, cq) */
	hl_hw_queue_reset(hdev, hard_reset);
	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		hl_cq_reset(hdev, &hdev->completion_queue[i]);

	/* Make sure the context switch phase will run again */
	ctx = hl_get_compute_ctx(hdev);
	if (ctx) {
		atomic_set(&ctx->thread_ctx_switch_token, 1);
		ctx->thread_ctx_switch_wait_token = 0;
		hl_ctx_put(ctx);
	}

	/* Finished tear-down, starting to re-initialize */

	if (hard_reset) {
		hdev->device_cpu_disabled = false;
		hdev->reset_info.hard_reset_pending = false;

		if (hdev->reset_info.reset_trigger_repeated &&
				(hdev->reset_info.prev_reset_trigger ==
						HL_DRV_RESET_FW_FATAL_ERR)) {
			/* if there are 2 back-to-back resets from FW,
			 * ensure the driver puts the device in an unusable state
			 */
			dev_crit(hdev->dev,
				"Consecutive FW fatal errors received, stopping hard reset\n");
			rc = -EIO;
			goto out_err;
		}

		if (hdev->kernel_ctx) {
			dev_crit(hdev->dev,
				"kernel ctx was alive during hard reset, something is terribly wrong\n");
			rc = -EBUSY;
			goto out_err;
		}

		rc = hl_mmu_init(hdev);
		if (rc) {
			dev_err(hdev->dev,
				"Failed to initialize MMU S/W after hard reset\n");
			goto out_err;
		}

		/* Allocate the kernel context */
		hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx),
						GFP_KERNEL);
		if (!hdev->kernel_ctx) {
			rc = -ENOMEM;
			hl_mmu_fini(hdev);
			goto out_err;
		}

		hdev->is_compute_ctx_active = false;

		rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
		if (rc) {
			dev_err(hdev->dev,
				"failed to init kernel ctx in hard reset\n");
			kfree(hdev->kernel_ctx);
			hdev->kernel_ctx = NULL;
			hl_mmu_fini(hdev);
			goto out_err;
		}
	}

	/* Device is now enabled as part of the initialization requires
	 * communication with the device firmware to get information that
	 * is required for the initialization itself
	 */
	hdev->disabled = false;

	rc = hdev->asic_funcs->hw_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize the H/W after reset\n");
		goto out_err;
	}

	/* If device is not idle fail the reset process */
	if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask,
			HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) {
		dev_err(hdev->dev, "device is not idle (mask 0x%llx_%llx) after reset\n",
			idle_mask[1], idle_mask[0]);
		rc = -EIO;
		goto out_err;
	}

	/* Check that the communication with the device is working */
	rc = hdev->asic_funcs->test_queues(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to detect if device is alive after reset\n");
		goto out_err;
	}

	if (hard_reset) {
		rc = device_late_init(hdev);
		if (rc) {
			dev_err(hdev->dev, "Failed late init after hard reset\n");
			goto out_err;
		}

		rc = hl_vm_init(hdev);
		if (rc) {
			dev_err(hdev->dev, "Failed to init memory module after hard reset\n");
			goto out_err;
		}

		hl_fw_set_max_power(hdev);
	} else {
		rc = hdev->asic_funcs->non_hard_reset_late_init(hdev);
		if (rc) {
			if (reset_upon_device_release)
				dev_err(hdev->dev,
					"Failed late init in reset after device release\n");
			else
				dev_err(hdev->dev, "Failed late init after soft reset\n");
			goto out_err;
		}
	}

	spin_lock(&hdev->reset_info.lock);
	hdev->reset_info.is_in_soft_reset = false;

	/* Schedule hard reset only if requested and if not already in hard reset.
	 * We keep 'in_reset' enabled, so no other reset can go in during the hard
	 * reset schedule
	 */
	if (!hard_reset && hdev->reset_info.hard_reset_schedule_flags)
		schedule_hard_reset = true;
	else
		hdev->reset_info.in_reset = 0;

	spin_unlock(&hdev->reset_info.lock);

	hdev->reset_info.needs_reset = false;

	if (hard_reset)
		dev_info(hdev->dev, "Successfully finished resetting the device\n");
	else
		dev_dbg(hdev->dev, "Successfully finished resetting the device\n");

	if (hard_reset) {
		hdev->reset_info.hard_reset_cnt++;

		/* After reset is done, we are ready to receive events from
		 * the F/W. We can't do it before because we will ignore events
		 * and if those events are fatal, we won't know about it and
		 * the device will be operational although it shouldn't be
		 */
		hdev->asic_funcs->enable_events_from_fw(hdev);
	} else if (!reset_upon_device_release) {
		hdev->reset_info.soft_reset_cnt++;
	}

	if (schedule_hard_reset) {
		dev_info(hdev->dev, "Performing hard reset scheduled during soft reset\n");
		flags = hdev->reset_info.hard_reset_schedule_flags;
		hdev->reset_info.hard_reset_schedule_flags = 0;
		hdev->disabled = true;
		hard_reset = true;
		handle_reset_trigger(hdev, flags);
		goto again;
	}

	return 0;
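
/* Failure path: a failed soft reset or a failed reset-upon-device-release
 * escalates to a hard reset; a failed hard reset leaves the device unusable.
 */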
out_err:
	hdev->disabled = true;
	hdev->reset_info.is_in_soft_reset = false;

	if (hard_reset) {
		dev_err(hdev->dev, "Failed to reset! Device is NOT usable\n");
		hdev->reset_info.hard_reset_cnt++;
	} else if (reset_upon_device_release) {
		dev_err(hdev->dev, "Failed to reset device after user release\n");
		flags |= HL_DRV_RESET_HARD;
		flags &= ~HL_DRV_RESET_DEV_RELEASE;
		hard_reset = true;
		goto again;
	} else {
		dev_err(hdev->dev, "Failed to do soft-reset\n");
		hdev->reset_info.soft_reset_cnt++;
		flags |= HL_DRV_RESET_HARD;
		hard_reset = true;
		goto again;
	}

	hdev->reset_info.in_reset = 0;

	return rc;
}
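
/*
 * hl_notifier_event_send - notify a single user process via its eventfd
 *
 * @notifier_event: the process' notifier-event state
 * @event: the occurred event
 *
 * Records the event in the process' events mask and signals the eventfd,
 * if one was registered.
 */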
static void hl_notifier_event_send(struct hl_notifier_event *notifier_event, u64 event)
{
	mutex_lock(&notifier_event->lock);
	notifier_event->events_mask |= event;
	if (notifier_event->eventfd)
		eventfd_signal(notifier_event->eventfd, 1);

	mutex_unlock(&notifier_event->lock);
}

/*
 * hl_notifier_event_send_all - notify all user processes via eventfd
 *
 * @hdev: pointer to habanalabs device structure
 * @event: the occurred event
 */
void hl_notifier_event_send_all(struct hl_device *hdev, u64 event)
{
	struct hl_fpriv *hpriv;

	mutex_lock(&hdev->fpriv_list_lock);

	list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node)
		hl_notifier_event_send(&hpriv->notifier_event, event);

	mutex_unlock(&hdev->fpriv_list_lock);

	/* control device */
	mutex_lock(&hdev->fpriv_ctrl_list_lock);

	list_for_each_entry(hpriv, &hdev->fpriv_ctrl_list, dev_node)
		hl_notifier_event_send(&hpriv->notifier_event, event);

	mutex_unlock(&hdev->fpriv_ctrl_list_lock);
}

/*
 * hl_device_init - main initialization function for habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Allocate an id for the device, do early initialization and then call the
 * ASIC specific initialization functions. Finally, create the cdev and the
 * Linux device to expose it to the user
 */
int hl_device_init(struct hl_device *hdev, struct class *hclass)
{
	int i, rc, cq_cnt, user_interrupt_cnt, cq_ready_cnt;
	char *name;
	bool add_cdev_sysfs_on_err = false;

	name = kasprintf(GFP_KERNEL, "hl%d", hdev->id / 2);
	if (!name) {
		rc = -ENOMEM;
		goto out_disabled;
	}

	/* Initialize cdev and device structures */
	rc = device_init_cdev(hdev, hclass, hdev->id, &hl_ops, name,
				&hdev->cdev, &hdev->dev);

	kfree(name);

	if (rc)
		goto out_disabled;

	name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->id / 2);
	if (!name) {
		rc = -ENOMEM;
		goto free_dev;
	}

	/* Initialize cdev and device structures for control device */
	rc = device_init_cdev(hdev, hclass, hdev->id_control, &hl_ctrl_ops,
				name, &hdev->cdev_ctrl, &hdev->dev_ctrl);

	kfree(name);

	if (rc)
		goto free_dev;

	/* Initialize ASIC function pointers and perform early init */
	rc = device_early_init(hdev);
	if (rc)
		goto free_dev_ctrl;

	user_interrupt_cnt = hdev->asic_prop.user_interrupt_count;

	if (user_interrupt_cnt) {
		hdev->user_interrupt = kcalloc(user_interrupt_cnt,
				sizeof(*hdev->user_interrupt),
				GFP_KERNEL);

		if (!hdev->user_interrupt) {
			rc = -ENOMEM;
			goto early_fini;
		}
	}

	/*
	 * Start calling ASIC initialization. First S/W then H/W and finally
	 * late init
	 */
	rc = hdev->asic_funcs->sw_init(hdev);
	if (rc)
		goto user_interrupts_fini;


	/* initialize completion structure for multi CS wait */
	hl_multi_cs_completion_init(hdev);

	/*
	 * Initialize the H/W queues. Must be done before hw_init, because
	 * there the addresses of the kernel queue are being written to the
	 * registers of the device
	 */
	rc = hl_hw_queues_create(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize kernel queues\n");
		goto sw_fini;
	}

	cq_cnt = hdev->asic_prop.completion_queues_count;

	/*
	 * Initialize the completion queues. Must be done before hw_init,
	 * because there the addresses of the completion queues are being
	 * passed as arguments to request_irq
	 */
	if (cq_cnt) {
		hdev->completion_queue = kcalloc(cq_cnt,
				sizeof(*hdev->completion_queue),
				GFP_KERNEL);

		if (!hdev->completion_queue) {
			dev_err(hdev->dev,
				"failed to allocate completion queues\n");
			rc = -ENOMEM;
			goto hw_queues_destroy;
		}
	}

	for (i = 0, cq_ready_cnt = 0 ; i < cq_cnt ; i++, cq_ready_cnt++) {
		rc = hl_cq_init(hdev, &hdev->completion_queue[i],
				hdev->asic_funcs->get_queue_id_for_cq(hdev, i));
		if (rc) {
			dev_err(hdev->dev,
				"failed to initialize completion queue\n");
			goto cq_fini;
		}
		hdev->completion_queue[i].cq_idx = i;
	}

	/*
	 * Initialize the event queue. Must be done before hw_init,
	 * because there the address of the event queue is being
	 * passed as argument to request_irq
	 */
	rc = hl_eq_init(hdev, &hdev->event_queue);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize event queue\n");
		goto cq_fini;
	}

	/* MMU S/W must be initialized before kernel context is created */
	rc = hl_mmu_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize MMU S/W structures\n");
		goto eq_fini;
	}

	/* Allocate the kernel context */
	hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL);
	if (!hdev->kernel_ctx) {
		rc = -ENOMEM;
		goto mmu_fini;
	}

	hdev->is_compute_ctx_active = false;

	hdev->asic_funcs->state_dump_init(hdev);

	hl_debugfs_add_device(hdev);

	/* debugfs nodes are created in hl_ctx_init so it must be called after
	 * hl_debugfs_add_device.
	 */
	rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize kernel context\n");
		kfree(hdev->kernel_ctx);
		goto remove_device_from_debugfs;
	}

	rc = hl_cb_pool_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize CB pool\n");
		goto release_ctx;
	}

	/*
	 * From this point, override rc (=0) in case of an error to allow
	 * debugging (by adding char devices and create sysfs nodes as part of
	 * the error flow).
	 */
	add_cdev_sysfs_on_err = true;

	/* Device is now enabled as part of the initialization requires
	 * communication with the device firmware to get information that
	 * is required for the initialization itself
	 */
	hdev->disabled = false;

	rc = hdev->asic_funcs->hw_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize the H/W\n");
		rc = 0;
		goto out_disabled;
	}

	/* Check that the communication with the device is working */
	rc = hdev->asic_funcs->test_queues(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to detect if device is alive\n");
		rc = 0;
		goto out_disabled;
	}

	rc = device_late_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed late initialization\n");
		rc = 0;
		goto out_disabled;
	}

	dev_info(hdev->dev, "Found %s device with %lluGB DRAM\n",
		hdev->asic_name,
		hdev->asic_prop.dram_size / SZ_1G);

	rc = hl_vm_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize memory module\n");
		rc = 0;
		goto out_disabled;
	}

	/*
	 * Expose devices and sysfs nodes to user.
	 * From here there is no need to add char devices and create sysfs nodes
	 * in case of an error.
	 */
	add_cdev_sysfs_on_err = false;
	rc = device_cdev_sysfs_add(hdev);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add char devices and sysfs nodes\n");
		rc = 0;
		goto out_disabled;
	}

	/* Need to call this again because the max power might change,
	 * depending on card type for certain ASICs
	 */
	if (hdev->asic_prop.set_max_power_on_device_init)
		hl_fw_set_max_power(hdev);

	/*
	 * hl_hwmon_init() must be called after device_late_init(), because only
	 * there we get the information from the device about which
	 * hwmon-related sensors the device supports.
	 * Furthermore, it must be done after adding the device to the system.
	 */
	rc = hl_hwmon_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize hwmon\n");
		rc = 0;
		goto out_disabled;
	}

	dev_notice(hdev->dev,
		"Successfully added device to habanalabs driver\n");

	hdev->init_done = true;

	/* After initialization is done, we are ready to receive events from
	 * the F/W. We can't do it before because we will ignore events and if
	 * those events are fatal, we won't know about it and the device will
	 * be operational although it shouldn't be
	 */
	hdev->asic_funcs->enable_events_from_fw(hdev);

	return 0;

release_ctx:
	if (hl_ctx_put(hdev->kernel_ctx) != 1)
		dev_err(hdev->dev,
			"kernel ctx is still alive on initialization failure\n");
remove_device_from_debugfs:
	hl_debugfs_remove_device(hdev);
mmu_fini:
	hl_mmu_fini(hdev);
eq_fini:
	hl_eq_fini(hdev, &hdev->event_queue);
cq_fini:
	for (i = 0 ; i < cq_ready_cnt ; i++)
		hl_cq_fini(hdev, &hdev->completion_queue[i]);
	kfree(hdev->completion_queue);
hw_queues_destroy:
	hl_hw_queues_destroy(hdev);
sw_fini:
	hdev->asic_funcs->sw_fini(hdev);
user_interrupts_fini:
	kfree(hdev->user_interrupt);
early_fini:
	device_early_fini(hdev);
free_dev_ctrl:
	put_device(hdev->dev_ctrl);
free_dev:
	put_device(hdev->dev);
out_disabled:
	hdev->disabled = true;
	if (add_cdev_sysfs_on_err)
		device_cdev_sysfs_add(hdev);
	if (hdev->pdev)
		dev_err(&hdev->pdev->dev,
			"Failed to initialize hl%d. Device is NOT usable !\n",
			hdev->id / 2);
	else
		pr_err("Failed to initialize hl%d. Device is NOT usable !\n",
			hdev->id / 2);

	return rc;
}

/*
 * hl_device_fini - main tear-down function for habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Destroy the device, call ASIC fini functions and release the id
 */
void hl_device_fini(struct hl_device *hdev)
{
	bool device_in_reset;
	ktime_t timeout;
	u64 reset_sec;
	int i, rc;

	dev_info(hdev->dev, "Removing device\n");

	hdev->device_fini_pending = 1;
	flush_delayed_work(&hdev->device_reset_work.reset_work);

	if (hdev->pldm)
		reset_sec = HL_PLDM_HARD_RESET_MAX_TIMEOUT;
	else
		reset_sec = HL_HARD_RESET_MAX_TIMEOUT;

	/*
	 * This function is competing with the reset function, so try to
	 * take the reset atomic and if we are already in middle of reset,
	 * wait until reset function is finished. Reset function is designed
	 * to always finish. However, in Gaudi, because of all the network
	 * ports, the hard reset could take between 10-30 seconds
	 */

	timeout = ktime_add_us(ktime_get(), reset_sec * 1000 * 1000);

	spin_lock(&hdev->reset_info.lock);
	device_in_reset = !!hdev->reset_info.in_reset;
	if (!device_in_reset)
		hdev->reset_info.in_reset = 1;
	spin_unlock(&hdev->reset_info.lock);

	while (device_in_reset) {
		usleep_range(50, 200);

		spin_lock(&hdev->reset_info.lock);
		device_in_reset = !!hdev->reset_info.in_reset;
		if (!device_in_reset)
			hdev->reset_info.in_reset = 1;
		spin_unlock(&hdev->reset_info.lock);

		if (ktime_compare(ktime_get(), timeout) > 0) {
			dev_crit(hdev->dev,
				"Failed to remove device because reset function did not finish\n");
			return;
		}
	}

	/* Disable PCI access from device F/W so it won't send us additional
	 * interrupts. We disable MSI/MSI-X at the halt_engines function and we
	 * can't have the F/W sending us interrupts after that. We need to
	 * disable the access here because if the device is marked disabled,
	 * the message won't be sent. Also, in case of heartbeat, the device
	 * CPU is marked as disabled so this message won't be sent
	 */
	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);

	/* Mark device as disabled */
	hdev->disabled = true;

	take_release_locks(hdev);

	hdev->reset_info.hard_reset_pending = true;

	hl_hwmon_fini(hdev);

	cleanup_resources(hdev, true, false, false);

	/* Kill processes here after CS rollback. This is because the process
	 * can't really exit until all its CSs are done, which is what we
	 * do in cs rollback
	 */
	dev_info(hdev->dev,
		"Waiting for all processes to exit (timeout of %u seconds)",
		HL_PENDING_RESET_LONG_SEC);

	rc = device_kill_open_processes(hdev, HL_PENDING_RESET_LONG_SEC, false);
	if (rc) {
		dev_crit(hdev->dev, "Failed to kill all open processes\n");
		device_disable_open_processes(hdev, false);
	}

	rc = device_kill_open_processes(hdev, 0, true);
	if (rc) {
		dev_crit(hdev->dev, "Failed to kill all control device open processes\n");
		device_disable_open_processes(hdev, true);
	}

	hl_cb_pool_fini(hdev);

	/* Reset the H/W. It will be in idle state after this returns */
	hdev->asic_funcs->hw_fini(hdev, true, false);

	hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE;

	/* Release kernel context */
	if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1))
		dev_err(hdev->dev, "kernel ctx is still alive\n");

	hl_debugfs_remove_device(hdev);

	hl_vm_fini(hdev);

	hl_mmu_fini(hdev);

	hl_eq_fini(hdev, &hdev->event_queue);

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		hl_cq_fini(hdev, &hdev->completion_queue[i]);
	kfree(hdev->completion_queue);
	kfree(hdev->user_interrupt);

	hl_hw_queues_destroy(hdev);

	/* Call ASIC S/W finalize function */
	hdev->asic_funcs->sw_fini(hdev);

	device_early_fini(hdev);

	/* Hide devices and sysfs nodes from user */
	device_cdev_sysfs_del(hdev);

	pr_info("removed device successfully\n");
}

/*
 * MMIO register access helper functions.
 */

/*
 * hl_rreg - Read an MMIO register
 *
 * @hdev: pointer to habanalabs device structure
 * @reg: MMIO register offset (in bytes)
 *
 * Returns the value of the MMIO register we are asked to read
 *
 */
inline u32 hl_rreg(struct hl_device *hdev, u32 reg)
{
	return readl(hdev->rmmio + reg);
}

/*
 * hl_wreg - Write to an MMIO register
 *
 * @hdev: pointer to habanalabs device structure
 * @reg: MMIO register offset (in bytes)
 * @val: 32-bit value
 *
 * Writes the 32-bit value into the MMIO register
 *
 */
inline void hl_wreg(struct hl_device *hdev, u32 reg, u32 val)
{
	writel(val, hdev->rmmio + reg);
}