cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

kfd_device_queue_manager.c (67934B)
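A device queue manager (DQM) implementation for the KFD driver: it allocates HQD slots, SDMA queues, doorbells and VMIDs for user-mode compute queues, and maps them to hardware either directly (no HW scheduler), through the CP hardware scheduler via runlists, or through MES where supported.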


      1// SPDX-License-Identifier: GPL-2.0 OR MIT
      2/*
      3 * Copyright 2014-2022 Advanced Micro Devices, Inc.
      4 *
      5 * Permission is hereby granted, free of charge, to any person obtaining a
      6 * copy of this software and associated documentation files (the "Software"),
      7 * to deal in the Software without restriction, including without limitation
      8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      9 * and/or sell copies of the Software, and to permit persons to whom the
     10 * Software is furnished to do so, subject to the following conditions:
     11 *
     12 * The above copyright notice and this permission notice shall be included in
     13 * all copies or substantial portions of the Software.
     14 *
     15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
     19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     21 * OTHER DEALINGS IN THE SOFTWARE.
     22 *
     23 */
     24
     25#include <linux/ratelimit.h>
     26#include <linux/printk.h>
     27#include <linux/slab.h>
     28#include <linux/list.h>
     29#include <linux/types.h>
     30#include <linux/bitops.h>
     31#include <linux/sched.h>
     32#include "kfd_priv.h"
     33#include "kfd_device_queue_manager.h"
     34#include "kfd_mqd_manager.h"
     35#include "cik_regs.h"
     36#include "kfd_kernel_queue.h"
     37#include "amdgpu_amdkfd.h"
     38#include "mes_api_def.h"
     39
     40/* Size of the per-pipe EOP queue */
     41#define CIK_HPD_EOP_BYTES_LOG2 11
     42#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)
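/* i.e. 1 << 11 = 2048 bytes (2 KiB) of EOP queue space per pipe */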
     43
     44static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
     45				  u32 pasid, unsigned int vmid);
     46
     47static int execute_queues_cpsch(struct device_queue_manager *dqm,
     48				enum kfd_unmap_queues_filter filter,
     49				uint32_t filter_param);
     50static int unmap_queues_cpsch(struct device_queue_manager *dqm,
     51				enum kfd_unmap_queues_filter filter,
     52				uint32_t filter_param, bool reset);
     53
     54static int map_queues_cpsch(struct device_queue_manager *dqm);
     55
     56static void deallocate_sdma_queue(struct device_queue_manager *dqm,
     57				struct queue *q);
     58
     59static inline void deallocate_hqd(struct device_queue_manager *dqm,
     60				struct queue *q);
     61static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q);
     62static int allocate_sdma_queue(struct device_queue_manager *dqm,
     63				struct queue *q, const uint32_t *restore_sdma_id);
     64static void kfd_process_hw_exception(struct work_struct *work);
     65
     66static inline
     67enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
     68{
     69	if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI)
     70		return KFD_MQD_TYPE_SDMA;
     71	return KFD_MQD_TYPE_CP;
     72}
     73
     74static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
     75{
     76	int i;
     77	int pipe_offset = (mec * dqm->dev->shared_resources.num_pipe_per_mec
     78		+ pipe) * dqm->dev->shared_resources.num_queue_per_pipe;
     79
     80	/* queue is available for KFD usage if bit is 1 */
     81	for (i = 0; i <  dqm->dev->shared_resources.num_queue_per_pipe; ++i)
     82		if (test_bit(pipe_offset + i,
     83			      dqm->dev->shared_resources.cp_queue_bitmap))
     84			return true;
     85	return false;
     86}
     87
     88unsigned int get_cp_queues_num(struct device_queue_manager *dqm)
     89{
     90	return bitmap_weight(dqm->dev->shared_resources.cp_queue_bitmap,
     91				KGD_MAX_QUEUES);
     92}
     93
     94unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
     95{
     96	return dqm->dev->shared_resources.num_queue_per_pipe;
     97}
     98
     99unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
    100{
    101	return dqm->dev->shared_resources.num_pipe_per_mec;
    102}
    103
    104static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm)
    105{
    106	return kfd_get_num_sdma_engines(dqm->dev) +
    107		kfd_get_num_xgmi_sdma_engines(dqm->dev);
    108}
    109
    110unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
    111{
    112	return kfd_get_num_sdma_engines(dqm->dev) *
    113		dqm->dev->device_info.num_sdma_queues_per_engine;
    114}
    115
    116unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
    117{
    118	return kfd_get_num_xgmi_sdma_engines(dqm->dev) *
    119		dqm->dev->device_info.num_sdma_queues_per_engine;
    120}
    121
    122static inline uint64_t get_reserved_sdma_queues_bitmap(struct device_queue_manager *dqm)
    123{
    124	return dqm->dev->device_info.reserved_sdma_queues_bitmap;
    125}
    126
    127void program_sh_mem_settings(struct device_queue_manager *dqm,
    128					struct qcm_process_device *qpd)
    129{
    130	return dqm->dev->kfd2kgd->program_sh_mem_settings(
    131						dqm->dev->adev, qpd->vmid,
    132						qpd->sh_mem_config,
    133						qpd->sh_mem_ape1_base,
    134						qpd->sh_mem_ape1_limit,
    135						qpd->sh_mem_bases);
    136}
    137
    138static void kfd_hws_hang(struct device_queue_manager *dqm)
    139{
    140	/*
    141	 * Issue a GPU reset if HWS is unresponsive
    142	 */
    143	dqm->is_hws_hang = true;
    144
    145	/* It's possible we're detecting a HWS hang in the
    146	 * middle of a GPU reset. No need to schedule another
    147	 * reset in this case.
    148	 */
    149	if (!dqm->is_resetting)
    150		schedule_work(&dqm->hw_exception_work);
    151}
    152
    153static int convert_to_mes_queue_type(int queue_type)
    154{
    155	int mes_queue_type;
    156
    157	switch (queue_type) {
    158	case KFD_QUEUE_TYPE_COMPUTE:
    159		mes_queue_type = MES_QUEUE_TYPE_COMPUTE;
    160		break;
    161	case KFD_QUEUE_TYPE_SDMA:
    162		mes_queue_type = MES_QUEUE_TYPE_SDMA;
    163		break;
    164	default:
    165		WARN(1, "Invalid queue type %d", queue_type);
    166		mes_queue_type = -EINVAL;
    167		break;
    168	}
    169
    170	return mes_queue_type;
    171}
    172
    173static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
    174			 struct qcm_process_device *qpd)
    175{
    176	struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
    177	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
    178	struct mes_add_queue_input queue_input;
    179	int r, queue_type;
    180
    181	if (dqm->is_hws_hang)
    182		return -EIO;
    183
    184	memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
    185	queue_input.process_id = qpd->pqm->process->pasid;
    186	queue_input.page_table_base_addr =  qpd->page_table_base;
    187	queue_input.process_va_start = 0;
    188	queue_input.process_va_end = adev->vm_manager.max_pfn - 1;
    189	/* MES unit for quantum is 100ns */
    190	queue_input.process_quantum = KFD_MES_PROCESS_QUANTUM;  /* Equivalent to 10ms. */
    191	queue_input.process_context_addr = pdd->proc_ctx_gpu_addr;
    192	queue_input.gang_quantum = KFD_MES_GANG_QUANTUM; /* Equivalent to 1ms */
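	/* With the 100ns MES quantum unit noted above, 10ms corresponds to
	 * 100000 quantum units and 1ms to 10000 units.
	 */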
    193	queue_input.gang_context_addr = q->gang_ctx_gpu_addr;
    194	queue_input.inprocess_gang_priority = q->properties.priority;
    195	queue_input.gang_global_priority_level =
    196					AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
    197	queue_input.doorbell_offset = q->properties.doorbell_off;
    198	queue_input.mqd_addr = q->gart_mqd_addr;
    199	queue_input.wptr_addr = (uint64_t)q->properties.write_ptr;
    200	queue_input.paging = false;
    201	queue_input.tba_addr = qpd->tba_addr;
    202	queue_input.tma_addr = qpd->tma_addr;
    203
    204	queue_type = convert_to_mes_queue_type(q->properties.type);
    205	if (queue_type < 0) {
    206		pr_err("Queue type not supported with MES, queue:%d\n",
    207				q->properties.type);
    208		return -EINVAL;
    209	}
    210	queue_input.queue_type = (uint32_t)queue_type;
    211
    212	if (q->gws) {
    213		queue_input.gws_base = 0;
    214		queue_input.gws_size = qpd->num_gws;
    215	}
    216
    217	amdgpu_mes_lock(&adev->mes);
    218	r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
    219	amdgpu_mes_unlock(&adev->mes);
    220	if (r) {
    221		pr_err("failed to add hardware queue to MES, doorbell=0x%x\n",
    222			q->properties.doorbell_off);
    223		pr_err("MES might be in unrecoverable state, issue a GPU reset\n");
    224		kfd_hws_hang(dqm);
     225	}
    226
    227	return r;
    228}
    229
    230static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,
    231			struct qcm_process_device *qpd)
    232{
    233	struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
    234	int r;
    235	struct mes_remove_queue_input queue_input;
    236
    237	if (dqm->is_hws_hang)
    238		return -EIO;
    239
    240	memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input));
    241	queue_input.doorbell_offset = q->properties.doorbell_off;
    242	queue_input.gang_context_addr = q->gang_ctx_gpu_addr;
    243
    244	amdgpu_mes_lock(&adev->mes);
    245	r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
    246	amdgpu_mes_unlock(&adev->mes);
    247
    248	if (r) {
    249		pr_err("failed to remove hardware queue from MES, doorbell=0x%x\n",
    250			q->properties.doorbell_off);
    251		pr_err("MES might be in unrecoverable state, issue a GPU reset\n");
    252		kfd_hws_hang(dqm);
    253	}
    254
    255	return r;
    256}
    257
    258static int remove_all_queues_mes(struct device_queue_manager *dqm)
    259{
    260	struct device_process_node *cur;
    261	struct qcm_process_device *qpd;
    262	struct queue *q;
    263	int retval = 0;
    264
    265	list_for_each_entry(cur, &dqm->queues, list) {
    266		qpd = cur->qpd;
    267		list_for_each_entry(q, &qpd->queues_list, list) {
    268			if (q->properties.is_active) {
    269				retval = remove_queue_mes(dqm, q, qpd);
    270				if (retval) {
    271					pr_err("%s: Failed to remove queue %d for dev %d",
    272						__func__,
    273						q->properties.queue_id,
    274						dqm->dev->id);
    275					return retval;
    276				}
    277			}
    278		}
    279	}
    280
    281	return retval;
    282}
    283
    284static void increment_queue_count(struct device_queue_manager *dqm,
    285				  struct qcm_process_device *qpd,
    286				  struct queue *q)
    287{
    288	dqm->active_queue_count++;
    289	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
    290	    q->properties.type == KFD_QUEUE_TYPE_DIQ)
    291		dqm->active_cp_queue_count++;
    292
    293	if (q->properties.is_gws) {
    294		dqm->gws_queue_count++;
    295		qpd->mapped_gws_queue = true;
    296	}
    297}
    298
    299static void decrement_queue_count(struct device_queue_manager *dqm,
    300				  struct qcm_process_device *qpd,
    301				  struct queue *q)
    302{
    303	dqm->active_queue_count--;
    304	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
    305	    q->properties.type == KFD_QUEUE_TYPE_DIQ)
    306		dqm->active_cp_queue_count--;
    307
    308	if (q->properties.is_gws) {
    309		dqm->gws_queue_count--;
    310		qpd->mapped_gws_queue = false;
    311	}
    312}
    313
    314/*
    315 * Allocate a doorbell ID to this queue.
     316 * If doorbell_id is passed in, make sure the requested ID is valid, then allocate it.
    317 */
    318static int allocate_doorbell(struct qcm_process_device *qpd,
    319			     struct queue *q,
    320			     uint32_t const *restore_id)
    321{
    322	struct kfd_dev *dev = qpd->dqm->dev;
    323
    324	if (!KFD_IS_SOC15(dev)) {
    325		/* On pre-SOC15 chips we need to use the queue ID to
    326		 * preserve the user mode ABI.
    327		 */
    328
    329		if (restore_id && *restore_id != q->properties.queue_id)
    330			return -EINVAL;
    331
    332		q->doorbell_id = q->properties.queue_id;
    333	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
    334			q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
    335		/* For SDMA queues on SOC15 with 8-byte doorbell, use static
    336		 * doorbell assignments based on the engine and queue id.
     337		 * The doorbell index distance between RLC (2*i) and (2*i+1)
     338		 * for an SDMA engine is 512.
    339		 */
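		/* For example, assuming KFD_QUEUE_DOORBELL_MIRROR_OFFSET is the
		 * 512-doorbell distance described above, an engine base B with
		 * sdma_queue_id 2 yields doorbell B + 1, while sdma_queue_id 3
		 * yields B + 512 + 1: even queues land in the RLC (2*i) range,
		 * odd queues in the RLC (2*i+1) range.
		 */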
    340
    341		uint32_t *idx_offset = dev->shared_resources.sdma_doorbell_idx;
    342		uint32_t valid_id = idx_offset[q->properties.sdma_engine_id]
    343						+ (q->properties.sdma_queue_id & 1)
    344						* KFD_QUEUE_DOORBELL_MIRROR_OFFSET
    345						+ (q->properties.sdma_queue_id >> 1);
    346
    347		if (restore_id && *restore_id != valid_id)
    348			return -EINVAL;
    349		q->doorbell_id = valid_id;
    350	} else {
    351		/* For CP queues on SOC15 */
    352		if (restore_id) {
     353			/* make sure that ID is free */
    354			if (__test_and_set_bit(*restore_id, qpd->doorbell_bitmap))
    355				return -EINVAL;
    356
    357			q->doorbell_id = *restore_id;
    358		} else {
    359			/* or reserve a free doorbell ID */
    360			unsigned int found;
    361
    362			found = find_first_zero_bit(qpd->doorbell_bitmap,
    363						KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
    364			if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
    365				pr_debug("No doorbells available");
    366				return -EBUSY;
    367			}
    368			set_bit(found, qpd->doorbell_bitmap);
    369			q->doorbell_id = found;
    370		}
    371	}
    372
    373	q->properties.doorbell_off =
    374		kfd_get_doorbell_dw_offset_in_bar(dev, qpd_to_pdd(qpd),
    375					  q->doorbell_id);
    376	return 0;
    377}
    378
    379static void deallocate_doorbell(struct qcm_process_device *qpd,
    380				struct queue *q)
    381{
    382	unsigned int old;
    383	struct kfd_dev *dev = qpd->dqm->dev;
    384
    385	if (!KFD_IS_SOC15(dev) ||
    386	    q->properties.type == KFD_QUEUE_TYPE_SDMA ||
    387	    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
    388		return;
    389
    390	old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
    391	WARN_ON(!old);
    392}
    393
    394static void program_trap_handler_settings(struct device_queue_manager *dqm,
    395				struct qcm_process_device *qpd)
    396{
    397	if (dqm->dev->kfd2kgd->program_trap_handler_settings)
    398		dqm->dev->kfd2kgd->program_trap_handler_settings(
    399						dqm->dev->adev, qpd->vmid,
    400						qpd->tba_addr, qpd->tma_addr);
    401}
    402
    403static int allocate_vmid(struct device_queue_manager *dqm,
    404			struct qcm_process_device *qpd,
    405			struct queue *q)
    406{
    407	int allocated_vmid = -1, i;
    408
    409	for (i = dqm->dev->vm_info.first_vmid_kfd;
    410			i <= dqm->dev->vm_info.last_vmid_kfd; i++) {
    411		if (!dqm->vmid_pasid[i]) {
    412			allocated_vmid = i;
    413			break;
    414		}
    415	}
    416
    417	if (allocated_vmid < 0) {
    418		pr_err("no more vmid to allocate\n");
    419		return -ENOSPC;
    420	}
    421
    422	pr_debug("vmid allocated: %d\n", allocated_vmid);
    423
    424	dqm->vmid_pasid[allocated_vmid] = q->process->pasid;
    425
    426	set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid);
    427
    428	qpd->vmid = allocated_vmid;
    429	q->properties.vmid = allocated_vmid;
    430
    431	program_sh_mem_settings(dqm, qpd);
    432
    433	if (KFD_IS_SOC15(dqm->dev) && dqm->dev->cwsr_enabled)
    434		program_trap_handler_settings(dqm, qpd);
    435
    436	/* qpd->page_table_base is set earlier when register_process()
    437	 * is called, i.e. when the first queue is created.
    438	 */
    439	dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->adev,
    440			qpd->vmid,
    441			qpd->page_table_base);
    442	/* invalidate the VM context after pasid and vmid mapping is set up */
    443	kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
    444
    445	if (dqm->dev->kfd2kgd->set_scratch_backing_va)
    446		dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev,
    447				qpd->sh_hidden_private_base, qpd->vmid);
    448
    449	return 0;
    450}
    451
    452static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
    453				struct qcm_process_device *qpd)
    454{
    455	const struct packet_manager_funcs *pmf = qpd->dqm->packet_mgr.pmf;
    456	int ret;
    457
    458	if (!qpd->ib_kaddr)
    459		return -ENOMEM;
    460
    461	ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
    462	if (ret)
    463		return ret;
    464
    465	return amdgpu_amdkfd_submit_ib(kdev->adev, KGD_ENGINE_MEC1, qpd->vmid,
    466				qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
    467				pmf->release_mem_size / sizeof(uint32_t));
    468}
    469
    470static void deallocate_vmid(struct device_queue_manager *dqm,
    471				struct qcm_process_device *qpd,
    472				struct queue *q)
    473{
    474	/* On GFX v7, CP doesn't flush TC at dequeue */
    475	if (q->device->adev->asic_type == CHIP_HAWAII)
    476		if (flush_texture_cache_nocpsch(q->device, qpd))
    477			pr_err("Failed to flush TC\n");
    478
    479	kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
    480
    481	/* Release the vmid mapping */
    482	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
    483	dqm->vmid_pasid[qpd->vmid] = 0;
    484
    485	qpd->vmid = 0;
    486	q->properties.vmid = 0;
    487}
    488
    489static int create_queue_nocpsch(struct device_queue_manager *dqm,
    490				struct queue *q,
    491				struct qcm_process_device *qpd,
    492				const struct kfd_criu_queue_priv_data *qd,
    493				const void *restore_mqd, const void *restore_ctl_stack)
    494{
    495	struct mqd_manager *mqd_mgr;
    496	int retval;
    497
    498	dqm_lock(dqm);
    499
    500	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
    501		pr_warn("Can't create new usermode queue because %d queues were already created\n",
    502				dqm->total_queue_count);
    503		retval = -EPERM;
    504		goto out_unlock;
    505	}
    506
    507	if (list_empty(&qpd->queues_list)) {
    508		retval = allocate_vmid(dqm, qpd, q);
    509		if (retval)
    510			goto out_unlock;
    511	}
    512	q->properties.vmid = qpd->vmid;
    513	/*
    514	 * Eviction state logic: mark all queues as evicted, even ones
    515	 * not currently active. Restoring inactive queues later only
    516	 * updates the is_evicted flag but is a no-op otherwise.
    517	 */
    518	q->properties.is_evicted = !!qpd->evicted;
    519
    520	q->properties.tba_addr = qpd->tba_addr;
    521	q->properties.tma_addr = qpd->tma_addr;
    522
    523	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
    524			q->properties.type)];
    525	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
    526		retval = allocate_hqd(dqm, q);
    527		if (retval)
    528			goto deallocate_vmid;
    529		pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
    530			q->pipe, q->queue);
    531	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
    532		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
    533		retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL);
    534		if (retval)
    535			goto deallocate_vmid;
    536		dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
    537	}
    538
    539	retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL);
    540	if (retval)
    541		goto out_deallocate_hqd;
    542
    543	/* Temporarily release dqm lock to avoid a circular lock dependency */
    544	dqm_unlock(dqm);
    545	q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties);
    546	dqm_lock(dqm);
    547
    548	if (!q->mqd_mem_obj) {
    549		retval = -ENOMEM;
    550		goto out_deallocate_doorbell;
    551	}
    552
    553	if (qd)
    554		mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
    555				     &q->properties, restore_mqd, restore_ctl_stack,
    556				     qd->ctl_stack_size);
    557	else
    558		mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
    559					&q->gart_mqd_addr, &q->properties);
    560
    561	if (q->properties.is_active) {
    562		if (!dqm->sched_running) {
    563			WARN_ONCE(1, "Load non-HWS mqd while stopped\n");
    564			goto add_queue_to_list;
    565		}
    566
    567		if (WARN(q->process->mm != current->mm,
    568					"should only run in user thread"))
    569			retval = -EFAULT;
    570		else
    571			retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
    572					q->queue, &q->properties, current->mm);
    573		if (retval)
    574			goto out_free_mqd;
    575	}
    576
    577add_queue_to_list:
    578	list_add(&q->list, &qpd->queues_list);
    579	qpd->queue_count++;
    580	if (q->properties.is_active)
    581		increment_queue_count(dqm, qpd, q);
    582
    583	/*
    584	 * Unconditionally increment this counter, regardless of the queue's
    585	 * type or whether the queue is active.
    586	 */
    587	dqm->total_queue_count++;
    588	pr_debug("Total of %d queues are accountable so far\n",
    589			dqm->total_queue_count);
    590	goto out_unlock;
    591
    592out_free_mqd:
    593	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
    594out_deallocate_doorbell:
    595	deallocate_doorbell(qpd, q);
    596out_deallocate_hqd:
    597	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
    598		deallocate_hqd(dqm, q);
    599	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
    600		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
    601		deallocate_sdma_queue(dqm, q);
    602deallocate_vmid:
    603	if (list_empty(&qpd->queues_list))
    604		deallocate_vmid(dqm, qpd, q);
    605out_unlock:
    606	dqm_unlock(dqm);
    607	return retval;
    608}
    609
    610static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
    611{
    612	bool set;
    613	int pipe, bit, i;
    614
    615	set = false;
    616
    617	for (pipe = dqm->next_pipe_to_allocate, i = 0;
    618			i < get_pipes_per_mec(dqm);
    619			pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {
    620
    621		if (!is_pipe_enabled(dqm, 0, pipe))
    622			continue;
    623
    624		if (dqm->allocated_queues[pipe] != 0) {
    625			bit = ffs(dqm->allocated_queues[pipe]) - 1;
    626			dqm->allocated_queues[pipe] &= ~(1 << bit);
    627			q->pipe = pipe;
    628			q->queue = bit;
    629			set = true;
    630			break;
    631		}
    632	}
    633
    634	if (!set)
    635		return -EBUSY;
    636
    637	pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
    638	/* horizontal hqd allocation */
    639	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);
    640
    641	return 0;
    642}
    643
    644static inline void deallocate_hqd(struct device_queue_manager *dqm,
    645				struct queue *q)
    646{
    647	dqm->allocated_queues[q->pipe] |= (1 << q->queue);
    648}
    649
    650#define SQ_IND_CMD_CMD_KILL		0x00000003
    651#define SQ_IND_CMD_MODE_BROADCAST	0x00000001
    652
    653static int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
    654{
    655	int status = 0;
    656	unsigned int vmid;
    657	uint16_t queried_pasid;
    658	union SQ_CMD_BITS reg_sq_cmd;
    659	union GRBM_GFX_INDEX_BITS reg_gfx_index;
    660	struct kfd_process_device *pdd;
    661	int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
    662	int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;
    663
    664	reg_sq_cmd.u32All = 0;
    665	reg_gfx_index.u32All = 0;
    666
    667	pr_debug("Killing all process wavefronts\n");
    668
    669	if (!dev->kfd2kgd->get_atc_vmid_pasid_mapping_info) {
     670		pr_err("no vmid pasid mapping supported\n");
    671		return -EOPNOTSUPP;
    672	}
    673
    674	/* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
    675	 * ATC_VMID15_PASID_MAPPING
    676	 * to check which VMID the current process is mapped to.
    677	 */
    678
    679	for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
    680		status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
    681				(dev->adev, vmid, &queried_pasid);
    682
    683		if (status && queried_pasid == p->pasid) {
    684			pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n",
    685					vmid, p->pasid);
    686			break;
    687		}
    688	}
    689
    690	if (vmid > last_vmid_to_scan) {
    691		pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid);
    692		return -EFAULT;
    693	}
    694
     695	/* take the VMID for that process the safe way, via its PDD */
    696	pdd = kfd_get_process_device_data(dev, p);
    697	if (!pdd)
    698		return -EFAULT;
    699
    700	reg_gfx_index.bits.sh_broadcast_writes = 1;
    701	reg_gfx_index.bits.se_broadcast_writes = 1;
    702	reg_gfx_index.bits.instance_broadcast_writes = 1;
    703	reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
    704	reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
    705	reg_sq_cmd.bits.vm_id = vmid;
    706
    707	dev->kfd2kgd->wave_control_execute(dev->adev,
    708					reg_gfx_index.u32All,
    709					reg_sq_cmd.u32All);
    710
    711	return 0;
    712}
    713
     714/* The dqm lock has to be held before calling destroy_queue_nocpsch_locked
     715 * to avoid unsynchronized access
    716 */
    717static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
    718				struct qcm_process_device *qpd,
    719				struct queue *q)
    720{
    721	int retval;
    722	struct mqd_manager *mqd_mgr;
    723
    724	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
    725			q->properties.type)];
    726
    727	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
    728		deallocate_hqd(dqm, q);
    729	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
    730		deallocate_sdma_queue(dqm, q);
    731	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
    732		deallocate_sdma_queue(dqm, q);
    733	else {
    734		pr_debug("q->properties.type %d is invalid\n",
    735				q->properties.type);
    736		return -EINVAL;
    737	}
    738	dqm->total_queue_count--;
    739
    740	deallocate_doorbell(qpd, q);
    741
    742	if (!dqm->sched_running) {
    743		WARN_ONCE(1, "Destroy non-HWS queue while stopped\n");
    744		return 0;
    745	}
    746
    747	retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
    748				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
    749				KFD_UNMAP_LATENCY_MS,
    750				q->pipe, q->queue);
    751	if (retval == -ETIME)
    752		qpd->reset_wavefronts = true;
    753
    754	list_del(&q->list);
    755	if (list_empty(&qpd->queues_list)) {
    756		if (qpd->reset_wavefronts) {
    757			pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
    758					dqm->dev);
    759			/* dbgdev_wave_reset_wavefronts has to be called before
    760			 * deallocate_vmid(), i.e. when vmid is still in use.
    761			 */
    762			dbgdev_wave_reset_wavefronts(dqm->dev,
    763					qpd->pqm->process);
    764			qpd->reset_wavefronts = false;
    765		}
    766
    767		deallocate_vmid(dqm, qpd, q);
    768	}
    769	qpd->queue_count--;
    770	if (q->properties.is_active)
    771		decrement_queue_count(dqm, qpd, q);
    772
    773	return retval;
    774}
    775
    776static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
    777				struct qcm_process_device *qpd,
    778				struct queue *q)
    779{
    780	int retval;
    781	uint64_t sdma_val = 0;
    782	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
    783	struct mqd_manager *mqd_mgr =
    784		dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)];
    785
    786	/* Get the SDMA queue stats */
    787	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
    788	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
    789		retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
    790							&sdma_val);
    791		if (retval)
    792			pr_err("Failed to read SDMA queue counter for queue: %d\n",
    793				q->properties.queue_id);
    794	}
    795
    796	dqm_lock(dqm);
    797	retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
    798	if (!retval)
    799		pdd->sdma_past_activity_counter += sdma_val;
    800	dqm_unlock(dqm);
    801
    802	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
    803
    804	return retval;
    805}
    806
    807static int update_queue(struct device_queue_manager *dqm, struct queue *q,
    808			struct mqd_update_info *minfo)
    809{
    810	int retval = 0;
    811	struct mqd_manager *mqd_mgr;
    812	struct kfd_process_device *pdd;
    813	bool prev_active = false;
    814	bool add_queue = false;
    815
    816	dqm_lock(dqm);
    817	pdd = kfd_get_process_device_data(q->device, q->process);
    818	if (!pdd) {
    819		retval = -ENODEV;
    820		goto out_unlock;
    821	}
    822	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
    823			q->properties.type)];
    824
    825	/* Save previous activity state for counters */
    826	prev_active = q->properties.is_active;
    827
    828	/* Make sure the queue is unmapped before updating the MQD */
    829	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
    830		if (!dqm->dev->shared_resources.enable_mes)
    831			retval = unmap_queues_cpsch(dqm,
    832						    KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false);
    833		else if (prev_active)
    834			retval = remove_queue_mes(dqm, q, &pdd->qpd);
    835
    836		if (retval) {
    837			pr_err("unmap queue failed\n");
    838			goto out_unlock;
    839		}
    840	} else if (prev_active &&
    841		   (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
    842		    q->properties.type == KFD_QUEUE_TYPE_SDMA ||
    843		    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
    844
    845		if (!dqm->sched_running) {
    846			WARN_ONCE(1, "Update non-HWS queue while stopped\n");
    847			goto out_unlock;
    848		}
    849
    850		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
    851				(dqm->dev->cwsr_enabled ?
    852				 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE :
    853				 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
    854				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
    855		if (retval) {
    856			pr_err("destroy mqd failed\n");
    857			goto out_unlock;
    858		}
    859	}
    860
    861	mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties, minfo);
    862
    863	/*
    864	 * check active state vs. the previous state and modify
    865	 * counter accordingly. map_queues_cpsch uses the
    866	 * dqm->active_queue_count to determine whether a new runlist must be
    867	 * uploaded.
    868	 */
    869	if (q->properties.is_active && !prev_active) {
    870		increment_queue_count(dqm, &pdd->qpd, q);
    871	} else if (!q->properties.is_active && prev_active) {
    872		decrement_queue_count(dqm, &pdd->qpd, q);
    873	} else if (q->gws && !q->properties.is_gws) {
    874		if (q->properties.is_active) {
    875			dqm->gws_queue_count++;
    876			pdd->qpd.mapped_gws_queue = true;
    877		}
    878		q->properties.is_gws = true;
    879	} else if (!q->gws && q->properties.is_gws) {
    880		if (q->properties.is_active) {
    881			dqm->gws_queue_count--;
    882			pdd->qpd.mapped_gws_queue = false;
    883		}
    884		q->properties.is_gws = false;
    885	}
    886
    887	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
    888		if (!dqm->dev->shared_resources.enable_mes)
    889			retval = map_queues_cpsch(dqm);
    890		else if (add_queue)
    891			retval = add_queue_mes(dqm, q, &pdd->qpd);
    892	} else if (q->properties.is_active &&
    893		 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
    894		  q->properties.type == KFD_QUEUE_TYPE_SDMA ||
    895		  q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
    896		if (WARN(q->process->mm != current->mm,
    897			 "should only run in user thread"))
    898			retval = -EFAULT;
    899		else
    900			retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd,
    901						   q->pipe, q->queue,
    902						   &q->properties, current->mm);
    903	}
    904
    905out_unlock:
    906	dqm_unlock(dqm);
    907	return retval;
    908}
    909
    910static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
    911					struct qcm_process_device *qpd)
    912{
    913	struct queue *q;
    914	struct mqd_manager *mqd_mgr;
    915	struct kfd_process_device *pdd;
    916	int retval, ret = 0;
    917
    918	dqm_lock(dqm);
    919	if (qpd->evicted++ > 0) /* already evicted, do nothing */
    920		goto out;
    921
    922	pdd = qpd_to_pdd(qpd);
    923	pr_debug_ratelimited("Evicting PASID 0x%x queues\n",
    924			    pdd->process->pasid);
    925
    926	pdd->last_evict_timestamp = get_jiffies_64();
    927	/* Mark all queues as evicted. Deactivate all active queues on
    928	 * the qpd.
    929	 */
    930	list_for_each_entry(q, &qpd->queues_list, list) {
    931		q->properties.is_evicted = true;
    932		if (!q->properties.is_active)
    933			continue;
    934
    935		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
    936				q->properties.type)];
    937		q->properties.is_active = false;
    938		decrement_queue_count(dqm, qpd, q);
    939
    940		if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n"))
    941			continue;
    942
    943		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
    944				(dqm->dev->cwsr_enabled ?
    945				 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE :
    946				 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
    947				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
    948		if (retval && !ret)
    949			/* Return the first error, but keep going to
    950			 * maintain a consistent eviction state
    951			 */
    952			ret = retval;
    953	}
    954
    955out:
    956	dqm_unlock(dqm);
    957	return ret;
    958}
    959
    960static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
    961				      struct qcm_process_device *qpd)
    962{
    963	struct queue *q;
    964	struct kfd_process_device *pdd;
    965	int retval = 0;
    966
    967	dqm_lock(dqm);
    968	if (qpd->evicted++ > 0) /* already evicted, do nothing */
    969		goto out;
    970
    971	pdd = qpd_to_pdd(qpd);
    972	pr_debug_ratelimited("Evicting PASID 0x%x queues\n",
    973			    pdd->process->pasid);
    974
    975	/* Mark all queues as evicted. Deactivate all active queues on
    976	 * the qpd.
    977	 */
    978	list_for_each_entry(q, &qpd->queues_list, list) {
    979		q->properties.is_evicted = true;
    980		if (!q->properties.is_active)
    981			continue;
    982
    983		q->properties.is_active = false;
    984		decrement_queue_count(dqm, qpd, q);
    985
    986		if (dqm->dev->shared_resources.enable_mes) {
    987			retval = remove_queue_mes(dqm, q, qpd);
    988			if (retval) {
    989				pr_err("Failed to evict queue %d\n",
    990					q->properties.queue_id);
    991				goto out;
    992			}
    993		}
    994	}
    995	pdd->last_evict_timestamp = get_jiffies_64();
    996	if (!dqm->dev->shared_resources.enable_mes)
    997		retval = execute_queues_cpsch(dqm,
    998					      qpd->is_debug ?
    999					      KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
   1000					      KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
   1001
   1002out:
   1003	dqm_unlock(dqm);
   1004	return retval;
   1005}
   1006
   1007static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
   1008					  struct qcm_process_device *qpd)
   1009{
   1010	struct mm_struct *mm = NULL;
   1011	struct queue *q;
   1012	struct mqd_manager *mqd_mgr;
   1013	struct kfd_process_device *pdd;
   1014	uint64_t pd_base;
   1015	uint64_t eviction_duration;
   1016	int retval, ret = 0;
   1017
   1018	pdd = qpd_to_pdd(qpd);
   1019	/* Retrieve PD base */
   1020	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
   1021
   1022	dqm_lock(dqm);
   1023	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
   1024		goto out;
   1025	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
   1026		qpd->evicted--;
   1027		goto out;
   1028	}
   1029
   1030	pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
   1031			    pdd->process->pasid);
   1032
   1033	/* Update PD Base in QPD */
   1034	qpd->page_table_base = pd_base;
   1035	pr_debug("Updated PD address to 0x%llx\n", pd_base);
   1036
   1037	if (!list_empty(&qpd->queues_list)) {
   1038		dqm->dev->kfd2kgd->set_vm_context_page_table_base(
   1039				dqm->dev->adev,
   1040				qpd->vmid,
   1041				qpd->page_table_base);
   1042		kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
   1043	}
   1044
   1045	/* Take a safe reference to the mm_struct, which may otherwise
   1046	 * disappear even while the kfd_process is still referenced.
   1047	 */
   1048	mm = get_task_mm(pdd->process->lead_thread);
   1049	if (!mm) {
   1050		ret = -EFAULT;
   1051		goto out;
   1052	}
   1053
   1054	/* Remove the eviction flags. Activate queues that are not
   1055	 * inactive for other reasons.
   1056	 */
   1057	list_for_each_entry(q, &qpd->queues_list, list) {
   1058		q->properties.is_evicted = false;
   1059		if (!QUEUE_IS_ACTIVE(q->properties))
   1060			continue;
   1061
   1062		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
   1063				q->properties.type)];
   1064		q->properties.is_active = true;
   1065		increment_queue_count(dqm, qpd, q);
   1066
   1067		if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n"))
   1068			continue;
   1069
   1070		retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
   1071				       q->queue, &q->properties, mm);
   1072		if (retval && !ret)
   1073			/* Return the first error, but keep going to
   1074			 * maintain a consistent eviction state
   1075			 */
   1076			ret = retval;
   1077	}
   1078	qpd->evicted = 0;
   1079	eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
   1080	atomic64_add(eviction_duration, &pdd->evict_duration_counter);
   1081out:
   1082	if (mm)
   1083		mmput(mm);
   1084	dqm_unlock(dqm);
   1085	return ret;
   1086}
   1087
   1088static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
   1089					struct qcm_process_device *qpd)
   1090{
   1091	struct queue *q;
   1092	struct kfd_process_device *pdd;
   1093	uint64_t pd_base;
   1094	uint64_t eviction_duration;
   1095	int retval = 0;
   1096
   1097	pdd = qpd_to_pdd(qpd);
   1098	/* Retrieve PD base */
   1099	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
   1100
   1101	dqm_lock(dqm);
   1102	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
   1103		goto out;
   1104	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
   1105		qpd->evicted--;
   1106		goto out;
   1107	}
   1108
   1109	pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
   1110			    pdd->process->pasid);
   1111
   1112	/* Update PD Base in QPD */
   1113	qpd->page_table_base = pd_base;
   1114	pr_debug("Updated PD address to 0x%llx\n", pd_base);
   1115
   1116	/* activate all active queues on the qpd */
   1117	list_for_each_entry(q, &qpd->queues_list, list) {
   1118		q->properties.is_evicted = false;
   1119		if (!QUEUE_IS_ACTIVE(q->properties))
   1120			continue;
   1121
   1122		q->properties.is_active = true;
   1123		increment_queue_count(dqm, &pdd->qpd, q);
   1124
   1125		if (dqm->dev->shared_resources.enable_mes) {
   1126			retval = add_queue_mes(dqm, q, qpd);
   1127			if (retval) {
   1128				pr_err("Failed to restore queue %d\n",
   1129					q->properties.queue_id);
   1130				goto out;
   1131			}
   1132		}
   1133	}
   1134	if (!dqm->dev->shared_resources.enable_mes)
   1135		retval = execute_queues_cpsch(dqm,
   1136					      KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
   1137	qpd->evicted = 0;
   1138	eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
   1139	atomic64_add(eviction_duration, &pdd->evict_duration_counter);
   1140out:
   1141	dqm_unlock(dqm);
   1142	return retval;
   1143}
   1144
   1145static int register_process(struct device_queue_manager *dqm,
   1146					struct qcm_process_device *qpd)
   1147{
   1148	struct device_process_node *n;
   1149	struct kfd_process_device *pdd;
   1150	uint64_t pd_base;
   1151	int retval;
   1152
   1153	n = kzalloc(sizeof(*n), GFP_KERNEL);
   1154	if (!n)
   1155		return -ENOMEM;
   1156
   1157	n->qpd = qpd;
   1158
   1159	pdd = qpd_to_pdd(qpd);
   1160	/* Retrieve PD base */
   1161	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
   1162
   1163	dqm_lock(dqm);
   1164	list_add(&n->list, &dqm->queues);
   1165
   1166	/* Update PD Base in QPD */
   1167	qpd->page_table_base = pd_base;
   1168	pr_debug("Updated PD address to 0x%llx\n", pd_base);
   1169
   1170	retval = dqm->asic_ops.update_qpd(dqm, qpd);
   1171
   1172	dqm->processes_count++;
   1173
   1174	dqm_unlock(dqm);
   1175
   1176	/* Outside the DQM lock because under the DQM lock we can't do
   1177	 * reclaim or take other locks that others hold while reclaiming.
   1178	 */
   1179	kfd_inc_compute_active(dqm->dev);
   1180
   1181	return retval;
   1182}
   1183
   1184static int unregister_process(struct device_queue_manager *dqm,
   1185					struct qcm_process_device *qpd)
   1186{
   1187	int retval;
   1188	struct device_process_node *cur, *next;
   1189
   1190	pr_debug("qpd->queues_list is %s\n",
   1191			list_empty(&qpd->queues_list) ? "empty" : "not empty");
   1192
   1193	retval = 0;
   1194	dqm_lock(dqm);
   1195
   1196	list_for_each_entry_safe(cur, next, &dqm->queues, list) {
   1197		if (qpd == cur->qpd) {
   1198			list_del(&cur->list);
   1199			kfree(cur);
   1200			dqm->processes_count--;
   1201			goto out;
   1202		}
   1203	}
   1204	/* qpd not found in dqm list */
   1205	retval = 1;
   1206out:
   1207	dqm_unlock(dqm);
   1208
   1209	/* Outside the DQM lock because under the DQM lock we can't do
   1210	 * reclaim or take other locks that others hold while reclaiming.
   1211	 */
   1212	if (!retval)
   1213		kfd_dec_compute_active(dqm->dev);
   1214
   1215	return retval;
   1216}
   1217
   1218static int
   1219set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid,
   1220			unsigned int vmid)
   1221{
   1222	return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
   1223						dqm->dev->adev, pasid, vmid);
   1224}
   1225
   1226static void init_interrupts(struct device_queue_manager *dqm)
   1227{
   1228	unsigned int i;
   1229
   1230	for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++)
   1231		if (is_pipe_enabled(dqm, 0, i))
   1232			dqm->dev->kfd2kgd->init_interrupts(dqm->dev->adev, i);
   1233}
   1234
   1235static int initialize_nocpsch(struct device_queue_manager *dqm)
   1236{
   1237	int pipe, queue;
   1238
   1239	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
   1240
   1241	dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
   1242					sizeof(unsigned int), GFP_KERNEL);
   1243	if (!dqm->allocated_queues)
   1244		return -ENOMEM;
   1245
   1246	mutex_init(&dqm->lock_hidden);
   1247	INIT_LIST_HEAD(&dqm->queues);
   1248	dqm->active_queue_count = dqm->next_pipe_to_allocate = 0;
   1249	dqm->active_cp_queue_count = 0;
   1250	dqm->gws_queue_count = 0;
   1251
   1252	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
   1253		int pipe_offset = pipe * get_queues_per_pipe(dqm);
   1254
   1255		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
   1256			if (test_bit(pipe_offset + queue,
   1257				     dqm->dev->shared_resources.cp_queue_bitmap))
   1258				dqm->allocated_queues[pipe] |= 1 << queue;
   1259	}
   1260
   1261	memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid));
   1262
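	/* e.g. with 8 SDMA queues this evaluates to ~0ULL >> 56 == 0xff:
	 * one bit set per allocatable SDMA queue
	 */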
   1263	dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
   1264	dqm->sdma_bitmap &= ~(get_reserved_sdma_queues_bitmap(dqm));
   1265	pr_info("sdma_bitmap: %llx\n", dqm->sdma_bitmap);
   1266
   1267	dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
   1268
   1269	return 0;
   1270}
   1271
   1272static void uninitialize(struct device_queue_manager *dqm)
   1273{
   1274	int i;
   1275
   1276	WARN_ON(dqm->active_queue_count > 0 || dqm->processes_count > 0);
   1277
   1278	kfree(dqm->allocated_queues);
   1279	for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
   1280		kfree(dqm->mqd_mgrs[i]);
   1281	mutex_destroy(&dqm->lock_hidden);
   1282}
   1283
   1284static int start_nocpsch(struct device_queue_manager *dqm)
   1285{
   1286	int r = 0;
   1287
   1288	pr_info("SW scheduler is used");
   1289	init_interrupts(dqm);
   1290
   1291	if (dqm->dev->adev->asic_type == CHIP_HAWAII)
   1292		r = pm_init(&dqm->packet_mgr, dqm);
   1293	if (!r)
   1294		dqm->sched_running = true;
   1295
   1296	return r;
   1297}
   1298
   1299static int stop_nocpsch(struct device_queue_manager *dqm)
   1300{
   1301	if (dqm->dev->adev->asic_type == CHIP_HAWAII)
   1302		pm_uninit(&dqm->packet_mgr, false);
   1303	dqm->sched_running = false;
   1304
   1305	return 0;
   1306}
   1307
   1308static void pre_reset(struct device_queue_manager *dqm)
   1309{
   1310	dqm_lock(dqm);
   1311	dqm->is_resetting = true;
   1312	dqm_unlock(dqm);
   1313}
   1314
   1315static int allocate_sdma_queue(struct device_queue_manager *dqm,
   1316				struct queue *q, const uint32_t *restore_sdma_id)
   1317{
   1318	int bit;
   1319
   1320	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
   1321		if (dqm->sdma_bitmap == 0) {
   1322			pr_err("No more SDMA queue to allocate\n");
   1323			return -ENOMEM;
   1324		}
   1325
   1326		if (restore_sdma_id) {
   1327			/* Re-use existing sdma_id */
   1328			if (!(dqm->sdma_bitmap & (1ULL << *restore_sdma_id))) {
   1329				pr_err("SDMA queue already in use\n");
   1330				return -EBUSY;
   1331			}
   1332			dqm->sdma_bitmap &= ~(1ULL << *restore_sdma_id);
   1333			q->sdma_id = *restore_sdma_id;
   1334		} else {
   1335			/* Find first available sdma_id */
   1336			bit = __ffs64(dqm->sdma_bitmap);
   1337			dqm->sdma_bitmap &= ~(1ULL << bit);
   1338			q->sdma_id = bit;
   1339		}
   1340
   1341		q->properties.sdma_engine_id = q->sdma_id %
   1342				kfd_get_num_sdma_engines(dqm->dev);
   1343		q->properties.sdma_queue_id = q->sdma_id /
   1344				kfd_get_num_sdma_engines(dqm->dev);
   1345	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
   1346		if (dqm->xgmi_sdma_bitmap == 0) {
   1347			pr_err("No more XGMI SDMA queue to allocate\n");
   1348			return -ENOMEM;
   1349		}
   1350		if (restore_sdma_id) {
   1351			/* Re-use existing sdma_id */
   1352			if (!(dqm->xgmi_sdma_bitmap & (1ULL << *restore_sdma_id))) {
   1353				pr_err("SDMA queue already in use\n");
   1354				return -EBUSY;
   1355			}
   1356			dqm->xgmi_sdma_bitmap &= ~(1ULL << *restore_sdma_id);
   1357			q->sdma_id = *restore_sdma_id;
   1358		} else {
   1359			bit = __ffs64(dqm->xgmi_sdma_bitmap);
   1360			dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
   1361			q->sdma_id = bit;
   1362		}
    1363		/* sdma_engine_id is an engine id spanning
    1364		 * both PCIe-optimized and XGMI-optimized
    1365		 * SDMA engines. The calculation below
    1366		 * assumes the first N engines are always
    1367		 * the PCIe-optimized ones.
    1368		 */
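		/* For example, assuming 2 PCIe-optimized and 2 XGMI-optimized
		 * engines, xgmi sdma_id 5 maps to sdma_engine_id 2 + (5 % 2) = 3
		 * and sdma_queue_id 5 / 2 = 2.
		 */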
   1369		q->properties.sdma_engine_id =
   1370			kfd_get_num_sdma_engines(dqm->dev) +
   1371			q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev);
   1372		q->properties.sdma_queue_id = q->sdma_id /
   1373			kfd_get_num_xgmi_sdma_engines(dqm->dev);
   1374	}
   1375
   1376	pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
   1377	pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
   1378
   1379	return 0;
   1380}
   1381
   1382static void deallocate_sdma_queue(struct device_queue_manager *dqm,
   1383				struct queue *q)
   1384{
   1385	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
   1386		if (q->sdma_id >= get_num_sdma_queues(dqm))
   1387			return;
   1388		dqm->sdma_bitmap |= (1ULL << q->sdma_id);
   1389	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
   1390		if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm))
   1391			return;
   1392		dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id);
   1393	}
   1394}
   1395
   1396/*
   1397 * Device Queue Manager implementation for cp scheduler
   1398 */
   1399
   1400static int set_sched_resources(struct device_queue_manager *dqm)
   1401{
   1402	int i, mec;
   1403	struct scheduling_resources res;
   1404
   1405	res.vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap;
   1406
   1407	res.queue_mask = 0;
   1408	for (i = 0; i < KGD_MAX_QUEUES; ++i) {
   1409		mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
   1410			/ dqm->dev->shared_resources.num_pipe_per_mec;
   1411
   1412		if (!test_bit(i, dqm->dev->shared_resources.cp_queue_bitmap))
   1413			continue;
   1414
   1415		/* only acquire queues from the first MEC */
   1416		if (mec > 0)
   1417			continue;
   1418
   1419		/* This situation may be hit in the future if a new HW
   1420		 * generation exposes more than 64 queues. If so, the
   1421		 * definition of res.queue_mask needs updating
   1422		 */
   1423		if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
   1424			pr_err("Invalid queue enabled by amdgpu: %d\n", i);
   1425			break;
   1426		}
   1427
   1428		res.queue_mask |= 1ull
   1429			<< amdgpu_queue_mask_bit_to_set_resource_bit(
   1430				dqm->dev->adev, i);
   1431	}
   1432	res.gws_mask = ~0ull;
   1433	res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0;
   1434
   1435	pr_debug("Scheduling resources:\n"
   1436			"vmid mask: 0x%8X\n"
   1437			"queue mask: 0x%8llX\n",
   1438			res.vmid_mask, res.queue_mask);
   1439
   1440	return pm_send_set_resources(&dqm->packet_mgr, &res);
   1441}
   1442
   1443static int initialize_cpsch(struct device_queue_manager *dqm)
   1444{
   1445	uint64_t num_sdma_queues;
   1446	uint64_t num_xgmi_sdma_queues;
   1447
   1448	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
   1449
   1450	mutex_init(&dqm->lock_hidden);
   1451	INIT_LIST_HEAD(&dqm->queues);
   1452	dqm->active_queue_count = dqm->processes_count = 0;
   1453	dqm->active_cp_queue_count = 0;
   1454	dqm->gws_queue_count = 0;
   1455	dqm->active_runlist = false;
   1456
   1457	num_sdma_queues = get_num_sdma_queues(dqm);
   1458	if (num_sdma_queues >= BITS_PER_TYPE(dqm->sdma_bitmap))
   1459		dqm->sdma_bitmap = ULLONG_MAX;
   1460	else
   1461		dqm->sdma_bitmap = (BIT_ULL(num_sdma_queues) - 1);
   1462
   1463	dqm->sdma_bitmap &= ~(get_reserved_sdma_queues_bitmap(dqm));
   1464	pr_info("sdma_bitmap: %llx\n", dqm->sdma_bitmap);
   1465
   1466	num_xgmi_sdma_queues = get_num_xgmi_sdma_queues(dqm);
   1467	if (num_xgmi_sdma_queues >= BITS_PER_TYPE(dqm->xgmi_sdma_bitmap))
   1468		dqm->xgmi_sdma_bitmap = ULLONG_MAX;
   1469	else
   1470		dqm->xgmi_sdma_bitmap = (BIT_ULL(num_xgmi_sdma_queues) - 1);
   1471
   1472	INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
   1473
   1474	return 0;
   1475}
   1476
   1477static int start_cpsch(struct device_queue_manager *dqm)
   1478{
   1479	int retval;
   1480
   1481	retval = 0;
   1482
   1483	dqm_lock(dqm);
   1484
   1485	if (!dqm->dev->shared_resources.enable_mes) {
   1486		retval = pm_init(&dqm->packet_mgr, dqm);
   1487		if (retval)
   1488			goto fail_packet_manager_init;
   1489
   1490		retval = set_sched_resources(dqm);
   1491		if (retval)
   1492			goto fail_set_sched_resources;
   1493	}
   1494	pr_debug("Allocating fence memory\n");
   1495
   1496	/* allocate fence memory on the gart */
   1497	retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
   1498					&dqm->fence_mem);
   1499
   1500	if (retval)
   1501		goto fail_allocate_vidmem;
   1502
   1503	dqm->fence_addr = (uint64_t *)dqm->fence_mem->cpu_ptr;
   1504	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;
   1505
   1506	init_interrupts(dqm);
   1507
    1508	/* clear hang status when the driver tries to start the hw scheduler */
   1509	dqm->is_hws_hang = false;
   1510	dqm->is_resetting = false;
   1511	dqm->sched_running = true;
   1512	if (!dqm->dev->shared_resources.enable_mes)
   1513		execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
   1514	dqm_unlock(dqm);
   1515
   1516	return 0;
   1517fail_allocate_vidmem:
   1518fail_set_sched_resources:
   1519	if (!dqm->dev->shared_resources.enable_mes)
   1520		pm_uninit(&dqm->packet_mgr, false);
   1521fail_packet_manager_init:
   1522	dqm_unlock(dqm);
   1523	return retval;
   1524}
   1525
   1526static int stop_cpsch(struct device_queue_manager *dqm)
   1527{
   1528	bool hanging;
   1529
   1530	dqm_lock(dqm);
   1531	if (!dqm->sched_running) {
   1532		dqm_unlock(dqm);
   1533		return 0;
   1534	}
   1535
   1536	if (!dqm->is_hws_hang) {
   1537		if (!dqm->dev->shared_resources.enable_mes)
   1538			unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, false);
   1539		else
   1540			remove_all_queues_mes(dqm);
   1541	}
   1542
   1543	hanging = dqm->is_hws_hang || dqm->is_resetting;
   1544	dqm->sched_running = false;
   1545
   1546	if (!dqm->dev->shared_resources.enable_mes)
   1547		pm_release_ib(&dqm->packet_mgr);
   1548
   1549	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
   1550	if (!dqm->dev->shared_resources.enable_mes)
   1551		pm_uninit(&dqm->packet_mgr, hanging);
   1552	dqm_unlock(dqm);
   1553
   1554	return 0;
   1555}
   1556
   1557static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
   1558					struct kernel_queue *kq,
   1559					struct qcm_process_device *qpd)
   1560{
   1561	dqm_lock(dqm);
   1562	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
   1563		pr_warn("Can't create new kernel queue because %d queues were already created\n",
   1564				dqm->total_queue_count);
   1565		dqm_unlock(dqm);
   1566		return -EPERM;
   1567	}
   1568
   1569	/*
   1570	 * Unconditionally increment this counter, regardless of the queue's
   1571	 * type or whether the queue is active.
   1572	 */
   1573	dqm->total_queue_count++;
   1574	pr_debug("Total of %d queues are accountable so far\n",
   1575			dqm->total_queue_count);
   1576
   1577	list_add(&kq->list, &qpd->priv_queue_list);
   1578	increment_queue_count(dqm, qpd, kq->queue);
   1579	qpd->is_debug = true;
   1580	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
   1581	dqm_unlock(dqm);
   1582
   1583	return 0;
   1584}
   1585
   1586static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
   1587					struct kernel_queue *kq,
   1588					struct qcm_process_device *qpd)
   1589{
   1590	dqm_lock(dqm);
   1591	list_del(&kq->list);
   1592	decrement_queue_count(dqm, qpd, kq->queue);
   1593	qpd->is_debug = false;
   1594	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
   1595	/*
   1596	 * Unconditionally decrement this counter, regardless of the queue's
   1597	 * type.
   1598	 */
   1599	dqm->total_queue_count--;
   1600	pr_debug("Total of %d queues are accountable so far\n",
   1601			dqm->total_queue_count);
   1602	dqm_unlock(dqm);
   1603}
   1604
   1605static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
   1606			struct qcm_process_device *qpd,
   1607			const struct kfd_criu_queue_priv_data *qd,
   1608			const void *restore_mqd, const void *restore_ctl_stack)
   1609{
   1610	int retval;
   1611	struct mqd_manager *mqd_mgr;
   1612
   1613	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
   1614		pr_warn("Can't create new usermode queue because %d queues were already created\n",
   1615				dqm->total_queue_count);
   1616		retval = -EPERM;
   1617		goto out;
   1618	}
   1619
   1620	if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
   1621		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
   1622		dqm_lock(dqm);
   1623		retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL);
   1624		dqm_unlock(dqm);
   1625		if (retval)
   1626			goto out;
   1627	}
   1628
   1629	retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL);
   1630	if (retval)
   1631		goto out_deallocate_sdma_queue;
   1632
   1633	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
   1634			q->properties.type)];
   1635
   1636	if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
   1637		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
   1638		dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
   1639	q->properties.tba_addr = qpd->tba_addr;
   1640	q->properties.tma_addr = qpd->tma_addr;
   1641	q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties);
   1642	if (!q->mqd_mem_obj) {
   1643		retval = -ENOMEM;
   1644		goto out_deallocate_doorbell;
   1645	}
   1646
   1647	dqm_lock(dqm);
   1648	/*
   1649	 * Eviction state logic: mark all queues as evicted, even ones
   1650	 * not currently active. Restoring inactive queues later only
   1651	 * updates the is_evicted flag but is a no-op otherwise.
   1652	 */
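	/* Illustrative note: qpd->evicted acts as an eviction count, so e.g.
	 * a value of 2 (two outstanding evictions) still yields
	 * is_evicted = true here.
	 */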
   1653	q->properties.is_evicted = !!qpd->evicted;
   1654
   1655	if (qd)
   1656		mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
   1657				     &q->properties, restore_mqd, restore_ctl_stack,
   1658				     qd->ctl_stack_size);
   1659	else
   1660		mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
   1661					&q->gart_mqd_addr, &q->properties);
   1662
   1663	list_add(&q->list, &qpd->queues_list);
   1664	qpd->queue_count++;
   1665
   1666	if (q->properties.is_active) {
   1667		increment_queue_count(dqm, qpd, q);
   1668
   1669		if (!dqm->dev->shared_resources.enable_mes) {
   1670			retval = execute_queues_cpsch(dqm,
   1671					     KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
   1672		} else {
   1673			retval = add_queue_mes(dqm, q, qpd);
   1674			if (retval)
   1675				goto cleanup_queue;
   1676		}
   1677	}
   1678
   1679	/*
   1680	 * Unconditionally increment this counter, regardless of the queue's
   1681	 * type or whether the queue is active.
   1682	 */
   1683	dqm->total_queue_count++;
   1684
   1685	pr_debug("Total of %d queues are accountable so far\n",
   1686			dqm->total_queue_count);
   1687
   1688	dqm_unlock(dqm);
   1689	return retval;
   1690
   1691cleanup_queue:
   1692	qpd->queue_count--;
   1693	list_del(&q->list);
   1694	if (q->properties.is_active)
   1695		decrement_queue_count(dqm, qpd, q);
   1696	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
   1697	dqm_unlock(dqm);
   1698out_deallocate_doorbell:
   1699	deallocate_doorbell(qpd, q);
   1700out_deallocate_sdma_queue:
   1701	if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
   1702		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
   1703		dqm_lock(dqm);
   1704		deallocate_sdma_queue(dqm, q);
   1705		dqm_unlock(dqm);
   1706	}
   1707out:
   1708	return retval;
   1709}
   1710
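/* Illustrative usage sketch (mirrors unmap_queues_cpsch() below): the
 * caller arms the fence, asks the packet manager to have it written back,
 * and then polls with a timeout:
 *
 *	*dqm->fence_addr = KFD_FENCE_INIT;
 *	pm_send_query_status(&dqm->packet_mgr, dqm->fence_gpu_addr,
 *			     KFD_FENCE_COMPLETED);
 *	r = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
 *				      queue_preemption_timeout_ms);
 */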
   1711int amdkfd_fence_wait_timeout(uint64_t *fence_addr,
   1712				uint64_t fence_value,
   1713				unsigned int timeout_ms)
   1714{
   1715	unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;
   1716
   1717	while (*fence_addr != fence_value) {
   1718		if (time_after(jiffies, end_jiffies)) {
   1719			pr_err("qcm fence wait loop timeout expired\n");
    1720			/* In the HWS case, this halts the driver thread so
    1721			 * that CP state is not disturbed before scandumps
    1722			 * are taken for FW debugging.
    1723			 */
   1724			while (halt_if_hws_hang)
   1725				schedule();
   1726
   1727			return -ETIME;
   1728		}
   1729		schedule();
   1730	}
   1731
   1732	return 0;
   1733}
   1734
   1735/* dqm->lock mutex has to be locked before calling this function */
   1736static int map_queues_cpsch(struct device_queue_manager *dqm)
   1737{
   1738	int retval;
   1739
   1740	if (!dqm->sched_running)
   1741		return 0;
   1742	if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0)
   1743		return 0;
   1744	if (dqm->active_runlist)
   1745		return 0;
   1746
   1747	retval = pm_send_runlist(&dqm->packet_mgr, &dqm->queues);
   1748	pr_debug("%s sent runlist\n", __func__);
   1749	if (retval) {
   1750		pr_err("failed to execute runlist\n");
   1751		return retval;
   1752	}
   1753	dqm->active_runlist = true;
   1754
   1755	return retval;
   1756}
   1757
   1758/* dqm->lock mutex has to be locked before calling this function */
   1759static int unmap_queues_cpsch(struct device_queue_manager *dqm,
   1760				enum kfd_unmap_queues_filter filter,
   1761				uint32_t filter_param, bool reset)
   1762{
   1763	int retval = 0;
   1764	struct mqd_manager *mqd_mgr;
   1765
   1766	if (!dqm->sched_running)
   1767		return 0;
   1768	if (dqm->is_hws_hang || dqm->is_resetting)
   1769		return -EIO;
   1770	if (!dqm->active_runlist)
   1771		return retval;
   1772
   1773	retval = pm_send_unmap_queue(&dqm->packet_mgr, filter, filter_param, reset);
   1774	if (retval)
   1775		return retval;
   1776
   1777	*dqm->fence_addr = KFD_FENCE_INIT;
   1778	pm_send_query_status(&dqm->packet_mgr, dqm->fence_gpu_addr,
   1779				KFD_FENCE_COMPLETED);
    1780	/* the fence wait below should time out if preemption fails */
   1781	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
   1782				queue_preemption_timeout_ms);
   1783	if (retval) {
   1784		pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
   1785		kfd_hws_hang(dqm);
   1786		return retval;
   1787	}
   1788
    1789	/* In the current MEC firmware implementation, if a compute queue
    1790	 * doesn't respond to the preemption request in time, the HIQ will
    1791	 * abandon the unmap request without returning any timeout error
    1792	 * to the driver. Instead, MEC firmware logs the doorbell of the
    1793	 * unresponsive compute queue in the HIQ.MQD.queue_doorbell_id
    1794	 * fields. To make sure the queue unmap was successful, the driver
    1795	 * needs to check those fields.
    1796	 */
   1797	mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
   1798	if (mqd_mgr->read_doorbell_id(dqm->packet_mgr.priv_queue->queue->mqd)) {
   1799		pr_err("HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n");
   1800		while (halt_if_hws_hang)
   1801			schedule();
   1802		return -ETIME;
   1803	}
   1804
   1805	pm_release_ib(&dqm->packet_mgr);
   1806	dqm->active_runlist = false;
   1807
   1808	return retval;
   1809}
   1810
    1811/* only for compute queues */
   1812static int reset_queues_cpsch(struct device_queue_manager *dqm,
   1813			uint16_t pasid)
   1814{
   1815	int retval;
   1816
   1817	dqm_lock(dqm);
   1818
   1819	retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID,
   1820			pasid, true);
   1821
   1822	dqm_unlock(dqm);
   1823	return retval;
   1824}
   1825
   1826/* dqm->lock mutex has to be locked before calling this function */
   1827static int execute_queues_cpsch(struct device_queue_manager *dqm,
   1828				enum kfd_unmap_queues_filter filter,
   1829				uint32_t filter_param)
   1830{
   1831	int retval;
   1832
   1833	if (dqm->is_hws_hang)
   1834		return -EIO;
   1835	retval = unmap_queues_cpsch(dqm, filter, filter_param, false);
   1836	if (retval)
   1837		return retval;
   1838
   1839	return map_queues_cpsch(dqm);
   1840}
   1841
   1842static int destroy_queue_cpsch(struct device_queue_manager *dqm,
   1843				struct qcm_process_device *qpd,
   1844				struct queue *q)
   1845{
   1846	int retval;
   1847	struct mqd_manager *mqd_mgr;
   1848	uint64_t sdma_val = 0;
   1849	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
   1850
   1851	/* Get the SDMA queue stats */
   1852	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
   1853	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
   1854		retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
   1855							&sdma_val);
   1856		if (retval)
   1857			pr_err("Failed to read SDMA queue counter for queue: %d\n",
   1858				q->properties.queue_id);
   1859	}
   1860
   1861	retval = 0;
   1862
   1863	/* remove queue from list to prevent rescheduling after preemption */
   1864	dqm_lock(dqm);
   1865
   1866	if (qpd->is_debug) {
   1867		/*
    1868		 * Error: we currently do not allow destroying a queue
    1869		 * of a process that is being debugged.
   1870		 */
   1871		retval = -EBUSY;
   1872		goto failed_try_destroy_debugged_queue;
   1873
   1874	}
   1875
   1876	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
   1877			q->properties.type)];
   1878
   1879	deallocate_doorbell(qpd, q);
   1880
   1881	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
   1882	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
   1883		deallocate_sdma_queue(dqm, q);
   1884		pdd->sdma_past_activity_counter += sdma_val;
   1885	}
   1886
   1887	list_del(&q->list);
   1888	qpd->queue_count--;
   1889	if (q->properties.is_active) {
   1890		if (!dqm->dev->shared_resources.enable_mes) {
   1891			decrement_queue_count(dqm, qpd, q);
   1892			retval = execute_queues_cpsch(dqm,
   1893						      KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
   1894			if (retval == -ETIME)
   1895				qpd->reset_wavefronts = true;
   1896		} else {
   1897			retval = remove_queue_mes(dqm, q, qpd);
   1898		}
   1899	}
   1900
   1901	/*
   1902	 * Unconditionally decrement this counter, regardless of the queue's
   1903	 * type
   1904	 */
   1905	dqm->total_queue_count--;
   1906	pr_debug("Total of %d queues are accountable so far\n",
   1907			dqm->total_queue_count);
   1908
   1909	dqm_unlock(dqm);
   1910
   1911	/* Do free_mqd after dqm_unlock(dqm) to avoid circular locking */
   1912	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
   1913
   1914	return retval;
   1915
   1916failed_try_destroy_debugged_queue:
   1917
   1918	dqm_unlock(dqm);
   1919	return retval;
   1920}
   1921
   1922/*
   1923 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
   1924 * stay in user mode.
   1925 */
   1926#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
   1927/* APE1 limit is inclusive and 64K aligned. */
   1928#define APE1_LIMIT_ALIGNMENT 0xFFFF
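
/*
 * Worked example (illustrative values only): for a hypothetical aperture
 * at base 0x700000000000 with a 64 KiB size,
 *	limit = base + size - 1 = 0x70000000FFFF,
 * so (base & APE1_FIXED_BITS_MASK) == 0 and
 * (limit & APE1_FIXED_BITS_MASK) == APE1_LIMIT_ALIGNMENT; the checks in
 * set_cache_memory_policy() below pass and the registers are programmed as
 *	sh_mem_ape1_base  = base  >> 16 = 0x70000000
 *	sh_mem_ape1_limit = limit >> 16 = 0x70000000
 */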
   1929
   1930static bool set_cache_memory_policy(struct device_queue_manager *dqm,
   1931				   struct qcm_process_device *qpd,
   1932				   enum cache_policy default_policy,
   1933				   enum cache_policy alternate_policy,
   1934				   void __user *alternate_aperture_base,
   1935				   uint64_t alternate_aperture_size)
   1936{
   1937	bool retval = true;
   1938
   1939	if (!dqm->asic_ops.set_cache_memory_policy)
   1940		return retval;
   1941
   1942	dqm_lock(dqm);
   1943
   1944	if (alternate_aperture_size == 0) {
   1945		/* base > limit disables APE1 */
   1946		qpd->sh_mem_ape1_base = 1;
   1947		qpd->sh_mem_ape1_limit = 0;
   1948	} else {
   1949		/*
   1950		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
   1951		 *			SH_MEM_APE1_BASE[31:0], 0x0000 }
   1952		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
   1953		 *			SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
   1954		 * Verify that the base and size parameters can be
   1955		 * represented in this format and convert them.
   1956		 * Additionally restrict APE1 to user-mode addresses.
   1957		 */
   1958
   1959		uint64_t base = (uintptr_t)alternate_aperture_base;
   1960		uint64_t limit = base + alternate_aperture_size - 1;
   1961
   1962		if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
   1963		   (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
   1964			retval = false;
   1965			goto out;
   1966		}
   1967
   1968		qpd->sh_mem_ape1_base = base >> 16;
   1969		qpd->sh_mem_ape1_limit = limit >> 16;
   1970	}
   1971
   1972	retval = dqm->asic_ops.set_cache_memory_policy(
   1973			dqm,
   1974			qpd,
   1975			default_policy,
   1976			alternate_policy,
   1977			alternate_aperture_base,
   1978			alternate_aperture_size);
   1979
   1980	if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
   1981		program_sh_mem_settings(dqm, qpd);
   1982
   1983	pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
   1984		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
   1985		qpd->sh_mem_ape1_limit);
   1986
   1987out:
   1988	dqm_unlock(dqm);
   1989	return retval;
   1990}
   1991
   1992static int process_termination_nocpsch(struct device_queue_manager *dqm,
   1993		struct qcm_process_device *qpd)
   1994{
   1995	struct queue *q;
   1996	struct device_process_node *cur, *next_dpn;
   1997	int retval = 0;
   1998	bool found = false;
   1999
   2000	dqm_lock(dqm);
   2001
   2002	/* Clear all user mode queues */
   2003	while (!list_empty(&qpd->queues_list)) {
   2004		struct mqd_manager *mqd_mgr;
   2005		int ret;
   2006
   2007		q = list_first_entry(&qpd->queues_list, struct queue, list);
   2008		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
   2009				q->properties.type)];
   2010		ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
   2011		if (ret)
   2012			retval = ret;
   2013		dqm_unlock(dqm);
   2014		mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
   2015		dqm_lock(dqm);
   2016	}
   2017
   2018	/* Unregister process */
   2019	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
   2020		if (qpd == cur->qpd) {
   2021			list_del(&cur->list);
   2022			kfree(cur);
   2023			dqm->processes_count--;
   2024			found = true;
   2025			break;
   2026		}
   2027	}
   2028
   2029	dqm_unlock(dqm);
   2030
   2031	/* Outside the DQM lock because under the DQM lock we can't do
   2032	 * reclaim or take other locks that others hold while reclaiming.
   2033	 */
   2034	if (found)
   2035		kfd_dec_compute_active(dqm->dev);
   2036
   2037	return retval;
   2038}
   2039
   2040static int get_wave_state(struct device_queue_manager *dqm,
   2041			  struct queue *q,
   2042			  void __user *ctl_stack,
   2043			  u32 *ctl_stack_used_size,
   2044			  u32 *save_area_used_size)
   2045{
   2046	struct mqd_manager *mqd_mgr;
   2047
   2048	dqm_lock(dqm);
   2049
   2050	mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];
   2051
   2052	if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE ||
   2053	    q->properties.is_active || !q->device->cwsr_enabled ||
   2054	    !mqd_mgr->get_wave_state) {
   2055		dqm_unlock(dqm);
   2056		return -EINVAL;
   2057	}
   2058
   2059	dqm_unlock(dqm);
   2060
   2061	/*
    2062	 * get_wave_state is called outside the dqm lock to prevent circular
    2063	 * locking; the queue is protected against destruction by the process
    2064	 * lock.
   2065	 */
   2066	return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, ctl_stack,
   2067			ctl_stack_used_size, save_area_used_size);
   2068}
   2069
   2070static void get_queue_checkpoint_info(struct device_queue_manager *dqm,
   2071			const struct queue *q,
   2072			u32 *mqd_size,
   2073			u32 *ctl_stack_size)
   2074{
   2075	struct mqd_manager *mqd_mgr;
   2076	enum KFD_MQD_TYPE mqd_type =
   2077			get_mqd_type_from_queue_type(q->properties.type);
   2078
   2079	dqm_lock(dqm);
   2080	mqd_mgr = dqm->mqd_mgrs[mqd_type];
   2081	*mqd_size = mqd_mgr->mqd_size;
   2082	*ctl_stack_size = 0;
   2083
   2084	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE && mqd_mgr->get_checkpoint_info)
   2085		mqd_mgr->get_checkpoint_info(mqd_mgr, q->mqd, ctl_stack_size);
   2086
   2087	dqm_unlock(dqm);
   2088}
   2089
   2090static int checkpoint_mqd(struct device_queue_manager *dqm,
   2091			  const struct queue *q,
   2092			  void *mqd,
   2093			  void *ctl_stack)
   2094{
   2095	struct mqd_manager *mqd_mgr;
   2096	int r = 0;
   2097	enum KFD_MQD_TYPE mqd_type =
   2098			get_mqd_type_from_queue_type(q->properties.type);
   2099
   2100	dqm_lock(dqm);
   2101
   2102	if (q->properties.is_active || !q->device->cwsr_enabled) {
   2103		r = -EINVAL;
   2104		goto dqm_unlock;
   2105	}
   2106
   2107	mqd_mgr = dqm->mqd_mgrs[mqd_type];
   2108	if (!mqd_mgr->checkpoint_mqd) {
   2109		r = -EOPNOTSUPP;
   2110		goto dqm_unlock;
   2111	}
   2112
   2113	mqd_mgr->checkpoint_mqd(mqd_mgr, q->mqd, mqd, ctl_stack);
   2114
   2115dqm_unlock:
   2116	dqm_unlock(dqm);
   2117	return r;
   2118}
   2119
   2120static int process_termination_cpsch(struct device_queue_manager *dqm,
   2121		struct qcm_process_device *qpd)
   2122{
   2123	int retval;
   2124	struct queue *q;
   2125	struct kernel_queue *kq, *kq_next;
   2126	struct mqd_manager *mqd_mgr;
   2127	struct device_process_node *cur, *next_dpn;
   2128	enum kfd_unmap_queues_filter filter =
   2129		KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
   2130	bool found = false;
   2131
   2132	retval = 0;
   2133
   2134	dqm_lock(dqm);
   2135
   2136	/* Clean all kernel queues */
   2137	list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
   2138		list_del(&kq->list);
   2139		decrement_queue_count(dqm, qpd, kq->queue);
   2140		qpd->is_debug = false;
   2141		dqm->total_queue_count--;
   2142		filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
   2143	}
   2144
   2145	/* Clear all user mode queues */
   2146	list_for_each_entry(q, &qpd->queues_list, list) {
   2147		if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
   2148			deallocate_sdma_queue(dqm, q);
   2149		else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
   2150			deallocate_sdma_queue(dqm, q);
   2151
   2152		if (q->properties.is_active) {
   2153			decrement_queue_count(dqm, qpd, q);
   2154
   2155			if (dqm->dev->shared_resources.enable_mes) {
   2156				retval = remove_queue_mes(dqm, q, qpd);
   2157				if (retval)
   2158					pr_err("Failed to remove queue %d\n",
   2159						q->properties.queue_id);
   2160			}
   2161		}
   2162
   2163		dqm->total_queue_count--;
   2164	}
   2165
   2166	/* Unregister process */
   2167	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
   2168		if (qpd == cur->qpd) {
   2169			list_del(&cur->list);
   2170			kfree(cur);
   2171			dqm->processes_count--;
   2172			found = true;
   2173			break;
   2174		}
   2175	}
   2176
   2177	if (!dqm->dev->shared_resources.enable_mes)
   2178		retval = execute_queues_cpsch(dqm, filter, 0);
   2179
   2180	if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) {
   2181		pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
   2182		dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
   2183		qpd->reset_wavefronts = false;
   2184	}
   2185
   2186	/* Lastly, free mqd resources.
   2187	 * Do free_mqd() after dqm_unlock to avoid circular locking.
   2188	 */
   2189	while (!list_empty(&qpd->queues_list)) {
   2190		q = list_first_entry(&qpd->queues_list, struct queue, list);
   2191		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
   2192				q->properties.type)];
   2193		list_del(&q->list);
   2194		qpd->queue_count--;
   2195		dqm_unlock(dqm);
   2196		mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
   2197		dqm_lock(dqm);
   2198	}
   2199	dqm_unlock(dqm);
   2200
   2201	/* Outside the DQM lock because under the DQM lock we can't do
   2202	 * reclaim or take other locks that others hold while reclaiming.
   2203	 */
   2204	if (found)
   2205		kfd_dec_compute_active(dqm->dev);
   2206
   2207	return retval;
   2208}
   2209
   2210static int init_mqd_managers(struct device_queue_manager *dqm)
   2211{
   2212	int i, j;
   2213	struct mqd_manager *mqd_mgr;
   2214
   2215	for (i = 0; i < KFD_MQD_TYPE_MAX; i++) {
   2216		mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev);
   2217		if (!mqd_mgr) {
   2218			pr_err("mqd manager [%d] initialization failed\n", i);
   2219			goto out_free;
   2220		}
   2221		dqm->mqd_mgrs[i] = mqd_mgr;
   2222	}
   2223
   2224	return 0;
   2225
   2226out_free:
   2227	for (j = 0; j < i; j++) {
   2228		kfree(dqm->mqd_mgrs[j]);
   2229		dqm->mqd_mgrs[j] = NULL;
   2230	}
   2231
   2232	return -ENOMEM;
   2233}
   2234
    2235/* Allocate one HIQ MQD (HWS) and all SDMA MQDs in one contiguous chunk */
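/*
 * Size sketch (illustrative numbers): on a device with 8 SDMA engines and
 * 8 user queues per engine, the chunk sized below holds 8 * 8 = 64 SDMA
 * MQDs plus the single HIQ MQD.
 */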
   2236static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
   2237{
   2238	int retval;
   2239	struct kfd_dev *dev = dqm->dev;
   2240	struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
   2241	uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
   2242		get_num_all_sdma_engines(dqm) *
   2243		dev->device_info.num_sdma_queues_per_engine +
   2244		dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
   2245
   2246	retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size,
   2247		&(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
   2248		(void *)&(mem_obj->cpu_ptr), false);
   2249
   2250	return retval;
   2251}
   2252
   2253struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
   2254{
   2255	struct device_queue_manager *dqm;
   2256
   2257	pr_debug("Loading device queue manager\n");
   2258
   2259	dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
   2260	if (!dqm)
   2261		return NULL;
   2262
   2263	switch (dev->adev->asic_type) {
   2264	/* HWS is not available on Hawaii. */
   2265	case CHIP_HAWAII:
   2266	/* HWS depends on CWSR for timely dequeue. CWSR is not
   2267	 * available on Tonga.
   2268	 *
   2269	 * FIXME: This argument also applies to Kaveri.
   2270	 */
   2271	case CHIP_TONGA:
   2272		dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
   2273		break;
   2274	default:
   2275		dqm->sched_policy = sched_policy;
   2276		break;
   2277	}
   2278
   2279	dqm->dev = dev;
   2280	switch (dqm->sched_policy) {
   2281	case KFD_SCHED_POLICY_HWS:
   2282	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
   2283		/* initialize dqm for cp scheduling */
   2284		dqm->ops.create_queue = create_queue_cpsch;
   2285		dqm->ops.initialize = initialize_cpsch;
   2286		dqm->ops.start = start_cpsch;
   2287		dqm->ops.stop = stop_cpsch;
   2288		dqm->ops.pre_reset = pre_reset;
   2289		dqm->ops.destroy_queue = destroy_queue_cpsch;
   2290		dqm->ops.update_queue = update_queue;
   2291		dqm->ops.register_process = register_process;
   2292		dqm->ops.unregister_process = unregister_process;
   2293		dqm->ops.uninitialize = uninitialize;
   2294		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
   2295		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
   2296		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
   2297		dqm->ops.process_termination = process_termination_cpsch;
   2298		dqm->ops.evict_process_queues = evict_process_queues_cpsch;
   2299		dqm->ops.restore_process_queues = restore_process_queues_cpsch;
   2300		dqm->ops.get_wave_state = get_wave_state;
   2301		dqm->ops.reset_queues = reset_queues_cpsch;
   2302		dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
   2303		dqm->ops.checkpoint_mqd = checkpoint_mqd;
   2304		break;
   2305	case KFD_SCHED_POLICY_NO_HWS:
   2306		/* initialize dqm for no cp scheduling */
   2307		dqm->ops.start = start_nocpsch;
   2308		dqm->ops.stop = stop_nocpsch;
   2309		dqm->ops.pre_reset = pre_reset;
   2310		dqm->ops.create_queue = create_queue_nocpsch;
   2311		dqm->ops.destroy_queue = destroy_queue_nocpsch;
   2312		dqm->ops.update_queue = update_queue;
   2313		dqm->ops.register_process = register_process;
   2314		dqm->ops.unregister_process = unregister_process;
   2315		dqm->ops.initialize = initialize_nocpsch;
   2316		dqm->ops.uninitialize = uninitialize;
   2317		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
   2318		dqm->ops.process_termination = process_termination_nocpsch;
   2319		dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
   2320		dqm->ops.restore_process_queues =
   2321			restore_process_queues_nocpsch;
   2322		dqm->ops.get_wave_state = get_wave_state;
   2323		dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
   2324		dqm->ops.checkpoint_mqd = checkpoint_mqd;
   2325		break;
   2326	default:
   2327		pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
   2328		goto out_free;
   2329	}
   2330
   2331	switch (dev->adev->asic_type) {
   2332	case CHIP_CARRIZO:
   2333		device_queue_manager_init_vi(&dqm->asic_ops);
   2334		break;
   2335
   2336	case CHIP_KAVERI:
   2337		device_queue_manager_init_cik(&dqm->asic_ops);
   2338		break;
   2339
   2340	case CHIP_HAWAII:
   2341		device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
   2342		break;
   2343
   2344	case CHIP_TONGA:
   2345	case CHIP_FIJI:
   2346	case CHIP_POLARIS10:
   2347	case CHIP_POLARIS11:
   2348	case CHIP_POLARIS12:
   2349	case CHIP_VEGAM:
   2350		device_queue_manager_init_vi_tonga(&dqm->asic_ops);
   2351		break;
   2352
   2353	default:
   2354		if (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0))
   2355			device_queue_manager_init_v11(&dqm->asic_ops);
   2356		else if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1))
   2357			device_queue_manager_init_v10_navi10(&dqm->asic_ops);
   2358		else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1))
   2359			device_queue_manager_init_v9(&dqm->asic_ops);
   2360		else {
   2361			WARN(1, "Unexpected ASIC family %u",
   2362			     dev->adev->asic_type);
   2363			goto out_free;
   2364		}
   2365	}
   2366
   2367	if (init_mqd_managers(dqm))
   2368		goto out_free;
   2369
   2370	if (allocate_hiq_sdma_mqd(dqm)) {
    2371		pr_err("Failed to allocate hiq sdma mqd chunk buffer\n");
   2372		goto out_free;
   2373	}
   2374
   2375	if (!dqm->ops.initialize(dqm))
   2376		return dqm;
   2377
   2378out_free:
   2379	kfree(dqm);
   2380	return NULL;
   2381}
   2382
   2383static void deallocate_hiq_sdma_mqd(struct kfd_dev *dev,
   2384				    struct kfd_mem_obj *mqd)
   2385{
    2386	WARN(!mqd, "No hiq sdma mqd chunk to free");
   2387
   2388	amdgpu_amdkfd_free_gtt_mem(dev->adev, mqd->gtt_mem);
   2389}
   2390
   2391void device_queue_manager_uninit(struct device_queue_manager *dqm)
   2392{
   2393	dqm->ops.uninitialize(dqm);
   2394	deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
   2395	kfree(dqm);
   2396}
   2397
   2398int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid)
   2399{
   2400	struct kfd_process_device *pdd;
   2401	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
   2402	int ret = 0;
   2403
   2404	if (!p)
   2405		return -EINVAL;
   2406	WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
   2407	pdd = kfd_get_process_device_data(dqm->dev, p);
   2408	if (pdd)
   2409		ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
   2410	kfd_unref_process(p);
   2411
   2412	return ret;
   2413}
   2414
   2415static void kfd_process_hw_exception(struct work_struct *work)
   2416{
   2417	struct device_queue_manager *dqm = container_of(work,
   2418			struct device_queue_manager, hw_exception_work);
   2419	amdgpu_amdkfd_gpu_reset(dqm->dev->adev);
   2420}
   2421
   2422#if defined(CONFIG_DEBUG_FS)
   2423
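/*
 * Illustrative output layout (register values made up): up to eight
 * consecutive registers are grouped per line, and a new line starts
 * whenever the offsets stop being contiguous, e.g.
 *
 *	    00008000: 00000001 00000002 00000003 00000004
 *	    00008020: 0000000a
 */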
   2424static void seq_reg_dump(struct seq_file *m,
   2425			 uint32_t (*dump)[2], uint32_t n_regs)
   2426{
   2427	uint32_t i, count;
   2428
   2429	for (i = 0, count = 0; i < n_regs; i++) {
   2430		if (count == 0 ||
   2431		    dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) {
   2432			seq_printf(m, "%s    %08x: %08x",
   2433				   i ? "\n" : "",
   2434				   dump[i][0], dump[i][1]);
   2435			count = 7;
   2436		} else {
   2437			seq_printf(m, " %08x", dump[i][1]);
   2438			count--;
   2439		}
   2440	}
   2441
   2442	seq_puts(m, "\n");
   2443}
   2444
   2445int dqm_debugfs_hqds(struct seq_file *m, void *data)
   2446{
   2447	struct device_queue_manager *dqm = data;
   2448	uint32_t (*dump)[2], n_regs;
   2449	int pipe, queue;
   2450	int r = 0;
   2451
   2452	if (!dqm->sched_running) {
   2453		seq_puts(m, " Device is stopped\n");
   2454		return 0;
   2455	}
   2456
   2457	r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev,
   2458					KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE,
   2459					&dump, &n_regs);
   2460	if (!r) {
   2461		seq_printf(m, "  HIQ on MEC %d Pipe %d Queue %d\n",
   2462			   KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1,
   2463			   KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm),
   2464			   KFD_CIK_HIQ_QUEUE);
   2465		seq_reg_dump(m, dump, n_regs);
   2466
   2467		kfree(dump);
   2468	}
   2469
   2470	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
   2471		int pipe_offset = pipe * get_queues_per_pipe(dqm);
   2472
   2473		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
   2474			if (!test_bit(pipe_offset + queue,
   2475				      dqm->dev->shared_resources.cp_queue_bitmap))
   2476				continue;
   2477
   2478			r = dqm->dev->kfd2kgd->hqd_dump(
   2479				dqm->dev->adev, pipe, queue, &dump, &n_regs);
   2480			if (r)
   2481				break;
   2482
   2483			seq_printf(m, "  CP Pipe %d, Queue %d\n",
   2484				  pipe, queue);
   2485			seq_reg_dump(m, dump, n_regs);
   2486
   2487			kfree(dump);
   2488		}
   2489	}
   2490
   2491	for (pipe = 0; pipe < get_num_all_sdma_engines(dqm); pipe++) {
   2492		for (queue = 0;
   2493		     queue < dqm->dev->device_info.num_sdma_queues_per_engine;
   2494		     queue++) {
   2495			r = dqm->dev->kfd2kgd->hqd_sdma_dump(
   2496				dqm->dev->adev, pipe, queue, &dump, &n_regs);
   2497			if (r)
   2498				break;
   2499
   2500			seq_printf(m, "  SDMA Engine %d, RLC %d\n",
   2501				  pipe, queue);
   2502			seq_reg_dump(m, dump, n_regs);
   2503
   2504			kfree(dump);
   2505		}
   2506	}
   2507
   2508	return r;
   2509}
   2510
   2511int dqm_debugfs_hang_hws(struct device_queue_manager *dqm)
   2512{
   2513	int r = 0;
   2514
   2515	dqm_lock(dqm);
   2516	r = pm_debugfs_hang_hws(&dqm->packet_mgr);
   2517	if (r) {
   2518		dqm_unlock(dqm);
   2519		return r;
   2520	}
   2521	dqm->active_runlist = true;
   2522	r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
   2523	dqm_unlock(dqm);
   2524
   2525	return r;
   2526}
   2527
   2528#endif