cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

kfd_process.c (53635B)


      1// SPDX-License-Identifier: GPL-2.0 OR MIT
      2/*
      3 * Copyright 2014-2022 Advanced Micro Devices, Inc.
      4 *
      5 * Permission is hereby granted, free of charge, to any person obtaining a
      6 * copy of this software and associated documentation files (the "Software"),
      7 * to deal in the Software without restriction, including without limitation
      8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      9 * and/or sell copies of the Software, and to permit persons to whom the
     10 * Software is furnished to do so, subject to the following conditions:
     11 *
     12 * The above copyright notice and this permission notice shall be included in
     13 * all copies or substantial portions of the Software.
     14 *
     15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
     19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     21 * OTHER DEALINGS IN THE SOFTWARE.
     22 */
     23
     24#include <linux/mutex.h>
     25#include <linux/log2.h>
     26#include <linux/sched.h>
     27#include <linux/sched/mm.h>
     28#include <linux/sched/task.h>
     29#include <linux/mmu_context.h>
     30#include <linux/slab.h>
     31#include <linux/amd-iommu.h>
     32#include <linux/notifier.h>
     33#include <linux/compat.h>
     34#include <linux/mman.h>
     35#include <linux/file.h>
     36#include <linux/pm_runtime.h>
     37#include "amdgpu_amdkfd.h"
     38#include "amdgpu.h"
     39
     40struct mm_struct;
     41
     42#include "kfd_priv.h"
     43#include "kfd_device_queue_manager.h"
     44#include "kfd_iommu.h"
     45#include "kfd_svm.h"
     46
     47/*
     48 * List of struct kfd_process (field kfd_process).
     49 * Unique/indexed by mm_struct*
     50 */
     51DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
     52static DEFINE_MUTEX(kfd_processes_mutex);
     53
     54DEFINE_SRCU(kfd_processes_srcu);
     55
     56/* For process termination handling */
     57static struct workqueue_struct *kfd_process_wq;
     58
     59/* Ordered, single-threaded workqueue for restoring evicted
     60 * processes. Restoring multiple processes concurrently under memory
     61 * pressure can lead to processes blocking each other from validating
     62 * their BOs and result in a live-lock situation where processes
     63 * remain evicted indefinitely.
     64 */
     65static struct workqueue_struct *kfd_restore_wq;
     66
     67static struct kfd_process *find_process(const struct task_struct *thread,
     68					bool ref);
     69static void kfd_process_ref_release(struct kref *ref);
     70static struct kfd_process *create_process(const struct task_struct *thread);
     71static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep);
     72
     73static void evict_process_worker(struct work_struct *work);
     74static void restore_process_worker(struct work_struct *work);
     75
     76static void kfd_process_device_destroy_cwsr_dgpu(struct kfd_process_device *pdd);
     77
     78struct kfd_procfs_tree {
     79	struct kobject *kobj;
     80};
     81
     82static struct kfd_procfs_tree procfs;
     83
     84/*
     85 * Structure for SDMA activity tracking
     86 */
     87struct kfd_sdma_activity_handler_workarea {
     88	struct work_struct sdma_activity_work;
     89	struct kfd_process_device *pdd;
     90	uint64_t sdma_activity_counter;
     91};
     92
     93struct temp_sdma_queue_list {
     94	uint64_t __user *rptr;
     95	uint64_t sdma_val;
     96	unsigned int queue_id;
     97	struct list_head list;
     98};
     99
    100static void kfd_sdma_activity_worker(struct work_struct *work)
    101{
    102	struct kfd_sdma_activity_handler_workarea *workarea;
    103	struct kfd_process_device *pdd;
    104	uint64_t val;
    105	struct mm_struct *mm;
    106	struct queue *q;
    107	struct qcm_process_device *qpd;
    108	struct device_queue_manager *dqm;
    109	int ret = 0;
    110	struct temp_sdma_queue_list sdma_q_list;
    111	struct temp_sdma_queue_list *sdma_q, *next;
    112
    113	workarea = container_of(work, struct kfd_sdma_activity_handler_workarea,
    114				sdma_activity_work);
    115
    116	pdd = workarea->pdd;
    117	if (!pdd)
    118		return;
    119	dqm = pdd->dev->dqm;
    120	qpd = &pdd->qpd;
    121	if (!dqm || !qpd)
    122		return;
    123	/*
     124	 * Total SDMA activity is current SDMA activity + past SDMA activity.
    125	 * Past SDMA count is stored in pdd.
    126	 * To get the current activity counters for all active SDMA queues,
    127	 * we loop over all SDMA queues and get their counts from user-space.
    128	 *
    129	 * We cannot call get_user() with dqm_lock held as it can cause
    130	 * a circular lock dependency situation. To read the SDMA stats,
    131	 * we need to do the following:
    132	 *
    133	 * 1. Create a temporary list of SDMA queue nodes from the qpd->queues_list,
    134	 *    with dqm_lock/dqm_unlock().
     135	 * 2. Call get_user() for each node in the temporary list without dqm_lock.
     136	 *    Save the SDMA count for each node and also add the count to the
     137	 *    running total SDMA count.
     138	 *    It's possible that, during this step, a few SDMA queue nodes were
     139	 *    deleted from the qpd->queues_list.
    140	 * 3. Do a second pass over qpd->queues_list to check if any nodes got deleted.
    141	 *    If any node got deleted, its SDMA count would be captured in the sdma
    142	 *    past activity counter. So subtract the SDMA counter stored in step 2
    143	 *    for this node from the total SDMA count.
    144	 */
    145	INIT_LIST_HEAD(&sdma_q_list.list);
    146
    147	/*
    148	 * Create the temp list of all SDMA queues
    149	 */
    150	dqm_lock(dqm);
    151
    152	list_for_each_entry(q, &qpd->queues_list, list) {
    153		if ((q->properties.type != KFD_QUEUE_TYPE_SDMA) &&
    154		    (q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI))
    155			continue;
    156
    157		sdma_q = kzalloc(sizeof(struct temp_sdma_queue_list), GFP_KERNEL);
    158		if (!sdma_q) {
    159			dqm_unlock(dqm);
    160			goto cleanup;
    161		}
    162
    163		INIT_LIST_HEAD(&sdma_q->list);
    164		sdma_q->rptr = (uint64_t __user *)q->properties.read_ptr;
    165		sdma_q->queue_id = q->properties.queue_id;
    166		list_add_tail(&sdma_q->list, &sdma_q_list.list);
    167	}
    168
    169	/*
     170	 * If the temp list is empty, then no SDMA queue nodes were found in
     171	 * qpd->queues_list. Return the past activity count as the total SDMA
     172	 * count.
    173	 */
    174	if (list_empty(&sdma_q_list.list)) {
    175		workarea->sdma_activity_counter = pdd->sdma_past_activity_counter;
    176		dqm_unlock(dqm);
    177		return;
    178	}
    179
    180	dqm_unlock(dqm);
    181
    182	/*
    183	 * Get the usage count for each SDMA queue in temp_list.
    184	 */
    185	mm = get_task_mm(pdd->process->lead_thread);
    186	if (!mm)
    187		goto cleanup;
    188
    189	kthread_use_mm(mm);
    190
    191	list_for_each_entry(sdma_q, &sdma_q_list.list, list) {
    192		val = 0;
    193		ret = read_sdma_queue_counter(sdma_q->rptr, &val);
    194		if (ret) {
    195			pr_debug("Failed to read SDMA queue active counter for queue id: %d",
    196				 sdma_q->queue_id);
    197		} else {
    198			sdma_q->sdma_val = val;
    199			workarea->sdma_activity_counter += val;
    200		}
    201	}
    202
    203	kthread_unuse_mm(mm);
    204	mmput(mm);
    205
    206	/*
     207	 * Do a second iteration over qpd->queues_list to check if any SDMA
     208	 * queue nodes were deleted while fetching the SDMA counters.
    209	 */
    210	dqm_lock(dqm);
    211
    212	workarea->sdma_activity_counter += pdd->sdma_past_activity_counter;
    213
    214	list_for_each_entry(q, &qpd->queues_list, list) {
    215		if (list_empty(&sdma_q_list.list))
    216			break;
    217
    218		if ((q->properties.type != KFD_QUEUE_TYPE_SDMA) &&
    219		    (q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI))
    220			continue;
    221
    222		list_for_each_entry_safe(sdma_q, next, &sdma_q_list.list, list) {
    223			if (((uint64_t __user *)q->properties.read_ptr == sdma_q->rptr) &&
    224			     (sdma_q->queue_id == q->properties.queue_id)) {
    225				list_del(&sdma_q->list);
    226				kfree(sdma_q);
    227				break;
    228			}
    229		}
    230	}
    231
    232	dqm_unlock(dqm);
    233
    234	/*
     235	 * If the temp list is not empty, some queues were deleted from
     236	 * qpd->queues_list during the SDMA usage read. Subtract the SDMA
    237	 * count for each node from the total SDMA count.
    238	 */
    239	list_for_each_entry_safe(sdma_q, next, &sdma_q_list.list, list) {
    240		workarea->sdma_activity_counter -= sdma_q->sdma_val;
    241		list_del(&sdma_q->list);
    242		kfree(sdma_q);
    243	}
    244
    245	return;
    246
    247cleanup:
    248	list_for_each_entry_safe(sdma_q, next, &sdma_q_list.list, list) {
    249		list_del(&sdma_q->list);
    250		kfree(sdma_q);
    251	}
    252}
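
The three-pass scheme described in the comment above exists to avoid double counting: a queue destroyed while its counter is being read has its count folded into pdd->sdma_past_activity_counter, so the snapshot taken for it in pass 2 must be subtracted again in pass 3. A minimal standalone sketch of that bookkeeping, using hypothetical numbers rather than real driver state:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Hypothetical values standing in for driver state. */
	uint64_t past = 1000;      /* pdd->sdma_past_activity_counter */
	uint64_t snap_q0 = 50;     /* pass-2 snapshot, queue stays alive */
	uint64_t snap_q1 = 30;     /* pass-2 snapshot, queue deleted later */

	/* Deleting the queue folds its count into the past counter. */
	past += snap_q1;

	/* Pass 3: q1 is no longer on qpd->queues_list, so its pass-2
	 * snapshot is subtracted to avoid counting it twice. */
	uint64_t total = past + snap_q0 + snap_q1 - snap_q1;

	printf("total SDMA activity = %llu\n", (unsigned long long)total);
	return 0;
}
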
    253
    254/**
     255 * kfd_get_cu_occupancy - Collect the number of waves in flight on this device
     256 * for the current process and translate the wave count into the number of
     257 * compute units that are occupied.
     258 *
     259 * @attr: Handle of the attribute that allows reporting of the wave count. The
     260 * attribute handle encapsulates the GPU device it is associated with, thereby
     261 * allowing collection of waves in flight, etc.
     262 * @buffer: Handle of the user-provided buffer updated with the wave count
    263 *
    264 * Return: Number of bytes written to user buffer or an error value
    265 */
    266static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)
    267{
    268	int cu_cnt;
    269	int wave_cnt;
    270	int max_waves_per_cu;
    271	struct kfd_dev *dev = NULL;
    272	struct kfd_process *proc = NULL;
    273	struct kfd_process_device *pdd = NULL;
    274
    275	pdd = container_of(attr, struct kfd_process_device, attr_cu_occupancy);
    276	dev = pdd->dev;
    277	if (dev->kfd2kgd->get_cu_occupancy == NULL)
    278		return -EINVAL;
    279
    280	cu_cnt = 0;
    281	proc = pdd->process;
    282	if (pdd->qpd.queue_count == 0) {
    283		pr_debug("Gpu-Id: %d has no active queues for process %d\n",
    284			 dev->id, proc->pasid);
    285		return snprintf(buffer, PAGE_SIZE, "%d\n", cu_cnt);
    286	}
    287
     288	/* Collect wave count from the device if it supports it */
    289	wave_cnt = 0;
    290	max_waves_per_cu = 0;
    291	dev->kfd2kgd->get_cu_occupancy(dev->adev, proc->pasid, &wave_cnt,
    292			&max_waves_per_cu);
    293
    294	/* Translate wave count to number of compute units */
    295	cu_cnt = (wave_cnt + (max_waves_per_cu - 1)) / max_waves_per_cu;
    296	return snprintf(buffer, PAGE_SIZE, "%d\n", cu_cnt);
    297}
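
The wave-count-to-CU translation above is a ceiling division: each compute unit holds at most max_waves_per_cu waves, so the number of occupied CUs is ceil(wave_cnt / max_waves_per_cu). A standalone sketch of the same arithmetic with hypothetical numbers (waves_to_cus is an illustrative name, not a driver function):

#include <stdio.h>

/* Same rounding-up division as in kfd_get_cu_occupancy(). */
static int waves_to_cus(int wave_cnt, int max_waves_per_cu)
{
	return (wave_cnt + (max_waves_per_cu - 1)) / max_waves_per_cu;
}

int main(void)
{
	/* Hypothetical: 100 waves in flight, up to 40 waves per CU. */
	printf("occupied CUs: %d\n", waves_to_cus(100, 40)); /* prints 3 */
	return 0;
}
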
    298
    299static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
    300			       char *buffer)
    301{
    302	if (strcmp(attr->name, "pasid") == 0) {
    303		struct kfd_process *p = container_of(attr, struct kfd_process,
    304						     attr_pasid);
    305
    306		return snprintf(buffer, PAGE_SIZE, "%d\n", p->pasid);
    307	} else if (strncmp(attr->name, "vram_", 5) == 0) {
    308		struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
    309							      attr_vram);
    310		return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage));
    311	} else if (strncmp(attr->name, "sdma_", 5) == 0) {
    312		struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
    313							      attr_sdma);
    314		struct kfd_sdma_activity_handler_workarea sdma_activity_work_handler;
    315
    316		INIT_WORK(&sdma_activity_work_handler.sdma_activity_work,
    317					kfd_sdma_activity_worker);
    318
    319		sdma_activity_work_handler.pdd = pdd;
    320		sdma_activity_work_handler.sdma_activity_counter = 0;
    321
    322		schedule_work(&sdma_activity_work_handler.sdma_activity_work);
    323
    324		flush_work(&sdma_activity_work_handler.sdma_activity_work);
    325
    326		return snprintf(buffer, PAGE_SIZE, "%llu\n",
    327				(sdma_activity_work_handler.sdma_activity_counter)/
    328				 SDMA_ACTIVITY_DIVISOR);
    329	} else {
    330		pr_err("Invalid attribute");
    331		return -EINVAL;
    332	}
    333
    334	return 0;
    335}
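
kfd_procfs_show() dispatches on attr->name and uses container_of() to recover the structure that embeds the attribute. A standalone illustration of that recovery step, with simplified stand-in types rather than the real kfd structures:

#include <stdio.h>
#include <stddef.h>

/* Minimal stand-in for the kernel's container_of(). */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct attribute { const char *name; };

/* Simplified stand-in for struct kfd_process_device. */
struct fake_pdd {
	unsigned long long vram_usage;
	struct attribute attr_vram;	/* embedded attribute */
};

int main(void)
{
	struct fake_pdd pdd = { .vram_usage = 4096, .attr_vram = { "vram_0" } };
	struct attribute *attr = &pdd.attr_vram;

	/* Same recovery step the show callback performs. */
	struct fake_pdd *back = container_of(attr, struct fake_pdd, attr_vram);

	printf("%s = %llu\n", attr->name, back->vram_usage);
	return 0;
}
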
    336
    337static void kfd_procfs_kobj_release(struct kobject *kobj)
    338{
    339	kfree(kobj);
    340}
    341
    342static const struct sysfs_ops kfd_procfs_ops = {
    343	.show = kfd_procfs_show,
    344};
    345
    346static struct kobj_type procfs_type = {
    347	.release = kfd_procfs_kobj_release,
    348	.sysfs_ops = &kfd_procfs_ops,
    349};
    350
    351void kfd_procfs_init(void)
    352{
    353	int ret = 0;
    354
    355	procfs.kobj = kfd_alloc_struct(procfs.kobj);
    356	if (!procfs.kobj)
    357		return;
    358
    359	ret = kobject_init_and_add(procfs.kobj, &procfs_type,
    360				   &kfd_device->kobj, "proc");
    361	if (ret) {
    362		pr_warn("Could not create procfs proc folder");
    363		/* If we fail to create the procfs, clean up */
    364		kfd_procfs_shutdown();
    365	}
    366}
    367
    368void kfd_procfs_shutdown(void)
    369{
    370	if (procfs.kobj) {
    371		kobject_del(procfs.kobj);
    372		kobject_put(procfs.kobj);
    373		procfs.kobj = NULL;
    374	}
    375}
    376
    377static ssize_t kfd_procfs_queue_show(struct kobject *kobj,
    378				     struct attribute *attr, char *buffer)
    379{
    380	struct queue *q = container_of(kobj, struct queue, kobj);
    381
    382	if (!strcmp(attr->name, "size"))
    383		return snprintf(buffer, PAGE_SIZE, "%llu",
    384				q->properties.queue_size);
    385	else if (!strcmp(attr->name, "type"))
    386		return snprintf(buffer, PAGE_SIZE, "%d", q->properties.type);
    387	else if (!strcmp(attr->name, "gpuid"))
    388		return snprintf(buffer, PAGE_SIZE, "%u", q->device->id);
    389	else
    390		pr_err("Invalid attribute");
    391
    392	return 0;
    393}
    394
    395static ssize_t kfd_procfs_stats_show(struct kobject *kobj,
    396				     struct attribute *attr, char *buffer)
    397{
    398	if (strcmp(attr->name, "evicted_ms") == 0) {
    399		struct kfd_process_device *pdd = container_of(attr,
    400				struct kfd_process_device,
    401				attr_evict);
    402		uint64_t evict_jiffies;
    403
    404		evict_jiffies = atomic64_read(&pdd->evict_duration_counter);
    405
    406		return snprintf(buffer,
    407				PAGE_SIZE,
    408				"%llu\n",
    409				jiffies64_to_msecs(evict_jiffies));
    410
    411	/* Sysfs handle that gets CU occupancy is per device */
    412	} else if (strcmp(attr->name, "cu_occupancy") == 0) {
    413		return kfd_get_cu_occupancy(attr, buffer);
    414	} else {
    415		pr_err("Invalid attribute");
    416	}
    417
    418	return 0;
    419}
    420
    421static ssize_t kfd_sysfs_counters_show(struct kobject *kobj,
    422				       struct attribute *attr, char *buf)
    423{
    424	struct kfd_process_device *pdd;
    425
    426	if (!strcmp(attr->name, "faults")) {
    427		pdd = container_of(attr, struct kfd_process_device,
    428				   attr_faults);
    429		return sysfs_emit(buf, "%llu\n", READ_ONCE(pdd->faults));
    430	}
    431	if (!strcmp(attr->name, "page_in")) {
    432		pdd = container_of(attr, struct kfd_process_device,
    433				   attr_page_in);
    434		return sysfs_emit(buf, "%llu\n", READ_ONCE(pdd->page_in));
    435	}
    436	if (!strcmp(attr->name, "page_out")) {
    437		pdd = container_of(attr, struct kfd_process_device,
    438				   attr_page_out);
    439		return sysfs_emit(buf, "%llu\n", READ_ONCE(pdd->page_out));
    440	}
    441	return 0;
    442}
    443
    444static struct attribute attr_queue_size = {
    445	.name = "size",
    446	.mode = KFD_SYSFS_FILE_MODE
    447};
    448
    449static struct attribute attr_queue_type = {
    450	.name = "type",
    451	.mode = KFD_SYSFS_FILE_MODE
    452};
    453
    454static struct attribute attr_queue_gpuid = {
    455	.name = "gpuid",
    456	.mode = KFD_SYSFS_FILE_MODE
    457};
    458
    459static struct attribute *procfs_queue_attrs[] = {
    460	&attr_queue_size,
    461	&attr_queue_type,
    462	&attr_queue_gpuid,
    463	NULL
    464};
    465ATTRIBUTE_GROUPS(procfs_queue);
    466
    467static const struct sysfs_ops procfs_queue_ops = {
    468	.show = kfd_procfs_queue_show,
    469};
    470
    471static struct kobj_type procfs_queue_type = {
    472	.sysfs_ops = &procfs_queue_ops,
    473	.default_groups = procfs_queue_groups,
    474};
    475
    476static const struct sysfs_ops procfs_stats_ops = {
    477	.show = kfd_procfs_stats_show,
    478};
    479
    480static struct kobj_type procfs_stats_type = {
    481	.sysfs_ops = &procfs_stats_ops,
    482	.release = kfd_procfs_kobj_release,
    483};
    484
    485static const struct sysfs_ops sysfs_counters_ops = {
    486	.show = kfd_sysfs_counters_show,
    487};
    488
    489static struct kobj_type sysfs_counters_type = {
    490	.sysfs_ops = &sysfs_counters_ops,
    491	.release = kfd_procfs_kobj_release,
    492};
    493
    494int kfd_procfs_add_queue(struct queue *q)
    495{
    496	struct kfd_process *proc;
    497	int ret;
    498
    499	if (!q || !q->process)
    500		return -EINVAL;
    501	proc = q->process;
    502
    503	/* Create proc/<pid>/queues/<queue id> folder */
    504	if (!proc->kobj_queues)
    505		return -EFAULT;
    506	ret = kobject_init_and_add(&q->kobj, &procfs_queue_type,
    507			proc->kobj_queues, "%u", q->properties.queue_id);
    508	if (ret < 0) {
    509		pr_warn("Creating proc/<pid>/queues/%u failed",
    510			q->properties.queue_id);
    511		kobject_put(&q->kobj);
    512		return ret;
    513	}
    514
    515	return 0;
    516}
    517
    518static void kfd_sysfs_create_file(struct kobject *kobj, struct attribute *attr,
    519				 char *name)
    520{
    521	int ret;
    522
    523	if (!kobj || !attr || !name)
    524		return;
    525
    526	attr->name = name;
    527	attr->mode = KFD_SYSFS_FILE_MODE;
    528	sysfs_attr_init(attr);
    529
    530	ret = sysfs_create_file(kobj, attr);
    531	if (ret)
    532		pr_warn("Create sysfs %s/%s failed %d", kobj->name, name, ret);
    533}
    534
    535static void kfd_procfs_add_sysfs_stats(struct kfd_process *p)
    536{
    537	int ret;
    538	int i;
    539	char stats_dir_filename[MAX_SYSFS_FILENAME_LEN];
    540
    541	if (!p || !p->kobj)
    542		return;
    543
    544	/*
    545	 * Create sysfs files for each GPU:
    546	 * - proc/<pid>/stats_<gpuid>/
    547	 * - proc/<pid>/stats_<gpuid>/evicted_ms
    548	 * - proc/<pid>/stats_<gpuid>/cu_occupancy
    549	 */
    550	for (i = 0; i < p->n_pdds; i++) {
    551		struct kfd_process_device *pdd = p->pdds[i];
    552
    553		snprintf(stats_dir_filename, MAX_SYSFS_FILENAME_LEN,
    554				"stats_%u", pdd->dev->id);
    555		pdd->kobj_stats = kfd_alloc_struct(pdd->kobj_stats);
    556		if (!pdd->kobj_stats)
    557			return;
    558
    559		ret = kobject_init_and_add(pdd->kobj_stats,
    560					   &procfs_stats_type,
    561					   p->kobj,
    562					   stats_dir_filename);
    563
    564		if (ret) {
    565			pr_warn("Creating KFD proc/stats_%s folder failed",
    566				stats_dir_filename);
    567			kobject_put(pdd->kobj_stats);
    568			pdd->kobj_stats = NULL;
    569			return;
    570		}
    571
    572		kfd_sysfs_create_file(pdd->kobj_stats, &pdd->attr_evict,
    573				      "evicted_ms");
    574		/* Add sysfs file to report compute unit occupancy */
    575		if (pdd->dev->kfd2kgd->get_cu_occupancy)
    576			kfd_sysfs_create_file(pdd->kobj_stats,
    577					      &pdd->attr_cu_occupancy,
    578					      "cu_occupancy");
    579	}
    580}
    581
    582static void kfd_procfs_add_sysfs_counters(struct kfd_process *p)
    583{
    584	int ret = 0;
    585	int i;
    586	char counters_dir_filename[MAX_SYSFS_FILENAME_LEN];
    587
    588	if (!p || !p->kobj)
    589		return;
    590
    591	/*
    592	 * Create sysfs files for each GPU which supports SVM
    593	 * - proc/<pid>/counters_<gpuid>/
    594	 * - proc/<pid>/counters_<gpuid>/faults
    595	 * - proc/<pid>/counters_<gpuid>/page_in
    596	 * - proc/<pid>/counters_<gpuid>/page_out
    597	 */
    598	for_each_set_bit(i, p->svms.bitmap_supported, p->n_pdds) {
    599		struct kfd_process_device *pdd = p->pdds[i];
    600		struct kobject *kobj_counters;
    601
    602		snprintf(counters_dir_filename, MAX_SYSFS_FILENAME_LEN,
    603			"counters_%u", pdd->dev->id);
    604		kobj_counters = kfd_alloc_struct(kobj_counters);
    605		if (!kobj_counters)
    606			return;
    607
    608		ret = kobject_init_and_add(kobj_counters, &sysfs_counters_type,
    609					   p->kobj, counters_dir_filename);
    610		if (ret) {
    611			pr_warn("Creating KFD proc/%s folder failed",
    612				counters_dir_filename);
    613			kobject_put(kobj_counters);
    614			return;
    615		}
    616
    617		pdd->kobj_counters = kobj_counters;
    618		kfd_sysfs_create_file(kobj_counters, &pdd->attr_faults,
    619				      "faults");
    620		kfd_sysfs_create_file(kobj_counters, &pdd->attr_page_in,
    621				      "page_in");
    622		kfd_sysfs_create_file(kobj_counters, &pdd->attr_page_out,
    623				      "page_out");
    624	}
    625}
    626
    627static void kfd_procfs_add_sysfs_files(struct kfd_process *p)
    628{
    629	int i;
    630
    631	if (!p || !p->kobj)
    632		return;
    633
    634	/*
    635	 * Create sysfs files for each GPU:
    636	 * - proc/<pid>/vram_<gpuid>
    637	 * - proc/<pid>/sdma_<gpuid>
    638	 */
    639	for (i = 0; i < p->n_pdds; i++) {
    640		struct kfd_process_device *pdd = p->pdds[i];
    641
    642		snprintf(pdd->vram_filename, MAX_SYSFS_FILENAME_LEN, "vram_%u",
    643			 pdd->dev->id);
    644		kfd_sysfs_create_file(p->kobj, &pdd->attr_vram,
    645				      pdd->vram_filename);
    646
    647		snprintf(pdd->sdma_filename, MAX_SYSFS_FILENAME_LEN, "sdma_%u",
    648			 pdd->dev->id);
    649		kfd_sysfs_create_file(p->kobj, &pdd->attr_sdma,
    650					    pdd->sdma_filename);
    651	}
    652}
    653
    654void kfd_procfs_del_queue(struct queue *q)
    655{
    656	if (!q)
    657		return;
    658
    659	kobject_del(&q->kobj);
    660	kobject_put(&q->kobj);
    661}
    662
    663int kfd_process_create_wq(void)
    664{
    665	if (!kfd_process_wq)
    666		kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
    667	if (!kfd_restore_wq)
    668		kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq", 0);
    669
    670	if (!kfd_process_wq || !kfd_restore_wq) {
    671		kfd_process_destroy_wq();
    672		return -ENOMEM;
    673	}
    674
    675	return 0;
    676}
    677
    678void kfd_process_destroy_wq(void)
    679{
    680	if (kfd_process_wq) {
    681		destroy_workqueue(kfd_process_wq);
    682		kfd_process_wq = NULL;
    683	}
    684	if (kfd_restore_wq) {
    685		destroy_workqueue(kfd_restore_wq);
    686		kfd_restore_wq = NULL;
    687	}
    688}
    689
    690static void kfd_process_free_gpuvm(struct kgd_mem *mem,
    691			struct kfd_process_device *pdd, void *kptr)
    692{
    693	struct kfd_dev *dev = pdd->dev;
    694
    695	if (kptr) {
    696		amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(dev->adev, mem);
    697		kptr = NULL;
    698	}
    699
    700	amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->adev, mem, pdd->drm_priv);
    701	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, mem, pdd->drm_priv,
    702					       NULL);
    703}
    704
    705/* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process
     706 *	This function should only be called right after the process
     707 *	is created and while kfd_processes_mutex is still being held
     708 *	to avoid concurrency. Because of that exclusiveness, we do
    709 *	not need to take p->mutex.
    710 */
    711static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
    712				   uint64_t gpu_va, uint32_t size,
    713				   uint32_t flags, struct kgd_mem **mem, void **kptr)
    714{
    715	struct kfd_dev *kdev = pdd->dev;
    716	int err;
    717
    718	err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->adev, gpu_va, size,
    719						 pdd->drm_priv, mem, NULL,
    720						 flags, false);
    721	if (err)
    722		goto err_alloc_mem;
    723
    724	err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->adev, *mem,
    725			pdd->drm_priv);
    726	if (err)
    727		goto err_map_mem;
    728
    729	err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->adev, *mem, true);
    730	if (err) {
    731		pr_debug("Sync memory failed, wait interrupted by user signal\n");
    732		goto sync_memory_failed;
    733	}
    734
    735	if (kptr) {
    736		err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev->adev,
    737				(struct kgd_mem *)*mem, kptr, NULL);
    738		if (err) {
    739			pr_debug("Map GTT BO to kernel failed\n");
    740			goto sync_memory_failed;
    741		}
    742	}
    743
    744	return err;
    745
    746sync_memory_failed:
    747	amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(kdev->adev, *mem, pdd->drm_priv);
    748
    749err_map_mem:
    750	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->adev, *mem, pdd->drm_priv,
    751					       NULL);
    752err_alloc_mem:
    753	*mem = NULL;
    754	*kptr = NULL;
    755	return err;
    756}
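
kfd_process_alloc_gpuvm() above uses the usual kernel goto-based unwind: each error label undoes only the steps that already succeeded, in reverse order, before returning. A minimal standalone sketch of the same pattern with placeholder step names (not driver calls):

#include <stdio.h>

/* Placeholder steps, each returning 0 on success like the
 * amdgpu_amdkfd_* calls above. */
static int step_alloc(void) { return 0; }
static int step_map(void)   { return 0; }
static int step_sync(void)  { return -1; /* pretend this one fails */ }

static void undo_map(void)   { printf("unmap\n"); }
static void undo_alloc(void) { printf("free\n"); }

static int do_setup(void)
{
	int err;

	err = step_alloc();
	if (err)
		goto err_alloc;
	err = step_map();
	if (err)
		goto err_map;
	err = step_sync();
	if (err)
		goto err_sync;
	return 0;

err_sync:
	undo_map();
err_map:
	undo_alloc();
err_alloc:
	return err;
}

int main(void)
{
	printf("do_setup() = %d\n", do_setup());
	return 0;
}
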
    757
    758/* kfd_process_device_reserve_ib_mem - Reserve memory inside the
     759 *	process for IB usage. The reserved memory is used by KFD to submit
     760 *	IBs to AMDGPU from the kernel. If the memory is reserved
    761 *	successfully, ib_kaddr will have the CPU/kernel
    762 *	address. Check ib_kaddr before accessing the memory.
    763 */
    764static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
    765{
    766	struct qcm_process_device *qpd = &pdd->qpd;
    767	uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT |
    768			KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
    769			KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE |
    770			KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
    771	struct kgd_mem *mem;
    772	void *kaddr;
    773	int ret;
    774
    775	if (qpd->ib_kaddr || !qpd->ib_base)
    776		return 0;
    777
    778	/* ib_base is only set for dGPU */
    779	ret = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE, flags,
    780				      &mem, &kaddr);
    781	if (ret)
    782		return ret;
    783
    784	qpd->ib_mem = mem;
    785	qpd->ib_kaddr = kaddr;
    786
    787	return 0;
    788}
    789
    790static void kfd_process_device_destroy_ib_mem(struct kfd_process_device *pdd)
    791{
    792	struct qcm_process_device *qpd = &pdd->qpd;
    793
    794	if (!qpd->ib_kaddr || !qpd->ib_base)
    795		return;
    796
    797	kfd_process_free_gpuvm(qpd->ib_mem, pdd, qpd->ib_kaddr);
    798}
    799
    800struct kfd_process *kfd_create_process(struct file *filep)
    801{
    802	struct kfd_process *process;
    803	struct task_struct *thread = current;
    804	int ret;
    805
    806	if (!thread->mm)
    807		return ERR_PTR(-EINVAL);
    808
    809	/* Only the pthreads threading model is supported. */
    810	if (thread->group_leader->mm != thread->mm)
    811		return ERR_PTR(-EINVAL);
    812
    813	/*
     814	 * Take the kfd_processes_mutex before starting process creation
     815	 * so that two threads of the same process cannot create two
     816	 * kfd_process structures.
    817	 */
    818	mutex_lock(&kfd_processes_mutex);
    819
    820	/* A prior open of /dev/kfd could have already created the process. */
    821	process = find_process(thread, false);
    822	if (process) {
    823		pr_debug("Process already found\n");
    824	} else {
    825		process = create_process(thread);
    826		if (IS_ERR(process))
    827			goto out;
    828
    829		ret = kfd_process_init_cwsr_apu(process, filep);
    830		if (ret)
    831			goto out_destroy;
    832
    833		if (!procfs.kobj)
    834			goto out;
    835
    836		process->kobj = kfd_alloc_struct(process->kobj);
    837		if (!process->kobj) {
    838			pr_warn("Creating procfs kobject failed");
    839			goto out;
    840		}
    841		ret = kobject_init_and_add(process->kobj, &procfs_type,
    842					   procfs.kobj, "%d",
    843					   (int)process->lead_thread->pid);
    844		if (ret) {
    845			pr_warn("Creating procfs pid directory failed");
    846			kobject_put(process->kobj);
    847			goto out;
    848		}
    849
    850		kfd_sysfs_create_file(process->kobj, &process->attr_pasid,
    851				      "pasid");
    852
    853		process->kobj_queues = kobject_create_and_add("queues",
    854							process->kobj);
    855		if (!process->kobj_queues)
    856			pr_warn("Creating KFD proc/queues folder failed");
    857
    858		kfd_procfs_add_sysfs_stats(process);
    859		kfd_procfs_add_sysfs_files(process);
    860		kfd_procfs_add_sysfs_counters(process);
    861	}
    862out:
    863	if (!IS_ERR(process))
    864		kref_get(&process->ref);
    865	mutex_unlock(&kfd_processes_mutex);
    866
    867	return process;
    868
    869out_destroy:
    870	hash_del_rcu(&process->kfd_processes);
    871	mutex_unlock(&kfd_processes_mutex);
    872	synchronize_srcu(&kfd_processes_srcu);
    873	/* kfd_process_free_notifier will trigger the cleanup */
    874	mmu_notifier_put(&process->mmu_notifier);
    875	return ERR_PTR(ret);
    876}
    877
    878struct kfd_process *kfd_get_process(const struct task_struct *thread)
    879{
    880	struct kfd_process *process;
    881
    882	if (!thread->mm)
    883		return ERR_PTR(-EINVAL);
    884
    885	/* Only the pthreads threading model is supported. */
    886	if (thread->group_leader->mm != thread->mm)
    887		return ERR_PTR(-EINVAL);
    888
    889	process = find_process(thread, false);
    890	if (!process)
    891		return ERR_PTR(-EINVAL);
    892
    893	return process;
    894}
    895
    896static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
    897{
    898	struct kfd_process *process;
    899
    900	hash_for_each_possible_rcu(kfd_processes_table, process,
    901					kfd_processes, (uintptr_t)mm)
    902		if (process->mm == mm)
    903			return process;
    904
    905	return NULL;
    906}
    907
    908static struct kfd_process *find_process(const struct task_struct *thread,
    909					bool ref)
    910{
    911	struct kfd_process *p;
    912	int idx;
    913
    914	idx = srcu_read_lock(&kfd_processes_srcu);
    915	p = find_process_by_mm(thread->mm);
    916	if (p && ref)
    917		kref_get(&p->ref);
    918	srcu_read_unlock(&kfd_processes_srcu, idx);
    919
    920	return p;
    921}
    922
    923void kfd_unref_process(struct kfd_process *p)
    924{
    925	kref_put(&p->ref, kfd_process_ref_release);
    926}
    927
    928/* This increments the process->ref counter. */
    929struct kfd_process *kfd_lookup_process_by_pid(struct pid *pid)
    930{
    931	struct task_struct *task = NULL;
    932	struct kfd_process *p    = NULL;
    933
    934	if (!pid) {
    935		task = current;
    936		get_task_struct(task);
    937	} else {
    938		task = get_pid_task(pid, PIDTYPE_PID);
    939	}
    940
    941	if (task) {
    942		p = find_process(task, true);
    943		put_task_struct(task);
    944	}
    945
    946	return p;
    947}
    948
    949static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
    950{
    951	struct kfd_process *p = pdd->process;
    952	void *mem;
    953	int id;
    954	int i;
    955
    956	/*
     957	 * Remove all handles from the idr and release the corresponding
     958	 * local memory objects.
    959	 */
    960	idr_for_each_entry(&pdd->alloc_idr, mem, id) {
    961
    962		for (i = 0; i < p->n_pdds; i++) {
    963			struct kfd_process_device *peer_pdd = p->pdds[i];
    964
    965			if (!peer_pdd->drm_priv)
    966				continue;
    967			amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
    968				peer_pdd->dev->adev, mem, peer_pdd->drm_priv);
    969		}
    970
    971		amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, mem,
    972						       pdd->drm_priv, NULL);
    973		kfd_process_device_remove_obj_handle(pdd, id);
    974	}
    975}
    976
    977/*
     978 * Just kunmap and unpin the signal BO here. It will be freed in
    979 * kfd_process_free_outstanding_kfd_bos()
    980 */
    981static void kfd_process_kunmap_signal_bo(struct kfd_process *p)
    982{
    983	struct kfd_process_device *pdd;
    984	struct kfd_dev *kdev;
    985	void *mem;
    986
    987	kdev = kfd_device_by_id(GET_GPU_ID(p->signal_handle));
    988	if (!kdev)
    989		return;
    990
    991	mutex_lock(&p->mutex);
    992
    993	pdd = kfd_get_process_device_data(kdev, p);
    994	if (!pdd)
    995		goto out;
    996
    997	mem = kfd_process_device_translate_handle(
    998		pdd, GET_IDR_HANDLE(p->signal_handle));
    999	if (!mem)
   1000		goto out;
   1001
   1002	amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kdev->adev, mem);
   1003
   1004out:
   1005	mutex_unlock(&p->mutex);
   1006}
   1007
   1008static void kfd_process_free_outstanding_kfd_bos(struct kfd_process *p)
   1009{
   1010	int i;
   1011
   1012	for (i = 0; i < p->n_pdds; i++)
   1013		kfd_process_device_free_bos(p->pdds[i]);
   1014}
   1015
   1016static void kfd_process_destroy_pdds(struct kfd_process *p)
   1017{
   1018	int i;
   1019
   1020	for (i = 0; i < p->n_pdds; i++) {
   1021		struct kfd_process_device *pdd = p->pdds[i];
   1022
   1023		pr_debug("Releasing pdd (topology id %d) for process (pasid 0x%x)\n",
   1024				pdd->dev->id, p->pasid);
   1025
   1026		kfd_process_device_destroy_cwsr_dgpu(pdd);
   1027		kfd_process_device_destroy_ib_mem(pdd);
   1028
   1029		if (pdd->drm_file) {
   1030			amdgpu_amdkfd_gpuvm_release_process_vm(
   1031					pdd->dev->adev, pdd->drm_priv);
   1032			fput(pdd->drm_file);
   1033		}
   1034
   1035		if (pdd->qpd.cwsr_kaddr && !pdd->qpd.cwsr_base)
   1036			free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
   1037				get_order(KFD_CWSR_TBA_TMA_SIZE));
   1038
   1039		bitmap_free(pdd->qpd.doorbell_bitmap);
   1040		idr_destroy(&pdd->alloc_idr);
   1041
   1042		kfd_free_process_doorbells(pdd->dev, pdd->doorbell_index);
   1043
   1044		if (pdd->dev->shared_resources.enable_mes)
   1045			amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev,
   1046						   pdd->proc_ctx_bo);
   1047		/*
   1048		 * before destroying pdd, make sure to report availability
   1049		 * for auto suspend
   1050		 */
   1051		if (pdd->runtime_inuse) {
   1052			pm_runtime_mark_last_busy(pdd->dev->ddev->dev);
   1053			pm_runtime_put_autosuspend(pdd->dev->ddev->dev);
   1054			pdd->runtime_inuse = false;
   1055		}
   1056
   1057		kfree(pdd);
   1058		p->pdds[i] = NULL;
   1059	}
   1060	p->n_pdds = 0;
   1061}
   1062
   1063static void kfd_process_remove_sysfs(struct kfd_process *p)
   1064{
   1065	struct kfd_process_device *pdd;
   1066	int i;
   1067
   1068	if (!p->kobj)
   1069		return;
   1070
   1071	sysfs_remove_file(p->kobj, &p->attr_pasid);
   1072	kobject_del(p->kobj_queues);
   1073	kobject_put(p->kobj_queues);
   1074	p->kobj_queues = NULL;
   1075
   1076	for (i = 0; i < p->n_pdds; i++) {
   1077		pdd = p->pdds[i];
   1078
   1079		sysfs_remove_file(p->kobj, &pdd->attr_vram);
   1080		sysfs_remove_file(p->kobj, &pdd->attr_sdma);
   1081
   1082		sysfs_remove_file(pdd->kobj_stats, &pdd->attr_evict);
   1083		if (pdd->dev->kfd2kgd->get_cu_occupancy)
   1084			sysfs_remove_file(pdd->kobj_stats,
   1085					  &pdd->attr_cu_occupancy);
   1086		kobject_del(pdd->kobj_stats);
   1087		kobject_put(pdd->kobj_stats);
   1088		pdd->kobj_stats = NULL;
   1089	}
   1090
   1091	for_each_set_bit(i, p->svms.bitmap_supported, p->n_pdds) {
   1092		pdd = p->pdds[i];
   1093
   1094		sysfs_remove_file(pdd->kobj_counters, &pdd->attr_faults);
   1095		sysfs_remove_file(pdd->kobj_counters, &pdd->attr_page_in);
   1096		sysfs_remove_file(pdd->kobj_counters, &pdd->attr_page_out);
   1097		kobject_del(pdd->kobj_counters);
   1098		kobject_put(pdd->kobj_counters);
   1099		pdd->kobj_counters = NULL;
   1100	}
   1101
   1102	kobject_del(p->kobj);
   1103	kobject_put(p->kobj);
   1104	p->kobj = NULL;
   1105}
   1106
   1107/* No process locking is needed in this function, because the process
   1108 * is not findable any more. We must assume that no other thread is
   1109 * using it any more, otherwise we couldn't safely free the process
   1110 * structure in the end.
   1111 */
   1112static void kfd_process_wq_release(struct work_struct *work)
   1113{
   1114	struct kfd_process *p = container_of(work, struct kfd_process,
   1115					     release_work);
   1116
   1117	kfd_process_remove_sysfs(p);
   1118	kfd_iommu_unbind_process(p);
   1119
   1120	kfd_process_kunmap_signal_bo(p);
   1121	kfd_process_free_outstanding_kfd_bos(p);
   1122	svm_range_list_fini(p);
   1123
   1124	kfd_process_destroy_pdds(p);
   1125	dma_fence_put(p->ef);
   1126
   1127	kfd_event_free_process(p);
   1128
   1129	kfd_pasid_free(p->pasid);
   1130	mutex_destroy(&p->mutex);
   1131
   1132	put_task_struct(p->lead_thread);
   1133
   1134	kfree(p);
   1135}
   1136
   1137static void kfd_process_ref_release(struct kref *ref)
   1138{
   1139	struct kfd_process *p = container_of(ref, struct kfd_process, ref);
   1140
   1141	INIT_WORK(&p->release_work, kfd_process_wq_release);
   1142	queue_work(kfd_process_wq, &p->release_work);
   1143}
   1144
   1145static struct mmu_notifier *kfd_process_alloc_notifier(struct mm_struct *mm)
   1146{
   1147	int idx = srcu_read_lock(&kfd_processes_srcu);
   1148	struct kfd_process *p = find_process_by_mm(mm);
   1149
   1150	srcu_read_unlock(&kfd_processes_srcu, idx);
   1151
   1152	return p ? &p->mmu_notifier : ERR_PTR(-ESRCH);
   1153}
   1154
   1155static void kfd_process_free_notifier(struct mmu_notifier *mn)
   1156{
   1157	kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier));
   1158}
   1159
   1160static void kfd_process_notifier_release(struct mmu_notifier *mn,
   1161					struct mm_struct *mm)
   1162{
   1163	struct kfd_process *p;
   1164
   1165	/*
    1166	 * The kfd_process structure cannot be freed because the
    1167	 * mmu_notifier SRCU is read-locked.
   1168	 */
   1169	p = container_of(mn, struct kfd_process, mmu_notifier);
   1170	if (WARN_ON(p->mm != mm))
   1171		return;
   1172
   1173	mutex_lock(&kfd_processes_mutex);
   1174	hash_del_rcu(&p->kfd_processes);
   1175	mutex_unlock(&kfd_processes_mutex);
   1176	synchronize_srcu(&kfd_processes_srcu);
   1177
   1178	cancel_delayed_work_sync(&p->eviction_work);
   1179	cancel_delayed_work_sync(&p->restore_work);
   1180
   1181	mutex_lock(&p->mutex);
   1182
   1183	kfd_process_dequeue_from_all_devices(p);
   1184	pqm_uninit(&p->pqm);
   1185
   1186	/* Indicate to other users that MM is no longer valid */
   1187	p->mm = NULL;
   1188	/* Signal the eviction fence after user mode queues are
   1189	 * destroyed. This allows any BOs to be freed without
   1190	 * triggering pointless evictions or waiting for fences.
   1191	 */
   1192	dma_fence_signal(p->ef);
   1193
   1194	mutex_unlock(&p->mutex);
   1195
   1196	mmu_notifier_put(&p->mmu_notifier);
   1197}
   1198
   1199static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
   1200	.release = kfd_process_notifier_release,
   1201	.alloc_notifier = kfd_process_alloc_notifier,
   1202	.free_notifier = kfd_process_free_notifier,
   1203};
   1204
   1205static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
   1206{
   1207	unsigned long  offset;
   1208	int i;
   1209
   1210	for (i = 0; i < p->n_pdds; i++) {
   1211		struct kfd_dev *dev = p->pdds[i]->dev;
   1212		struct qcm_process_device *qpd = &p->pdds[i]->qpd;
   1213
   1214		if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
   1215			continue;
   1216
   1217		offset = KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id);
   1218		qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
   1219			KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
   1220			MAP_SHARED, offset);
   1221
   1222		if (IS_ERR_VALUE(qpd->tba_addr)) {
   1223			int err = qpd->tba_addr;
   1224
   1225			pr_err("Failure to set tba address. error %d.\n", err);
   1226			qpd->tba_addr = 0;
   1227			qpd->cwsr_kaddr = NULL;
   1228			return err;
   1229		}
   1230
   1231		memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);
   1232
   1233		qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
   1234		pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
   1235			qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
   1236	}
   1237
   1238	return 0;
   1239}
   1240
   1241static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
   1242{
   1243	struct kfd_dev *dev = pdd->dev;
   1244	struct qcm_process_device *qpd = &pdd->qpd;
   1245	uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT
   1246			| KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE
   1247			| KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
   1248	struct kgd_mem *mem;
   1249	void *kaddr;
   1250	int ret;
   1251
   1252	if (!dev->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base)
   1253		return 0;
   1254
   1255	/* cwsr_base is only set for dGPU */
   1256	ret = kfd_process_alloc_gpuvm(pdd, qpd->cwsr_base,
   1257				      KFD_CWSR_TBA_TMA_SIZE, flags, &mem, &kaddr);
   1258	if (ret)
   1259		return ret;
   1260
   1261	qpd->cwsr_mem = mem;
   1262	qpd->cwsr_kaddr = kaddr;
   1263	qpd->tba_addr = qpd->cwsr_base;
   1264
   1265	memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);
   1266
   1267	qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
   1268	pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
   1269		 qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
   1270
   1271	return 0;
   1272}
   1273
   1274static void kfd_process_device_destroy_cwsr_dgpu(struct kfd_process_device *pdd)
   1275{
   1276	struct kfd_dev *dev = pdd->dev;
   1277	struct qcm_process_device *qpd = &pdd->qpd;
   1278
   1279	if (!dev->cwsr_enabled || !qpd->cwsr_kaddr || !qpd->cwsr_base)
   1280		return;
   1281
   1282	kfd_process_free_gpuvm(qpd->cwsr_mem, pdd, qpd->cwsr_kaddr);
   1283}
   1284
   1285void kfd_process_set_trap_handler(struct qcm_process_device *qpd,
   1286				  uint64_t tba_addr,
   1287				  uint64_t tma_addr)
   1288{
   1289	if (qpd->cwsr_kaddr) {
   1290		/* KFD trap handler is bound, record as second-level TBA/TMA
   1291		 * in first-level TMA. First-level trap will jump to second.
   1292		 */
   1293		uint64_t *tma =
   1294			(uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET);
   1295		tma[0] = tba_addr;
   1296		tma[1] = tma_addr;
   1297	} else {
   1298		/* No trap handler bound, bind as first-level TBA/TMA. */
   1299		qpd->tba_addr = tba_addr;
   1300		qpd->tma_addr = tma_addr;
   1301	}
   1302}
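
When a CWSR trap handler is already installed, kfd_process_set_trap_handler() records the second-level handler in the first two 64-bit slots of the TMA. A standalone sketch of that slot layout using hypothetical addresses (the tma array stands in for qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Stand-in for the first-level TMA page. */
	uint64_t tma[2] = { 0, 0 };
	uint64_t user_tba = 0x7f0000001000ULL;	/* hypothetical addresses */
	uint64_t user_tma = 0x7f0000002000ULL;

	/* Same layout as above: slot 0 holds the second-level TBA,
	 * slot 1 the second-level TMA. */
	tma[0] = user_tba;
	tma[1] = user_tma;

	printf("second-level TBA: 0x%llx, TMA: 0x%llx\n",
	       (unsigned long long)tma[0], (unsigned long long)tma[1]);
	return 0;
}
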
   1303
   1304bool kfd_process_xnack_mode(struct kfd_process *p, bool supported)
   1305{
   1306	int i;
   1307
   1308	/* On most GFXv9 GPUs, the retry mode in the SQ must match the
   1309	 * boot time retry setting. Mixing processes with different
   1310	 * XNACK/retry settings can hang the GPU.
   1311	 *
   1312	 * Different GPUs can have different noretry settings depending
   1313	 * on HW bugs or limitations. We need to find at least one
   1314	 * XNACK mode for this process that's compatible with all GPUs.
   1315	 * Fortunately GPUs with retry enabled (noretry=0) can run code
   1316	 * built for XNACK-off. On GFXv9 it may perform slower.
   1317	 *
   1318	 * Therefore applications built for XNACK-off can always be
   1319	 * supported and will be our fallback if any GPU does not
   1320	 * support retry.
   1321	 */
   1322	for (i = 0; i < p->n_pdds; i++) {
   1323		struct kfd_dev *dev = p->pdds[i]->dev;
   1324
   1325		/* Only consider GFXv9 and higher GPUs. Older GPUs don't
   1326		 * support the SVM APIs and don't need to be considered
   1327		 * for the XNACK mode selection.
   1328		 */
   1329		if (!KFD_IS_SOC15(dev))
   1330			continue;
   1331		/* Aldebaran can always support XNACK because it can support
   1332		 * per-process XNACK mode selection. But let the dev->noretry
   1333		 * setting still influence the default XNACK mode.
   1334		 */
   1335		if (supported && KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2))
   1336			continue;
   1337
   1338		/* GFXv10 and later GPUs do not support shader preemption
   1339		 * during page faults. This can lead to poor QoS for queue
   1340		 * management and memory-manager-related preemptions or
   1341		 * even deadlocks.
   1342		 */
   1343		if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1))
   1344			return false;
   1345
   1346		if (dev->noretry)
   1347			return false;
   1348	}
   1349
   1350	return true;
   1351}
   1352
   1353/*
   1354 * On return the kfd_process is fully operational and will be freed when the
   1355 * mm is released
   1356 */
   1357static struct kfd_process *create_process(const struct task_struct *thread)
   1358{
   1359	struct kfd_process *process;
   1360	struct mmu_notifier *mn;
   1361	int err = -ENOMEM;
   1362
   1363	process = kzalloc(sizeof(*process), GFP_KERNEL);
   1364	if (!process)
   1365		goto err_alloc_process;
   1366
   1367	kref_init(&process->ref);
   1368	mutex_init(&process->mutex);
   1369	process->mm = thread->mm;
   1370	process->lead_thread = thread->group_leader;
   1371	process->n_pdds = 0;
   1372	process->queues_paused = false;
   1373	INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
   1374	INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
   1375	process->last_restore_timestamp = get_jiffies_64();
   1376	err = kfd_event_init_process(process);
   1377	if (err)
   1378		goto err_event_init;
   1379	process->is_32bit_user_mode = in_compat_syscall();
   1380
   1381	process->pasid = kfd_pasid_alloc();
   1382	if (process->pasid == 0) {
   1383		err = -ENOSPC;
   1384		goto err_alloc_pasid;
   1385	}
   1386
   1387	err = pqm_init(&process->pqm, process);
   1388	if (err != 0)
   1389		goto err_process_pqm_init;
   1390
    1391	/* Init process apertures */
   1392	err = kfd_init_apertures(process);
   1393	if (err != 0)
   1394		goto err_init_apertures;
   1395
   1396	/* Check XNACK support after PDDs are created in kfd_init_apertures */
   1397	process->xnack_enabled = kfd_process_xnack_mode(process, false);
   1398
   1399	err = svm_range_list_init(process);
   1400	if (err)
   1401		goto err_init_svm_range_list;
   1402
   1403	/* alloc_notifier needs to find the process in the hash table */
   1404	hash_add_rcu(kfd_processes_table, &process->kfd_processes,
   1405			(uintptr_t)process->mm);
   1406
   1407	/* MMU notifier registration must be the last call that can fail
   1408	 * because after this point we cannot unwind the process creation.
   1409	 * After this point, mmu_notifier_put will trigger the cleanup by
   1410	 * dropping the last process reference in the free_notifier.
   1411	 */
   1412	mn = mmu_notifier_get(&kfd_process_mmu_notifier_ops, process->mm);
   1413	if (IS_ERR(mn)) {
   1414		err = PTR_ERR(mn);
   1415		goto err_register_notifier;
   1416	}
   1417	BUG_ON(mn != &process->mmu_notifier);
   1418
   1419	get_task_struct(process->lead_thread);
   1420
   1421	return process;
   1422
   1423err_register_notifier:
   1424	hash_del_rcu(&process->kfd_processes);
   1425	svm_range_list_fini(process);
   1426err_init_svm_range_list:
   1427	kfd_process_free_outstanding_kfd_bos(process);
   1428	kfd_process_destroy_pdds(process);
   1429err_init_apertures:
   1430	pqm_uninit(&process->pqm);
   1431err_process_pqm_init:
   1432	kfd_pasid_free(process->pasid);
   1433err_alloc_pasid:
   1434	kfd_event_free_process(process);
   1435err_event_init:
   1436	mutex_destroy(&process->mutex);
   1437	kfree(process);
   1438err_alloc_process:
   1439	return ERR_PTR(err);
   1440}
   1441
   1442static int init_doorbell_bitmap(struct qcm_process_device *qpd,
   1443			struct kfd_dev *dev)
   1444{
   1445	unsigned int i;
   1446	int range_start = dev->shared_resources.non_cp_doorbells_start;
   1447	int range_end = dev->shared_resources.non_cp_doorbells_end;
   1448
   1449	if (!KFD_IS_SOC15(dev))
   1450		return 0;
   1451
   1452	qpd->doorbell_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
   1453					     GFP_KERNEL);
   1454	if (!qpd->doorbell_bitmap)
   1455		return -ENOMEM;
   1456
   1457	/* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15. */
   1458	pr_debug("reserved doorbell 0x%03x - 0x%03x\n", range_start, range_end);
   1459	pr_debug("reserved doorbell 0x%03x - 0x%03x\n",
   1460			range_start + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
   1461			range_end + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);
   1462
   1463	for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) {
   1464		if (i >= range_start && i <= range_end) {
   1465			__set_bit(i, qpd->doorbell_bitmap);
   1466			__set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
   1467				  qpd->doorbell_bitmap);
   1468		}
   1469	}
   1470
   1471	return 0;
   1472}
   1473
   1474struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
   1475							struct kfd_process *p)
   1476{
   1477	int i;
   1478
   1479	for (i = 0; i < p->n_pdds; i++)
   1480		if (p->pdds[i]->dev == dev)
   1481			return p->pdds[i];
   1482
   1483	return NULL;
   1484}
   1485
   1486struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
   1487							struct kfd_process *p)
   1488{
   1489	struct kfd_process_device *pdd = NULL;
   1490	int retval = 0;
   1491
   1492	if (WARN_ON_ONCE(p->n_pdds >= MAX_GPU_INSTANCE))
   1493		return NULL;
   1494	pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
   1495	if (!pdd)
   1496		return NULL;
   1497
   1498	if (kfd_alloc_process_doorbells(dev, &pdd->doorbell_index) < 0) {
   1499		pr_err("Failed to alloc doorbell for pdd\n");
   1500		goto err_free_pdd;
   1501	}
   1502
   1503	if (init_doorbell_bitmap(&pdd->qpd, dev)) {
   1504		pr_err("Failed to init doorbell for process\n");
   1505		goto err_free_pdd;
   1506	}
   1507
   1508	pdd->dev = dev;
   1509	INIT_LIST_HEAD(&pdd->qpd.queues_list);
   1510	INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
   1511	pdd->qpd.dqm = dev->dqm;
   1512	pdd->qpd.pqm = &p->pqm;
   1513	pdd->qpd.evicted = 0;
   1514	pdd->qpd.mapped_gws_queue = false;
   1515	pdd->process = p;
   1516	pdd->bound = PDD_UNBOUND;
   1517	pdd->already_dequeued = false;
   1518	pdd->runtime_inuse = false;
   1519	pdd->vram_usage = 0;
   1520	pdd->sdma_past_activity_counter = 0;
   1521	pdd->user_gpu_id = dev->id;
   1522	atomic64_set(&pdd->evict_duration_counter, 0);
   1523
   1524	if (dev->shared_resources.enable_mes) {
   1525		retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
   1526						AMDGPU_MES_PROC_CTX_SIZE,
   1527						&pdd->proc_ctx_bo,
   1528						&pdd->proc_ctx_gpu_addr,
   1529						&pdd->proc_ctx_cpu_ptr,
   1530						false);
   1531		if (retval) {
   1532			pr_err("failed to allocate process context bo\n");
   1533			goto err_free_pdd;
   1534		}
   1535		memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
   1536	}
   1537
   1538	p->pdds[p->n_pdds++] = pdd;
   1539
   1540	/* Init idr used for memory handle translation */
   1541	idr_init(&pdd->alloc_idr);
   1542
   1543	return pdd;
   1544
   1545err_free_pdd:
   1546	kfree(pdd);
   1547	return NULL;
   1548}
   1549
   1550/**
   1551 * kfd_process_device_init_vm - Initialize a VM for a process-device
   1552 *
   1553 * @pdd: The process-device
    1554 * @drm_file: Pointer to a DRM file descriptor (must not be NULL)
   1555 *
   1556 * If @drm_file is specified, it will be used to acquire the VM from
   1557 * that file descriptor. If successful, the @pdd takes ownership of
   1558 * the file descriptor.
   1559 *
    1560 * If @drm_file is NULL, -EINVAL is returned.
   1561 *
   1562 * Returns 0 on success, -errno on failure.
   1563 */
   1564int kfd_process_device_init_vm(struct kfd_process_device *pdd,
   1565			       struct file *drm_file)
   1566{
   1567	struct kfd_process *p;
   1568	struct kfd_dev *dev;
   1569	int ret;
   1570
   1571	if (!drm_file)
   1572		return -EINVAL;
   1573
   1574	if (pdd->drm_priv)
   1575		return -EBUSY;
   1576
   1577	p = pdd->process;
   1578	dev = pdd->dev;
   1579
   1580	ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(
   1581		dev->adev, drm_file, p->pasid,
   1582		&p->kgd_process_info, &p->ef);
   1583	if (ret) {
   1584		pr_err("Failed to create process VM object\n");
   1585		return ret;
   1586	}
   1587	pdd->drm_priv = drm_file->private_data;
   1588	atomic64_set(&pdd->tlb_seq, 0);
   1589
   1590	ret = kfd_process_device_reserve_ib_mem(pdd);
   1591	if (ret)
   1592		goto err_reserve_ib_mem;
   1593	ret = kfd_process_device_init_cwsr_dgpu(pdd);
   1594	if (ret)
   1595		goto err_init_cwsr;
   1596
   1597	pdd->drm_file = drm_file;
   1598
   1599	return 0;
   1600
   1601err_init_cwsr:
   1602err_reserve_ib_mem:
   1603	kfd_process_device_free_bos(pdd);
   1604	pdd->drm_priv = NULL;
   1605
   1606	return ret;
   1607}
   1608
   1609/*
   1610 * Direct the IOMMU to bind the process (specifically the pasid->mm)
   1611 * to the device.
   1612 * Unbinding occurs when the process dies or the device is removed.
   1613 *
   1614 * Assumes that the process lock is held.
   1615 */
   1616struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
   1617							struct kfd_process *p)
   1618{
   1619	struct kfd_process_device *pdd;
   1620	int err;
   1621
   1622	pdd = kfd_get_process_device_data(dev, p);
   1623	if (!pdd) {
   1624		pr_err("Process device data doesn't exist\n");
   1625		return ERR_PTR(-ENOMEM);
   1626	}
   1627
   1628	if (!pdd->drm_priv)
   1629		return ERR_PTR(-ENODEV);
   1630
   1631	/*
    1632	 * Signal the runtime-pm system to auto-resume and to prevent
    1633	 * further runtime suspend from the time the device pdd is created
    1634	 * until the pdd is destroyed.
   1635	 */
   1636	if (!pdd->runtime_inuse) {
   1637		err = pm_runtime_get_sync(dev->ddev->dev);
   1638		if (err < 0) {
   1639			pm_runtime_put_autosuspend(dev->ddev->dev);
   1640			return ERR_PTR(err);
   1641		}
   1642	}
   1643
   1644	err = kfd_iommu_bind_process_to_device(pdd);
   1645	if (err)
   1646		goto out;
   1647
   1648	/*
    1649	 * Make sure that the runtime_usage counter is incremented only
    1650	 * once per pdd.
   1651	 */
   1652	pdd->runtime_inuse = true;
   1653
   1654	return pdd;
   1655
   1656out:
   1657	/* balance runpm reference count and exit with error */
   1658	if (!pdd->runtime_inuse) {
   1659		pm_runtime_mark_last_busy(dev->ddev->dev);
   1660		pm_runtime_put_autosuspend(dev->ddev->dev);
   1661	}
   1662
   1663	return ERR_PTR(err);
   1664}
   1665
   1666/* Create specific handle mapped to mem from process local memory idr
   1667 * Assumes that the process lock is held.
   1668 */
   1669int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd,
   1670					void *mem)
   1671{
   1672	return idr_alloc(&pdd->alloc_idr, mem, 0, 0, GFP_KERNEL);
   1673}
   1674
   1675/* Translate specific handle from process local memory idr
   1676 * Assumes that the process lock is held.
   1677 */
   1678void *kfd_process_device_translate_handle(struct kfd_process_device *pdd,
   1679					int handle)
   1680{
   1681	if (handle < 0)
   1682		return NULL;
   1683
   1684	return idr_find(&pdd->alloc_idr, handle);
   1685}
   1686
   1687/* Remove specific handle from process local memory idr
   1688 * Assumes that the process lock is held.
   1689 */
   1690void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
   1691					int handle)
   1692{
   1693	if (handle >= 0)
   1694		idr_remove(&pdd->alloc_idr, handle);
   1695}
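
The three helpers above manage integer handles that map to buffer objects in the per-device allocation IDR. A standalone sketch of the same create/translate/remove life cycle, with a plain array standing in for the IDR and illustrative function names (the real code uses idr_alloc()/idr_find()/idr_remove()):

#include <stdio.h>
#include <stddef.h>

#define MAX_HANDLES 16
static void *table[MAX_HANDLES];	/* toy stand-in for pdd->alloc_idr */

static int handle_create(void *mem)
{
	for (int i = 0; i < MAX_HANDLES; i++)
		if (!table[i]) {
			table[i] = mem;
			return i;
		}
	return -1;
}

static void *handle_translate(int handle)
{
	if (handle < 0 || handle >= MAX_HANDLES)
		return NULL;
	return table[handle];
}

static void handle_remove(int handle)
{
	if (handle >= 0 && handle < MAX_HANDLES)
		table[handle] = NULL;
}

int main(void)
{
	int buf = 42;			/* pretend buffer object */
	int h = handle_create(&buf);

	printf("handle %d -> %p\n", h, handle_translate(h));
	handle_remove(h);
	printf("after remove: %p\n", handle_translate(h));
	return 0;
}
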
   1696
   1697/* This increments the process->ref counter. */
   1698struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid)
   1699{
   1700	struct kfd_process *p, *ret_p = NULL;
   1701	unsigned int temp;
   1702
   1703	int idx = srcu_read_lock(&kfd_processes_srcu);
   1704
   1705	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
   1706		if (p->pasid == pasid) {
   1707			kref_get(&p->ref);
   1708			ret_p = p;
   1709			break;
   1710		}
   1711	}
   1712
   1713	srcu_read_unlock(&kfd_processes_srcu, idx);
   1714
   1715	return ret_p;
   1716}
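
/*
 * Illustrative sketch (example only): the lookup above returns a counted
 * reference, so callers must balance it with kfd_unref_process() once they
 * are done with the process. "pasid" stands for a PASID obtained elsewhere,
 * e.g. from an interrupt payload.
 */
#if 0	/* example only */
static void example_lookup_by_pasid(u32 pasid)
{
	struct kfd_process *p;

	p = kfd_lookup_process_by_pasid(pasid);
	if (!p)
		return;		/* no live process with this PASID */

	/* ... use p under its own locking rules ... */

	kfd_unref_process(p);	/* drop the kref taken by the lookup */
}
#endif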
   1717
   1718/* This increments the process->ref counter. */
   1719struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
   1720{
   1721	struct kfd_process *p;
   1722
   1723	int idx = srcu_read_lock(&kfd_processes_srcu);
   1724
   1725	p = find_process_by_mm(mm);
   1726	if (p)
   1727		kref_get(&p->ref);
   1728
   1729	srcu_read_unlock(&kfd_processes_srcu, idx);
   1730
   1731	return p;
   1732}
   1733
   1734/* kfd_process_evict_queues - Evict all user queues of a process
   1735 *
   1736 * Eviction is reference-counted per process-device. This means multiple
   1737 * evictions from different sources can be nested safely.
   1738 */
   1739int kfd_process_evict_queues(struct kfd_process *p)
   1740{
   1741	int r = 0;
   1742	int i;
   1743	unsigned int n_evicted = 0;
   1744
   1745	for (i = 0; i < p->n_pdds; i++) {
   1746		struct kfd_process_device *pdd = p->pdds[i];
   1747
   1748		r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
   1749							    &pdd->qpd);
    1750		/* The evict call returns -EIO if HWS is hung or the ASIC is resetting.
    1751		 * In that case we still mark all the queues as evicted, so that they
    1752		 * are not added back later even though they could not be saved now.
    1753		 */
   1754		if (r && r != -EIO) {
   1755			pr_err("Failed to evict process queues\n");
   1756			goto fail;
   1757		}
   1758		n_evicted++;
   1759	}
   1760
   1761	return r;
   1762
   1763fail:
   1764	/* To keep state consistent, roll back partial eviction by
   1765	 * restoring queues
   1766	 */
   1767	for (i = 0; i < p->n_pdds; i++) {
   1768		struct kfd_process_device *pdd = p->pdds[i];
   1769
   1770		if (n_evicted == 0)
   1771			break;
   1772		if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
   1773							      &pdd->qpd))
   1774			pr_err("Failed to restore queues\n");
   1775
   1776		n_evicted--;
   1777	}
   1778
   1779	return r;
   1780}
   1781
   1782/* kfd_process_restore_queues - Restore all user queues of a process */
   1783int kfd_process_restore_queues(struct kfd_process *p)
   1784{
   1785	int r, ret = 0;
   1786	int i;
   1787
   1788	for (i = 0; i < p->n_pdds; i++) {
   1789		struct kfd_process_device *pdd = p->pdds[i];
   1790
   1791		r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
   1792							      &pdd->qpd);
   1793		if (r) {
   1794			pr_err("Failed to restore process queues\n");
   1795			if (!ret)
   1796				ret = r;
   1797		}
   1798	}
   1799
   1800	return ret;
   1801}
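
/*
 * Illustrative sketch (example only): eviction and restore are used as a
 * balanced pair. Because eviction is reference-counted per process-device,
 * a caller that evicts the queues for its own purpose can restore them
 * afterwards without disturbing other, concurrent evictions of the same
 * process.
 */
#if 0	/* example only */
static int example_evict_restore(struct kfd_process *p)
{
	int r;

	r = kfd_process_evict_queues(p);
	if (r)
		return r;

	/* ... do work that requires the queues to be off the hardware ... */

	return kfd_process_restore_queues(p);
}
#endif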
   1802
   1803int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id)
   1804{
   1805	int i;
   1806
   1807	for (i = 0; i < p->n_pdds; i++)
   1808		if (p->pdds[i] && gpu_id == p->pdds[i]->user_gpu_id)
   1809			return i;
   1810	return -EINVAL;
   1811}
   1812
   1813int
   1814kfd_process_gpuid_from_adev(struct kfd_process *p, struct amdgpu_device *adev,
   1815			   uint32_t *gpuid, uint32_t *gpuidx)
   1816{
   1817	int i;
   1818
   1819	for (i = 0; i < p->n_pdds; i++)
   1820		if (p->pdds[i] && p->pdds[i]->dev->adev == adev) {
   1821			*gpuid = p->pdds[i]->user_gpu_id;
   1822			*gpuidx = i;
   1823			return 0;
   1824		}
   1825	return -EINVAL;
   1826}
   1827
   1828static void evict_process_worker(struct work_struct *work)
   1829{
   1830	int ret;
   1831	struct kfd_process *p;
   1832	struct delayed_work *dwork;
   1833
   1834	dwork = to_delayed_work(work);
   1835
   1836	/* Process termination destroys this worker thread. So during the
   1837	 * lifetime of this thread, kfd_process p will be valid
   1838	 */
   1839	p = container_of(dwork, struct kfd_process, eviction_work);
   1840	WARN_ONCE(p->last_eviction_seqno != p->ef->seqno,
   1841		  "Eviction fence mismatch\n");
   1842
    1843	/* A narrow window of overlap between the restore and evict work
    1844	 * items is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos
    1845	 * unreserves the KFD BOs, the process can be evicted again before
    1846	 * the restore has finished its remaining steps. So wait for any
    1847	 * previous restore work to complete first.
    1848	 */
   1849	flush_delayed_work(&p->restore_work);
   1850
   1851	pr_debug("Started evicting pasid 0x%x\n", p->pasid);
   1852	ret = kfd_process_evict_queues(p);
   1853	if (!ret) {
   1854		dma_fence_signal(p->ef);
   1855		dma_fence_put(p->ef);
   1856		p->ef = NULL;
   1857		queue_delayed_work(kfd_restore_wq, &p->restore_work,
   1858				msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));
   1859
   1860		pr_debug("Finished evicting pasid 0x%x\n", p->pasid);
   1861	} else
   1862		pr_err("Failed to evict queues of pasid 0x%x\n", p->pasid);
   1863}
   1864
   1865static void restore_process_worker(struct work_struct *work)
   1866{
   1867	struct delayed_work *dwork;
   1868	struct kfd_process *p;
   1869	int ret = 0;
   1870
   1871	dwork = to_delayed_work(work);
   1872
   1873	/* Process termination destroys this worker thread. So during the
   1874	 * lifetime of this thread, kfd_process p will be valid
   1875	 */
   1876	p = container_of(dwork, struct kfd_process, restore_work);
   1877	pr_debug("Started restoring pasid 0x%x\n", p->pasid);
   1878
    1879	/* Set last_restore_timestamp before the restore has actually
    1880	 * succeeded; otherwise it would have to be set by KGD
    1881	 * (restore_process_bos) before the KFD BOs are unreserved, or the
    1882	 * process could be evicted again before the timestamp is updated.
    1883	 * If the restore fails, the timestamp is set again on the next
    1884	 * attempt. This means that the minimum GPU quantum is
    1885	 * PROCESS_ACTIVE_TIME_MS - (time to execute the following two
    1886	 * functions).
    1887	 */
    1888
   1889	p->last_restore_timestamp = get_jiffies_64();
   1890	ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info,
   1891						     &p->ef);
   1892	if (ret) {
   1893		pr_debug("Failed to restore BOs of pasid 0x%x, retry after %d ms\n",
   1894			 p->pasid, PROCESS_BACK_OFF_TIME_MS);
   1895		ret = queue_delayed_work(kfd_restore_wq, &p->restore_work,
   1896				msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
   1897		WARN(!ret, "reschedule restore work failed\n");
   1898		return;
   1899	}
   1900
   1901	ret = kfd_process_restore_queues(p);
   1902	if (!ret)
   1903		pr_debug("Finished restoring pasid 0x%x\n", p->pasid);
   1904	else
   1905		pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid);
   1906}
   1907
   1908void kfd_suspend_all_processes(void)
   1909{
   1910	struct kfd_process *p;
   1911	unsigned int temp;
   1912	int idx = srcu_read_lock(&kfd_processes_srcu);
   1913
   1914	WARN(debug_evictions, "Evicting all processes");
   1915	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
   1916		cancel_delayed_work_sync(&p->eviction_work);
   1917		cancel_delayed_work_sync(&p->restore_work);
   1918
   1919		if (kfd_process_evict_queues(p))
   1920			pr_err("Failed to suspend process 0x%x\n", p->pasid);
   1921		dma_fence_signal(p->ef);
   1922		dma_fence_put(p->ef);
   1923		p->ef = NULL;
   1924	}
   1925	srcu_read_unlock(&kfd_processes_srcu, idx);
   1926}
   1927
   1928int kfd_resume_all_processes(void)
   1929{
   1930	struct kfd_process *p;
   1931	unsigned int temp;
   1932	int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu);
   1933
   1934	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
   1935		if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) {
   1936			pr_err("Restore process %d failed during resume\n",
   1937			       p->pasid);
   1938			ret = -EFAULT;
   1939		}
   1940	}
   1941	srcu_read_unlock(&kfd_processes_srcu, idx);
   1942	return ret;
   1943}
   1944
   1945int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
   1946			  struct vm_area_struct *vma)
   1947{
   1948	struct kfd_process_device *pdd;
   1949	struct qcm_process_device *qpd;
   1950
   1951	if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) {
   1952		pr_err("Incorrect CWSR mapping size.\n");
   1953		return -EINVAL;
   1954	}
   1955
   1956	pdd = kfd_get_process_device_data(dev, process);
   1957	if (!pdd)
   1958		return -EINVAL;
   1959	qpd = &pdd->qpd;
   1960
   1961	qpd->cwsr_kaddr = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
   1962					get_order(KFD_CWSR_TBA_TMA_SIZE));
   1963	if (!qpd->cwsr_kaddr) {
   1964		pr_err("Error allocating per process CWSR buffer.\n");
   1965		return -ENOMEM;
   1966	}
   1967
   1968	vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND
   1969		| VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP;
   1970	/* Mapping pages to user process */
   1971	return remap_pfn_range(vma, vma->vm_start,
   1972			       PFN_DOWN(__pa(qpd->cwsr_kaddr)),
   1973			       KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
   1974}
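
/*
 * Illustrative sketch (example only, userspace view): the CWSR area handled
 * above is mapped through the KFD device file with a length of exactly
 * KFD_CWSR_TBA_TMA_SIZE; any other length is rejected with -EINVAL. The mmap
 * offset encoding is driver-internal and normally computed by the userspace
 * Thunk library; "cwsr_mmap_offset" and "cwsr_size" below are placeholders,
 * not a real API.
 */
#if 0	/* example only */
	void *cwsr = mmap(NULL, cwsr_size, PROT_READ | PROT_WRITE,
			  MAP_SHARED, kfd_fd, cwsr_mmap_offset);
	if (cwsr == MAP_FAILED)
		return -errno;	/* mapping failed */
#endif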
   1975
   1976void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)
   1977{
   1978	struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
   1979	uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm);
   1980	struct kfd_dev *dev = pdd->dev;
   1981
    1982	/*
    1983	 * We can race and lose here, but that is extremely unlikely, and the
    1984	 * worst that can happen is that we flush the changes into the TLB
    1985	 * once more, which is harmless.
    1986	 */
   1987	if (atomic64_xchg(&pdd->tlb_seq, tlb_seq) == tlb_seq)
   1988		return;
   1989
   1990	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
   1991		/* Nothing to flush until a VMID is assigned, which
   1992		 * only happens when the first queue is created.
   1993		 */
   1994		if (pdd->qpd.vmid)
   1995			amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->adev,
   1996							pdd->qpd.vmid);
   1997	} else {
   1998		amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->adev,
   1999					pdd->process->pasid, type);
   2000	}
   2001}
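
/*
 * Illustrative sketch (example only): the atomic64_xchg() check above is a
 * "flush only if the sequence number changed" filter. Exchanging the cached
 * value with the current one updates the cache and reports whether a flush
 * is still needed in a single step; losing a race only causes one harmless
 * extra flush. example_do_flush() is a hypothetical flush operation.
 */
#if 0	/* example only */
static void example_flush_if_stale(atomic64_t *cached_seq, u64 current_seq)
{
	if (atomic64_xchg(cached_seq, current_seq) == current_seq)
		return;	/* already flushed for this sequence number */

	example_do_flush();
}
#endif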
   2002
   2003struct kfd_process_device *kfd_process_device_data_by_id(struct kfd_process *p, uint32_t gpu_id)
   2004{
   2005	int i;
   2006
   2007	if (gpu_id) {
   2008		for (i = 0; i < p->n_pdds; i++) {
   2009			struct kfd_process_device *pdd = p->pdds[i];
   2010
   2011			if (pdd->user_gpu_id == gpu_id)
   2012				return pdd;
   2013		}
   2014	}
   2015	return NULL;
   2016}
   2017
   2018int kfd_process_get_user_gpu_id(struct kfd_process *p, uint32_t actual_gpu_id)
   2019{
   2020	int i;
   2021
   2022	if (!actual_gpu_id)
   2023		return 0;
   2024
   2025	for (i = 0; i < p->n_pdds; i++) {
   2026		struct kfd_process_device *pdd = p->pdds[i];
   2027
   2028		if (pdd->dev->id == actual_gpu_id)
   2029			return pdd->user_gpu_id;
   2030	}
   2031	return -EINVAL;
   2032}
   2033
   2034#if defined(CONFIG_DEBUG_FS)
   2035
   2036int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
   2037{
   2038	struct kfd_process *p;
   2039	unsigned int temp;
   2040	int r = 0;
   2041
   2042	int idx = srcu_read_lock(&kfd_processes_srcu);
   2043
   2044	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
   2045		seq_printf(m, "Process %d PASID 0x%x:\n",
   2046			   p->lead_thread->tgid, p->pasid);
   2047
   2048		mutex_lock(&p->mutex);
   2049		r = pqm_debugfs_mqds(m, &p->pqm);
   2050		mutex_unlock(&p->mutex);
   2051
   2052		if (r)
   2053			break;
   2054	}
   2055
   2056	srcu_read_unlock(&kfd_processes_srcu, idx);
   2057
   2058	return r;
   2059}
   2060
   2061#endif
   2062