cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

kfd_chardev.c (72931B)


      1// SPDX-License-Identifier: GPL-2.0 OR MIT
      2/*
      3 * Copyright 2014-2022 Advanced Micro Devices, Inc.
      4 *
      5 * Permission is hereby granted, free of charge, to any person obtaining a
      6 * copy of this software and associated documentation files (the "Software"),
      7 * to deal in the Software without restriction, including without limitation
      8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      9 * and/or sell copies of the Software, and to permit persons to whom the
     10 * Software is furnished to do so, subject to the following conditions:
     11 *
     12 * The above copyright notice and this permission notice shall be included in
     13 * all copies or substantial portions of the Software.
     14 *
     15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
     19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     21 * OTHER DEALINGS IN THE SOFTWARE.
     22 */
     23
     24#include <linux/device.h>
     25#include <linux/export.h>
     26#include <linux/err.h>
     27#include <linux/fs.h>
     28#include <linux/file.h>
     29#include <linux/sched.h>
     30#include <linux/slab.h>
     31#include <linux/uaccess.h>
     32#include <linux/compat.h>
     33#include <uapi/linux/kfd_ioctl.h>
     34#include <linux/time.h>
     35#include <linux/mm.h>
     36#include <linux/mman.h>
     37#include <linux/ptrace.h>
     38#include <linux/dma-buf.h>
     39#include <linux/fdtable.h>
     40#include <linux/processor.h>
     41#include "kfd_priv.h"
     42#include "kfd_device_queue_manager.h"
     43#include "kfd_svm.h"
     44#include "amdgpu_amdkfd.h"
     45#include "kfd_smi_events.h"
     46#include "amdgpu_dma_buf.h"
     47
     48static long kfd_ioctl(struct file *, unsigned int, unsigned long);
     49static int kfd_open(struct inode *, struct file *);
     50static int kfd_release(struct inode *, struct file *);
     51static int kfd_mmap(struct file *, struct vm_area_struct *);
     52
     53static const char kfd_dev_name[] = "kfd";
     54
     55static const struct file_operations kfd_fops = {
     56	.owner = THIS_MODULE,
     57	.unlocked_ioctl = kfd_ioctl,
     58	.compat_ioctl = compat_ptr_ioctl,
     59	.open = kfd_open,
     60	.release = kfd_release,
     61	.mmap = kfd_mmap,
     62};
     63
     64static int kfd_char_dev_major = -1;
     65static struct class *kfd_class;
     66struct device *kfd_device;
     67
     68int kfd_chardev_init(void)
     69{
     70	int err = 0;
     71
     72	kfd_char_dev_major = register_chrdev(0, kfd_dev_name, &kfd_fops);
     73	err = kfd_char_dev_major;
     74	if (err < 0)
     75		goto err_register_chrdev;
     76
     77	kfd_class = class_create(THIS_MODULE, kfd_dev_name);
     78	err = PTR_ERR(kfd_class);
     79	if (IS_ERR(kfd_class))
     80		goto err_class_create;
     81
     82	kfd_device = device_create(kfd_class, NULL,
     83					MKDEV(kfd_char_dev_major, 0),
     84					NULL, kfd_dev_name);
     85	err = PTR_ERR(kfd_device);
     86	if (IS_ERR(kfd_device))
     87		goto err_device_create;
     88
     89	return 0;
     90
     91err_device_create:
     92	class_destroy(kfd_class);
     93err_class_create:
     94	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
     95err_register_chrdev:
     96	return err;
     97}
     98
     99void kfd_chardev_exit(void)
    100{
    101	device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0));
    102	class_destroy(kfd_class);
    103	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
    104	kfd_device = NULL;
    105}
    106
    107
    108static int kfd_open(struct inode *inode, struct file *filep)
    109{
    110	struct kfd_process *process;
    111	bool is_32bit_user_mode;
    112
    113	if (iminor(inode) != 0)
    114		return -ENODEV;
    115
    116	is_32bit_user_mode = in_compat_syscall();
    117
    118	if (is_32bit_user_mode) {
    119		dev_warn(kfd_device,
    120			"Process %d (32-bit) failed to open /dev/kfd\n"
    121			"32-bit processes are not supported by amdkfd\n",
    122			current->pid);
    123		return -EPERM;
    124	}
    125
    126	process = kfd_create_process(filep);
    127	if (IS_ERR(process))
    128		return PTR_ERR(process);
    129
    130	if (kfd_is_locked()) {
    131		dev_dbg(kfd_device, "kfd is locked!\n"
    132				"process %d unreferenced", process->pasid);
    133		kfd_unref_process(process);
    134		return -EAGAIN;
    135	}
    136
    137	/* filep now owns the reference returned by kfd_create_process */
    138	filep->private_data = process;
    139
    140	dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
    141		process->pasid, process->is_32bit_user_mode);
    142
    143	return 0;
    144}
    145
    146static int kfd_release(struct inode *inode, struct file *filep)
    147{
    148	struct kfd_process *process = filep->private_data;
    149
    150	if (process)
    151		kfd_unref_process(process);
    152
    153	return 0;
    154}
    155
    156static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
    157					void *data)
    158{
    159	struct kfd_ioctl_get_version_args *args = data;
    160
    161	args->major_version = KFD_IOCTL_MAJOR_VERSION;
    162	args->minor_version = KFD_IOCTL_MINOR_VERSION;
    163
    164	return 0;
    165}
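/*
 * Illustrative sketch, not part of the driver: a minimal user-space
 * caller of the ioctl handled above. The ioctl number and argument
 * struct come from the uapi header included at the top of this file;
 * the helper name is made up for the example.
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <sys/ioctl.h>
 *	#include <unistd.h>
 *	#include <linux/kfd_ioctl.h>
 *
 *	static int query_kfd_version(void)
 *	{
 *		struct kfd_ioctl_get_version_args args = {0};
 *		int fd = open("/dev/kfd", O_RDWR | O_CLOEXEC);
 *
 *		if (fd < 0)
 *			return -1;
 *		if (ioctl(fd, AMDKFD_IOC_GET_VERSION, &args) == 0)
 *			printf("KFD ioctl interface %u.%u\n",
 *			       args.major_version, args.minor_version);
 *		close(fd);
 *		return 0;
 *	}
 *
 * The later sketches in this file reuse such an open descriptor as
 * kfd_fd and omit the includes.
 */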
    166
    167static int set_queue_properties_from_user(struct queue_properties *q_properties,
    168				struct kfd_ioctl_create_queue_args *args)
    169{
    170	if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
     171		pr_err("Queue percentage must be between 0 and KFD_MAX_QUEUE_PERCENTAGE\n");
    172		return -EINVAL;
    173	}
    174
    175	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
     176		pr_err("Queue priority must be between 0 and KFD_MAX_QUEUE_PRIORITY\n");
    177		return -EINVAL;
    178	}
    179
    180	if ((args->ring_base_address) &&
    181		(!access_ok((const void __user *) args->ring_base_address,
    182			sizeof(uint64_t)))) {
    183		pr_err("Can't access ring base address\n");
    184		return -EFAULT;
    185	}
    186
    187	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
    188		pr_err("Ring size must be a power of 2 or 0\n");
    189		return -EINVAL;
    190	}
    191
    192	if (!access_ok((const void __user *) args->read_pointer_address,
    193			sizeof(uint32_t))) {
    194		pr_err("Can't access read pointer\n");
    195		return -EFAULT;
    196	}
    197
    198	if (!access_ok((const void __user *) args->write_pointer_address,
    199			sizeof(uint32_t))) {
    200		pr_err("Can't access write pointer\n");
    201		return -EFAULT;
    202	}
    203
    204	if (args->eop_buffer_address &&
    205		!access_ok((const void __user *) args->eop_buffer_address,
    206			sizeof(uint32_t))) {
    207		pr_debug("Can't access eop buffer");
    208		return -EFAULT;
    209	}
    210
    211	if (args->ctx_save_restore_address &&
    212		!access_ok((const void __user *) args->ctx_save_restore_address,
    213			sizeof(uint32_t))) {
    214		pr_debug("Can't access ctx save restore buffer");
    215		return -EFAULT;
    216	}
    217
    218	q_properties->is_interop = false;
    219	q_properties->is_gws = false;
    220	q_properties->queue_percent = args->queue_percentage;
    221	q_properties->priority = args->queue_priority;
    222	q_properties->queue_address = args->ring_base_address;
    223	q_properties->queue_size = args->ring_size;
    224	q_properties->read_ptr = (uint32_t *) args->read_pointer_address;
    225	q_properties->write_ptr = (uint32_t *) args->write_pointer_address;
    226	q_properties->eop_ring_buffer_address = args->eop_buffer_address;
    227	q_properties->eop_ring_buffer_size = args->eop_buffer_size;
    228	q_properties->ctx_save_restore_area_address =
    229			args->ctx_save_restore_address;
    230	q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
    231	q_properties->ctl_stack_size = args->ctl_stack_size;
    232	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
    233		args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
    234		q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
    235	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
    236		q_properties->type = KFD_QUEUE_TYPE_SDMA;
    237	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI)
    238		q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI;
    239	else
    240		return -ENOTSUPP;
    241
    242	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
    243		q_properties->format = KFD_QUEUE_FORMAT_AQL;
    244	else
    245		q_properties->format = KFD_QUEUE_FORMAT_PM4;
    246
    247	pr_debug("Queue Percentage: %d, %d\n",
    248			q_properties->queue_percent, args->queue_percentage);
    249
    250	pr_debug("Queue Priority: %d, %d\n",
    251			q_properties->priority, args->queue_priority);
    252
    253	pr_debug("Queue Address: 0x%llX, 0x%llX\n",
    254			q_properties->queue_address, args->ring_base_address);
    255
    256	pr_debug("Queue Size: 0x%llX, %u\n",
    257			q_properties->queue_size, args->ring_size);
    258
    259	pr_debug("Queue r/w Pointers: %px, %px\n",
    260			q_properties->read_ptr,
    261			q_properties->write_ptr);
    262
    263	pr_debug("Queue Format: %d\n", q_properties->format);
    264
    265	pr_debug("Queue EOP: 0x%llX\n", q_properties->eop_ring_buffer_address);
    266
    267	pr_debug("Queue CTX save area: 0x%llX\n",
    268			q_properties->ctx_save_restore_area_address);
    269
    270	return 0;
    271}
    272
    273static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
    274					void *data)
    275{
    276	struct kfd_ioctl_create_queue_args *args = data;
    277	struct kfd_dev *dev;
    278	int err = 0;
    279	unsigned int queue_id;
    280	struct kfd_process_device *pdd;
    281	struct queue_properties q_properties;
    282	uint32_t doorbell_offset_in_process = 0;
    283
    284	memset(&q_properties, 0, sizeof(struct queue_properties));
    285
    286	pr_debug("Creating queue ioctl\n");
    287
    288	err = set_queue_properties_from_user(&q_properties, args);
    289	if (err)
    290		return err;
    291
    292	pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);
    293
    294	mutex_lock(&p->mutex);
    295
    296	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
    297	if (!pdd) {
    298		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
    299		err = -EINVAL;
    300		goto err_pdd;
    301	}
    302	dev = pdd->dev;
    303
    304	pdd = kfd_bind_process_to_device(dev, p);
    305	if (IS_ERR(pdd)) {
    306		err = -ESRCH;
    307		goto err_bind_process;
    308	}
    309
    310	pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n",
    311			p->pasid,
    312			dev->id);
    313
    314	err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, NULL, NULL, NULL,
    315			&doorbell_offset_in_process);
    316	if (err != 0)
    317		goto err_create_queue;
    318
    319	args->queue_id = queue_id;
    320
    321
    322	/* Return gpu_id as doorbell offset for mmap usage */
    323	args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
    324	args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
    325	if (KFD_IS_SOC15(dev))
    326		/* On SOC15 ASICs, include the doorbell offset within the
    327		 * process doorbell frame, which is 2 pages.
    328		 */
    329		args->doorbell_offset |= doorbell_offset_in_process;
    330
    331	mutex_unlock(&p->mutex);
    332
    333	pr_debug("Queue id %d was created successfully\n", args->queue_id);
    334
    335	pr_debug("Ring buffer address == 0x%016llX\n",
    336			args->ring_base_address);
    337
    338	pr_debug("Read ptr address    == 0x%016llX\n",
    339			args->read_pointer_address);
    340
    341	pr_debug("Write ptr address   == 0x%016llX\n",
    342			args->write_pointer_address);
    343
    344	return 0;
    345
    346err_create_queue:
    347err_bind_process:
    348err_pdd:
    349	mutex_unlock(&p->mutex);
    350	return err;
    351}
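/*
 * Illustrative sketch, not part of the driver: the user-space half of
 * queue creation. The ring buffer and the read/write pointers live in
 * user memory (the ring size must be a power of two, see the checks
 * above), and the doorbell_offset returned here is then used as the
 * mmap offset on the /dev/kfd descriptor. kfd_fd, gpu_id, ring,
 * ring_size, rptr, wptr and doorbell_slice_size are assumptions of the
 * sketch; the doorbell slice size is device dependent (two pages on
 * SOC15 parts, as the comment above notes).
 *
 *	struct kfd_ioctl_create_queue_args qa = {0};
 *	void *doorbells;
 *
 *	qa.gpu_id = gpu_id;
 *	qa.queue_type = KFD_IOC_QUEUE_TYPE_COMPUTE_AQL;
 *	qa.queue_percentage = 100;
 *	qa.queue_priority = 7;
 *	qa.ring_base_address = (uint64_t)ring;
 *	qa.ring_size = ring_size;
 *	qa.read_pointer_address = (uint64_t)rptr;
 *	qa.write_pointer_address = (uint64_t)wptr;
 *
 *	if (ioctl(kfd_fd, AMDKFD_IOC_CREATE_QUEUE, &qa))
 *		return -errno;
 *
 *	doorbells = mmap(NULL, doorbell_slice_size, PROT_READ | PROT_WRITE,
 *			 MAP_SHARED, kfd_fd, qa.doorbell_offset);
 */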
    352
    353static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
    354					void *data)
    355{
    356	int retval;
    357	struct kfd_ioctl_destroy_queue_args *args = data;
    358
    359	pr_debug("Destroying queue id %d for pasid 0x%x\n",
    360				args->queue_id,
    361				p->pasid);
    362
    363	mutex_lock(&p->mutex);
    364
    365	retval = pqm_destroy_queue(&p->pqm, args->queue_id);
    366
    367	mutex_unlock(&p->mutex);
    368	return retval;
    369}
    370
    371static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
    372					void *data)
    373{
    374	int retval;
    375	struct kfd_ioctl_update_queue_args *args = data;
    376	struct queue_properties properties;
    377
    378	if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
     379		pr_err("Queue percentage must be between 0 and KFD_MAX_QUEUE_PERCENTAGE\n");
    380		return -EINVAL;
    381	}
    382
    383	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
     384		pr_err("Queue priority must be between 0 and KFD_MAX_QUEUE_PRIORITY\n");
    385		return -EINVAL;
    386	}
    387
    388	if ((args->ring_base_address) &&
    389		(!access_ok((const void __user *) args->ring_base_address,
    390			sizeof(uint64_t)))) {
    391		pr_err("Can't access ring base address\n");
    392		return -EFAULT;
    393	}
    394
    395	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
    396		pr_err("Ring size must be a power of 2 or 0\n");
    397		return -EINVAL;
    398	}
    399
    400	properties.queue_address = args->ring_base_address;
    401	properties.queue_size = args->ring_size;
    402	properties.queue_percent = args->queue_percentage;
    403	properties.priority = args->queue_priority;
    404
    405	pr_debug("Updating queue id %d for pasid 0x%x\n",
    406			args->queue_id, p->pasid);
    407
    408	mutex_lock(&p->mutex);
    409
    410	retval = pqm_update_queue_properties(&p->pqm, args->queue_id, &properties);
    411
    412	mutex_unlock(&p->mutex);
    413
    414	return retval;
    415}
    416
    417static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
    418					void *data)
    419{
    420	int retval;
    421	const int max_num_cus = 1024;
    422	struct kfd_ioctl_set_cu_mask_args *args = data;
    423	struct mqd_update_info minfo = {0};
    424	uint32_t __user *cu_mask_ptr = (uint32_t __user *)args->cu_mask_ptr;
    425	size_t cu_mask_size = sizeof(uint32_t) * (args->num_cu_mask / 32);
    426
    427	if ((args->num_cu_mask % 32) != 0) {
    428		pr_debug("num_cu_mask 0x%x must be a multiple of 32",
    429				args->num_cu_mask);
    430		return -EINVAL;
    431	}
    432
    433	minfo.cu_mask.count = args->num_cu_mask;
    434	if (minfo.cu_mask.count == 0) {
    435		pr_debug("CU mask cannot be 0");
    436		return -EINVAL;
    437	}
    438
    439	/* To prevent an unreasonably large CU mask size, set an arbitrary
    440	 * limit of max_num_cus bits.  We can then just drop any CU mask bits
    441	 * past max_num_cus bits and just use the first max_num_cus bits.
    442	 */
    443	if (minfo.cu_mask.count > max_num_cus) {
    444		pr_debug("CU mask cannot be greater than 1024 bits");
    445		minfo.cu_mask.count = max_num_cus;
    446		cu_mask_size = sizeof(uint32_t) * (max_num_cus/32);
    447	}
    448
    449	minfo.cu_mask.ptr = kzalloc(cu_mask_size, GFP_KERNEL);
    450	if (!minfo.cu_mask.ptr)
    451		return -ENOMEM;
    452
    453	retval = copy_from_user(minfo.cu_mask.ptr, cu_mask_ptr, cu_mask_size);
    454	if (retval) {
    455		pr_debug("Could not copy CU mask from userspace");
    456		retval = -EFAULT;
    457		goto out;
    458	}
    459
    460	minfo.update_flag = UPDATE_FLAG_CU_MASK;
    461
    462	mutex_lock(&p->mutex);
    463
    464	retval = pqm_update_mqd(&p->pqm, args->queue_id, &minfo);
    465
    466	mutex_unlock(&p->mutex);
    467
    468out:
    469	kfree(minfo.cu_mask.ptr);
    470	return retval;
    471}
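/*
 * Illustrative sketch, not part of the driver: restricting a queue to
 * its first eight CUs from user space. num_cu_mask counts bits and
 * must be a multiple of 32; kfd_fd and queue_id are assumptions of the
 * sketch.
 *
 *	uint32_t cu_mask[2] = { 0x000000ff, 0x00000000 };
 *	struct kfd_ioctl_set_cu_mask_args ca = {0};
 *
 *	ca.queue_id = queue_id;
 *	ca.num_cu_mask = 2 * 32;
 *	ca.cu_mask_ptr = (uint64_t)cu_mask;
 *
 *	if (ioctl(kfd_fd, AMDKFD_IOC_SET_CU_MASK, &ca))
 *		return -errno;
 */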
    472
    473static int kfd_ioctl_get_queue_wave_state(struct file *filep,
    474					  struct kfd_process *p, void *data)
    475{
    476	struct kfd_ioctl_get_queue_wave_state_args *args = data;
    477	int r;
    478
    479	mutex_lock(&p->mutex);
    480
    481	r = pqm_get_wave_state(&p->pqm, args->queue_id,
    482			       (void __user *)args->ctl_stack_address,
    483			       &args->ctl_stack_used_size,
    484			       &args->save_area_used_size);
    485
    486	mutex_unlock(&p->mutex);
    487
    488	return r;
    489}
    490
    491static int kfd_ioctl_set_memory_policy(struct file *filep,
    492					struct kfd_process *p, void *data)
    493{
    494	struct kfd_ioctl_set_memory_policy_args *args = data;
    495	int err = 0;
    496	struct kfd_process_device *pdd;
    497	enum cache_policy default_policy, alternate_policy;
    498
    499	if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT
    500	    && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
    501		return -EINVAL;
    502	}
    503
    504	if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT
    505	    && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
    506		return -EINVAL;
    507	}
    508
    509	mutex_lock(&p->mutex);
    510	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
    511	if (!pdd) {
    512		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
    513		err = -EINVAL;
    514		goto err_pdd;
    515	}
    516
    517	pdd = kfd_bind_process_to_device(pdd->dev, p);
    518	if (IS_ERR(pdd)) {
    519		err = -ESRCH;
    520		goto out;
    521	}
    522
    523	default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT)
    524			 ? cache_policy_coherent : cache_policy_noncoherent;
    525
    526	alternate_policy =
    527		(args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
    528		   ? cache_policy_coherent : cache_policy_noncoherent;
    529
    530	if (!pdd->dev->dqm->ops.set_cache_memory_policy(pdd->dev->dqm,
    531				&pdd->qpd,
    532				default_policy,
    533				alternate_policy,
    534				(void __user *)args->alternate_aperture_base,
    535				args->alternate_aperture_size))
    536		err = -EINVAL;
    537
    538out:
    539err_pdd:
    540	mutex_unlock(&p->mutex);
    541
    542	return err;
    543}
    544
    545static int kfd_ioctl_set_trap_handler(struct file *filep,
    546					struct kfd_process *p, void *data)
    547{
    548	struct kfd_ioctl_set_trap_handler_args *args = data;
    549	int err = 0;
    550	struct kfd_process_device *pdd;
    551
    552	mutex_lock(&p->mutex);
    553
    554	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
    555	if (!pdd) {
    556		err = -EINVAL;
    557		goto err_pdd;
    558	}
    559
    560	pdd = kfd_bind_process_to_device(pdd->dev, p);
    561	if (IS_ERR(pdd)) {
    562		err = -ESRCH;
    563		goto out;
    564	}
    565
    566	kfd_process_set_trap_handler(&pdd->qpd, args->tba_addr, args->tma_addr);
    567
    568out:
    569err_pdd:
    570	mutex_unlock(&p->mutex);
    571
    572	return err;
    573}
    574
    575static int kfd_ioctl_dbg_register(struct file *filep,
    576				struct kfd_process *p, void *data)
    577{
    578	return -EPERM;
    579}
    580
    581static int kfd_ioctl_dbg_unregister(struct file *filep,
    582				struct kfd_process *p, void *data)
    583{
    584	return -EPERM;
    585}
    586
    587static int kfd_ioctl_dbg_address_watch(struct file *filep,
    588					struct kfd_process *p, void *data)
    589{
    590	return -EPERM;
    591}
    592
    593/* Parse and generate fixed size data structure for wave control */
    594static int kfd_ioctl_dbg_wave_control(struct file *filep,
    595					struct kfd_process *p, void *data)
    596{
    597	return -EPERM;
    598}
    599
    600static int kfd_ioctl_get_clock_counters(struct file *filep,
    601				struct kfd_process *p, void *data)
    602{
    603	struct kfd_ioctl_get_clock_counters_args *args = data;
    604	struct kfd_process_device *pdd;
    605
    606	mutex_lock(&p->mutex);
    607	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
    608	mutex_unlock(&p->mutex);
    609	if (pdd)
    610		/* Reading GPU clock counter from KGD */
    611		args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(pdd->dev->adev);
    612	else
    613		/* Node without GPU resource */
    614		args->gpu_clock_counter = 0;
    615
    616	/* No access to rdtsc. Using raw monotonic time */
    617	args->cpu_clock_counter = ktime_get_raw_ns();
    618	args->system_clock_counter = ktime_get_boottime_ns();
    619
    620	/* Since the counter is in nano-seconds we use 1GHz frequency */
    621	args->system_clock_freq = 1000000000;
    622
    623	return 0;
    624}
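/*
 * Illustrative sketch, not part of the driver: sampling the counters
 * around a region of interest. Since system_clock_freq is reported as
 * 1 GHz, the CPU/system counters can be read directly as nanoseconds.
 * kfd_fd, gpu_id and do_work() are assumptions of the sketch.
 *
 *	struct kfd_ioctl_get_clock_counters_args t0 = { .gpu_id = gpu_id };
 *	struct kfd_ioctl_get_clock_counters_args t1 = { .gpu_id = gpu_id };
 *	uint64_t cpu_ns, gpu_ticks;
 *
 *	ioctl(kfd_fd, AMDKFD_IOC_GET_CLOCK_COUNTERS, &t0);
 *	do_work();
 *	ioctl(kfd_fd, AMDKFD_IOC_GET_CLOCK_COUNTERS, &t1);
 *	cpu_ns = t1.cpu_clock_counter - t0.cpu_clock_counter;
 *	gpu_ticks = t1.gpu_clock_counter - t0.gpu_clock_counter;
 */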
    625
    626
    627static int kfd_ioctl_get_process_apertures(struct file *filp,
    628				struct kfd_process *p, void *data)
    629{
    630	struct kfd_ioctl_get_process_apertures_args *args = data;
    631	struct kfd_process_device_apertures *pAperture;
    632	int i;
    633
    634	dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
    635
    636	args->num_of_nodes = 0;
    637
    638	mutex_lock(&p->mutex);
    639	/* Run over all pdd of the process */
    640	for (i = 0; i < p->n_pdds; i++) {
    641		struct kfd_process_device *pdd = p->pdds[i];
    642
    643		pAperture =
    644			&args->process_apertures[args->num_of_nodes];
    645		pAperture->gpu_id = pdd->dev->id;
    646		pAperture->lds_base = pdd->lds_base;
    647		pAperture->lds_limit = pdd->lds_limit;
    648		pAperture->gpuvm_base = pdd->gpuvm_base;
    649		pAperture->gpuvm_limit = pdd->gpuvm_limit;
    650		pAperture->scratch_base = pdd->scratch_base;
    651		pAperture->scratch_limit = pdd->scratch_limit;
    652
    653		dev_dbg(kfd_device,
    654			"node id %u\n", args->num_of_nodes);
    655		dev_dbg(kfd_device,
    656			"gpu id %u\n", pdd->dev->id);
    657		dev_dbg(kfd_device,
    658			"lds_base %llX\n", pdd->lds_base);
    659		dev_dbg(kfd_device,
    660			"lds_limit %llX\n", pdd->lds_limit);
    661		dev_dbg(kfd_device,
    662			"gpuvm_base %llX\n", pdd->gpuvm_base);
    663		dev_dbg(kfd_device,
    664			"gpuvm_limit %llX\n", pdd->gpuvm_limit);
    665		dev_dbg(kfd_device,
    666			"scratch_base %llX\n", pdd->scratch_base);
    667		dev_dbg(kfd_device,
    668			"scratch_limit %llX\n", pdd->scratch_limit);
    669
    670		if (++args->num_of_nodes >= NUM_OF_SUPPORTED_GPUS)
    671			break;
    672	}
    673	mutex_unlock(&p->mutex);
    674
    675	return 0;
    676}
    677
    678static int kfd_ioctl_get_process_apertures_new(struct file *filp,
    679				struct kfd_process *p, void *data)
    680{
    681	struct kfd_ioctl_get_process_apertures_new_args *args = data;
    682	struct kfd_process_device_apertures *pa;
    683	int ret;
    684	int i;
    685
    686	dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
    687
    688	if (args->num_of_nodes == 0) {
     689		/* Return number of nodes, so that user space can allocate
    690		 * sufficient memory
    691		 */
    692		mutex_lock(&p->mutex);
    693		args->num_of_nodes = p->n_pdds;
    694		goto out_unlock;
    695	}
    696
    697	/* Fill in process-aperture information for all available
    698	 * nodes, but not more than args->num_of_nodes as that is
    699	 * the amount of memory allocated by user
    700	 */
    701	pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
    702				args->num_of_nodes), GFP_KERNEL);
    703	if (!pa)
    704		return -ENOMEM;
    705
    706	mutex_lock(&p->mutex);
    707
    708	if (!p->n_pdds) {
    709		args->num_of_nodes = 0;
    710		kfree(pa);
    711		goto out_unlock;
    712	}
    713
    714	/* Run over all pdd of the process */
    715	for (i = 0; i < min(p->n_pdds, args->num_of_nodes); i++) {
    716		struct kfd_process_device *pdd = p->pdds[i];
    717
    718		pa[i].gpu_id = pdd->dev->id;
    719		pa[i].lds_base = pdd->lds_base;
    720		pa[i].lds_limit = pdd->lds_limit;
    721		pa[i].gpuvm_base = pdd->gpuvm_base;
    722		pa[i].gpuvm_limit = pdd->gpuvm_limit;
    723		pa[i].scratch_base = pdd->scratch_base;
    724		pa[i].scratch_limit = pdd->scratch_limit;
    725
    726		dev_dbg(kfd_device,
    727			"gpu id %u\n", pdd->dev->id);
    728		dev_dbg(kfd_device,
    729			"lds_base %llX\n", pdd->lds_base);
    730		dev_dbg(kfd_device,
    731			"lds_limit %llX\n", pdd->lds_limit);
    732		dev_dbg(kfd_device,
    733			"gpuvm_base %llX\n", pdd->gpuvm_base);
    734		dev_dbg(kfd_device,
    735			"gpuvm_limit %llX\n", pdd->gpuvm_limit);
    736		dev_dbg(kfd_device,
    737			"scratch_base %llX\n", pdd->scratch_base);
    738		dev_dbg(kfd_device,
    739			"scratch_limit %llX\n", pdd->scratch_limit);
    740	}
    741	mutex_unlock(&p->mutex);
    742
    743	args->num_of_nodes = i;
    744	ret = copy_to_user(
    745			(void __user *)args->kfd_process_device_apertures_ptr,
    746			pa,
    747			(i * sizeof(struct kfd_process_device_apertures)));
    748	kfree(pa);
    749	return ret ? -EFAULT : 0;
    750
    751out_unlock:
    752	mutex_unlock(&p->mutex);
    753	return 0;
    754}
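/*
 * Illustrative sketch, not part of the driver: the two-call protocol
 * this ioctl implements. A first call with num_of_nodes == 0 only
 * reports how many nodes exist; user space then allocates the array
 * and calls again. kfd_fd is an assumption of the sketch and error
 * handling is trimmed.
 *
 *	struct kfd_ioctl_get_process_apertures_new_args aa = {0};
 *	struct kfd_process_device_apertures *ap;
 *
 *	if (ioctl(kfd_fd, AMDKFD_IOC_GET_PROCESS_APERTURES_NEW, &aa))
 *		return -errno;
 *	ap = calloc(aa.num_of_nodes, sizeof(*ap));
 *	aa.kfd_process_device_apertures_ptr = (uint64_t)ap;
 *	if (ioctl(kfd_fd, AMDKFD_IOC_GET_PROCESS_APERTURES_NEW, &aa))
 *		return -errno;
 *
 * On return, num_of_nodes holds the number of entries actually filled.
 */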
    755
    756static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
    757					void *data)
    758{
    759	struct kfd_ioctl_create_event_args *args = data;
    760	int err;
    761
    762	/* For dGPUs the event page is allocated in user mode. The
    763	 * handle is passed to KFD with the first call to this IOCTL
    764	 * through the event_page_offset field.
    765	 */
    766	if (args->event_page_offset) {
    767		mutex_lock(&p->mutex);
    768		err = kfd_kmap_event_page(p, args->event_page_offset);
    769		mutex_unlock(&p->mutex);
    770		if (err)
    771			return err;
    772	}
    773
    774	err = kfd_event_create(filp, p, args->event_type,
    775				args->auto_reset != 0, args->node_id,
    776				&args->event_id, &args->event_trigger_data,
    777				&args->event_page_offset,
    778				&args->event_slot_index);
    779
    780	pr_debug("Created event (id:0x%08x) (%s)\n", args->event_id, __func__);
    781	return err;
    782}
    783
    784static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
    785					void *data)
    786{
    787	struct kfd_ioctl_destroy_event_args *args = data;
    788
    789	return kfd_event_destroy(p, args->event_id);
    790}
    791
    792static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p,
    793				void *data)
    794{
    795	struct kfd_ioctl_set_event_args *args = data;
    796
    797	return kfd_set_event(p, args->event_id);
    798}
    799
    800static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p,
    801				void *data)
    802{
    803	struct kfd_ioctl_reset_event_args *args = data;
    804
    805	return kfd_reset_event(p, args->event_id);
    806}
    807
    808static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
    809				void *data)
    810{
    811	struct kfd_ioctl_wait_events_args *args = data;
    812	int err;
    813
    814	err = kfd_wait_on_events(p, args->num_events,
    815			(void __user *)args->events_ptr,
    816			(args->wait_for_all != 0),
    817			args->timeout, &args->wait_result);
    818
    819	return err;
    820}
    821static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
    822					struct kfd_process *p, void *data)
    823{
    824	struct kfd_ioctl_set_scratch_backing_va_args *args = data;
    825	struct kfd_process_device *pdd;
    826	struct kfd_dev *dev;
    827	long err;
    828
    829	mutex_lock(&p->mutex);
    830	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
    831	if (!pdd) {
    832		err = -EINVAL;
    833		goto err_pdd;
    834	}
    835	dev = pdd->dev;
    836
    837	pdd = kfd_bind_process_to_device(dev, p);
    838	if (IS_ERR(pdd)) {
    839		err = PTR_ERR(pdd);
    840		goto bind_process_to_device_fail;
    841	}
    842
    843	pdd->qpd.sh_hidden_private_base = args->va_addr;
    844
    845	mutex_unlock(&p->mutex);
    846
    847	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
    848	    pdd->qpd.vmid != 0 && dev->kfd2kgd->set_scratch_backing_va)
    849		dev->kfd2kgd->set_scratch_backing_va(
    850			dev->adev, args->va_addr, pdd->qpd.vmid);
    851
    852	return 0;
    853
    854bind_process_to_device_fail:
    855err_pdd:
    856	mutex_unlock(&p->mutex);
    857	return err;
    858}
    859
    860static int kfd_ioctl_get_tile_config(struct file *filep,
    861		struct kfd_process *p, void *data)
    862{
    863	struct kfd_ioctl_get_tile_config_args *args = data;
    864	struct kfd_process_device *pdd;
    865	struct tile_config config;
    866	int err = 0;
    867
    868	mutex_lock(&p->mutex);
    869	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
    870	mutex_unlock(&p->mutex);
    871	if (!pdd)
    872		return -EINVAL;
    873
    874	amdgpu_amdkfd_get_tile_config(pdd->dev->adev, &config);
    875
    876	args->gb_addr_config = config.gb_addr_config;
    877	args->num_banks = config.num_banks;
    878	args->num_ranks = config.num_ranks;
    879
    880	if (args->num_tile_configs > config.num_tile_configs)
    881		args->num_tile_configs = config.num_tile_configs;
    882	err = copy_to_user((void __user *)args->tile_config_ptr,
    883			config.tile_config_ptr,
    884			args->num_tile_configs * sizeof(uint32_t));
    885	if (err) {
    886		args->num_tile_configs = 0;
    887		return -EFAULT;
    888	}
    889
    890	if (args->num_macro_tile_configs > config.num_macro_tile_configs)
    891		args->num_macro_tile_configs =
    892				config.num_macro_tile_configs;
    893	err = copy_to_user((void __user *)args->macro_tile_config_ptr,
    894			config.macro_tile_config_ptr,
    895			args->num_macro_tile_configs * sizeof(uint32_t));
    896	if (err) {
    897		args->num_macro_tile_configs = 0;
    898		return -EFAULT;
    899	}
    900
    901	return 0;
    902}
    903
    904static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
    905				void *data)
    906{
    907	struct kfd_ioctl_acquire_vm_args *args = data;
    908	struct kfd_process_device *pdd;
    909	struct file *drm_file;
    910	int ret;
    911
    912	drm_file = fget(args->drm_fd);
    913	if (!drm_file)
    914		return -EINVAL;
    915
    916	mutex_lock(&p->mutex);
    917	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
    918	if (!pdd) {
    919		ret = -EINVAL;
    920		goto err_pdd;
    921	}
    922
    923	if (pdd->drm_file) {
    924		ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
    925		goto err_drm_file;
    926	}
    927
    928	ret = kfd_process_device_init_vm(pdd, drm_file);
    929	if (ret)
    930		goto err_unlock;
    931
    932	/* On success, the PDD keeps the drm_file reference */
    933	mutex_unlock(&p->mutex);
    934
    935	return 0;
    936
    937err_unlock:
    938err_pdd:
    939err_drm_file:
    940	mutex_unlock(&p->mutex);
    941	fput(drm_file);
    942	return ret;
    943}
    944
    945bool kfd_dev_is_large_bar(struct kfd_dev *dev)
    946{
    947	if (debug_largebar) {
    948		pr_debug("Simulate large-bar allocation on non large-bar machine\n");
    949		return true;
    950	}
    951
    952	if (dev->use_iommu_v2)
    953		return false;
    954
    955	if (dev->local_mem_info.local_mem_size_private == 0 &&
    956			dev->local_mem_info.local_mem_size_public > 0)
    957		return true;
    958	return false;
    959}
    960
    961static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
    962					struct kfd_process *p, void *data)
    963{
    964	struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
    965	struct kfd_process_device *pdd;
    966	void *mem;
    967	struct kfd_dev *dev;
    968	int idr_handle;
    969	long err;
    970	uint64_t offset = args->mmap_offset;
    971	uint32_t flags = args->flags;
    972
    973	if (args->size == 0)
    974		return -EINVAL;
    975
    976#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
    977	/* Flush pending deferred work to avoid racing with deferred actions
    978	 * from previous memory map changes (e.g. munmap).
    979	 */
    980	svm_range_list_lock_and_flush_work(&p->svms, current->mm);
    981	mutex_lock(&p->svms.lock);
    982	mmap_write_unlock(current->mm);
    983	if (interval_tree_iter_first(&p->svms.objects,
    984				     args->va_addr >> PAGE_SHIFT,
    985				     (args->va_addr + args->size - 1) >> PAGE_SHIFT)) {
    986		pr_err("Address: 0x%llx already allocated by SVM\n",
    987			args->va_addr);
    988		mutex_unlock(&p->svms.lock);
    989		return -EADDRINUSE;
    990	}
    991	mutex_unlock(&p->svms.lock);
    992#endif
    993	mutex_lock(&p->mutex);
    994	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
    995	if (!pdd) {
    996		err = -EINVAL;
    997		goto err_pdd;
    998	}
    999
   1000	dev = pdd->dev;
   1001
   1002	if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
   1003		(flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
   1004		!kfd_dev_is_large_bar(dev)) {
   1005		pr_err("Alloc host visible vram on small bar is not allowed\n");
   1006		err = -EINVAL;
   1007		goto err_large_bar;
   1008	}
   1009
   1010	pdd = kfd_bind_process_to_device(dev, p);
   1011	if (IS_ERR(pdd)) {
   1012		err = PTR_ERR(pdd);
   1013		goto err_unlock;
   1014	}
   1015
   1016	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
   1017		if (args->size != kfd_doorbell_process_slice(dev)) {
   1018			err = -EINVAL;
   1019			goto err_unlock;
   1020		}
   1021		offset = kfd_get_process_doorbells(pdd);
   1022	} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
   1023		if (args->size != PAGE_SIZE) {
   1024			err = -EINVAL;
   1025			goto err_unlock;
   1026		}
   1027		offset = dev->adev->rmmio_remap.bus_addr;
   1028		if (!offset) {
   1029			err = -ENOMEM;
   1030			goto err_unlock;
   1031		}
   1032	}
   1033
   1034	err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
   1035		dev->adev, args->va_addr, args->size,
   1036		pdd->drm_priv, (struct kgd_mem **) &mem, &offset,
   1037		flags, false);
   1038
   1039	if (err)
   1040		goto err_unlock;
   1041
   1042	idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
   1043	if (idr_handle < 0) {
   1044		err = -EFAULT;
   1045		goto err_free;
   1046	}
   1047
   1048	/* Update the VRAM usage count */
   1049	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
   1050		WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + args->size);
   1051
   1052	mutex_unlock(&p->mutex);
   1053
   1054	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
   1055	args->mmap_offset = offset;
   1056
   1057	/* MMIO is mapped through kfd device
   1058	 * Generate a kfd mmap offset
   1059	 */
   1060	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)
   1061		args->mmap_offset = KFD_MMAP_TYPE_MMIO
   1062					| KFD_MMAP_GPU_ID(args->gpu_id);
   1063
   1064	return 0;
   1065
   1066err_free:
   1067	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, (struct kgd_mem *)mem,
   1068					       pdd->drm_priv, NULL);
   1069err_unlock:
   1070err_pdd:
   1071err_large_bar:
   1072	mutex_unlock(&p->mutex);
   1073	return err;
   1074}
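/*
 * Illustrative sketch, not part of the driver: the usual user-space
 * sequence for device-local memory. A virtual address range is
 * reserved first (for instance with an anonymous PROT_NONE mmap), the
 * BO is allocated at that address, and it is then made resident with
 * the map ioctl handled further below. kfd_fd, gpu_id, va and size are
 * assumptions of the sketch.
 *
 *	struct kfd_ioctl_alloc_memory_of_gpu_args ma = {0};
 *	struct kfd_ioctl_map_memory_to_gpu_args mm = {0};
 *
 *	ma.gpu_id = gpu_id;
 *	ma.va_addr = (uint64_t)va;
 *	ma.size = size;
 *	ma.flags = KFD_IOC_ALLOC_MEM_FLAGS_VRAM |
 *		   KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE;
 *	if (ioctl(kfd_fd, AMDKFD_IOC_ALLOC_MEMORY_OF_GPU, &ma))
 *		return -errno;
 *
 *	mm.handle = ma.handle;
 *	mm.device_ids_array_ptr = (uint64_t)&gpu_id;
 *	mm.n_devices = 1;
 *	if (ioctl(kfd_fd, AMDKFD_IOC_MAP_MEMORY_TO_GPU, &mm))
 *		return -errno;
 */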
   1075
   1076static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
   1077					struct kfd_process *p, void *data)
   1078{
   1079	struct kfd_ioctl_free_memory_of_gpu_args *args = data;
   1080	struct kfd_process_device *pdd;
   1081	void *mem;
   1082	int ret;
   1083	uint64_t size = 0;
   1084
   1085	mutex_lock(&p->mutex);
   1086	/*
   1087	 * Safeguard to prevent user space from freeing signal BO.
   1088	 * It will be freed at process termination.
   1089	 */
   1090	if (p->signal_handle && (p->signal_handle == args->handle)) {
   1091		pr_err("Free signal BO is not allowed\n");
   1092		ret = -EPERM;
   1093		goto err_unlock;
   1094	}
   1095
   1096	pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle));
   1097	if (!pdd) {
   1098		pr_err("Process device data doesn't exist\n");
   1099		ret = -EINVAL;
   1100		goto err_pdd;
   1101	}
   1102
   1103	mem = kfd_process_device_translate_handle(
   1104		pdd, GET_IDR_HANDLE(args->handle));
   1105	if (!mem) {
   1106		ret = -EINVAL;
   1107		goto err_unlock;
   1108	}
   1109
   1110	ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev,
   1111				(struct kgd_mem *)mem, pdd->drm_priv, &size);
   1112
   1113	/* If freeing the buffer failed, leave the handle in place for
   1114	 * clean-up during process tear-down.
   1115	 */
   1116	if (!ret)
   1117		kfd_process_device_remove_obj_handle(
   1118			pdd, GET_IDR_HANDLE(args->handle));
   1119
   1120	WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size);
   1121
   1122err_unlock:
   1123err_pdd:
   1124	mutex_unlock(&p->mutex);
   1125	return ret;
   1126}
   1127
   1128static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
   1129					struct kfd_process *p, void *data)
   1130{
   1131	struct kfd_ioctl_map_memory_to_gpu_args *args = data;
   1132	struct kfd_process_device *pdd, *peer_pdd;
   1133	void *mem;
   1134	struct kfd_dev *dev;
   1135	long err = 0;
   1136	int i;
   1137	uint32_t *devices_arr = NULL;
   1138
   1139	if (!args->n_devices) {
   1140		pr_debug("Device IDs array empty\n");
   1141		return -EINVAL;
   1142	}
   1143	if (args->n_success > args->n_devices) {
   1144		pr_debug("n_success exceeds n_devices\n");
   1145		return -EINVAL;
   1146	}
   1147
   1148	devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
   1149				    GFP_KERNEL);
   1150	if (!devices_arr)
   1151		return -ENOMEM;
   1152
   1153	err = copy_from_user(devices_arr,
   1154			     (void __user *)args->device_ids_array_ptr,
   1155			     args->n_devices * sizeof(*devices_arr));
   1156	if (err != 0) {
   1157		err = -EFAULT;
   1158		goto copy_from_user_failed;
   1159	}
   1160
   1161	mutex_lock(&p->mutex);
   1162	pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle));
   1163	if (!pdd) {
   1164		err = -EINVAL;
   1165		goto get_process_device_data_failed;
   1166	}
   1167	dev = pdd->dev;
   1168
   1169	pdd = kfd_bind_process_to_device(dev, p);
   1170	if (IS_ERR(pdd)) {
   1171		err = PTR_ERR(pdd);
   1172		goto bind_process_to_device_failed;
   1173	}
   1174
   1175	mem = kfd_process_device_translate_handle(pdd,
   1176						GET_IDR_HANDLE(args->handle));
   1177	if (!mem) {
   1178		err = -ENOMEM;
   1179		goto get_mem_obj_from_handle_failed;
   1180	}
   1181
   1182	for (i = args->n_success; i < args->n_devices; i++) {
   1183		peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
   1184		if (!peer_pdd) {
   1185			pr_debug("Getting device by id failed for 0x%x\n",
   1186				 devices_arr[i]);
   1187			err = -EINVAL;
   1188			goto get_mem_obj_from_handle_failed;
   1189		}
   1190
   1191		peer_pdd = kfd_bind_process_to_device(peer_pdd->dev, p);
   1192		if (IS_ERR(peer_pdd)) {
   1193			err = PTR_ERR(peer_pdd);
   1194			goto get_mem_obj_from_handle_failed;
   1195		}
   1196
   1197		err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
   1198			peer_pdd->dev->adev, (struct kgd_mem *)mem,
   1199			peer_pdd->drm_priv);
   1200		if (err) {
   1201			struct pci_dev *pdev = peer_pdd->dev->adev->pdev;
   1202
   1203			dev_err(dev->adev->dev,
   1204			       "Failed to map peer:%04x:%02x:%02x.%d mem_domain:%d\n",
   1205			       pci_domain_nr(pdev->bus),
   1206			       pdev->bus->number,
   1207			       PCI_SLOT(pdev->devfn),
   1208			       PCI_FUNC(pdev->devfn),
   1209			       ((struct kgd_mem *)mem)->domain);
   1210			goto map_memory_to_gpu_failed;
   1211		}
   1212		args->n_success = i+1;
   1213	}
   1214
   1215	mutex_unlock(&p->mutex);
   1216
   1217	err = amdgpu_amdkfd_gpuvm_sync_memory(dev->adev, (struct kgd_mem *) mem, true);
   1218	if (err) {
   1219		pr_debug("Sync memory failed, wait interrupted by user signal\n");
   1220		goto sync_memory_failed;
   1221	}
   1222
   1223	/* Flush TLBs after waiting for the page table updates to complete */
   1224	for (i = 0; i < args->n_devices; i++) {
   1225		peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
   1226		if (WARN_ON_ONCE(!peer_pdd))
   1227			continue;
   1228		kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);
   1229	}
   1230	kfree(devices_arr);
   1231
   1232	return err;
   1233
   1234get_process_device_data_failed:
   1235bind_process_to_device_failed:
   1236get_mem_obj_from_handle_failed:
   1237map_memory_to_gpu_failed:
   1238	mutex_unlock(&p->mutex);
   1239copy_from_user_failed:
   1240sync_memory_failed:
   1241	kfree(devices_arr);
   1242
   1243	return err;
   1244}
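/*
 * Note on the n_success protocol above (sketch, not part of the
 * driver): the mapping loop starts at args->n_success and advances it
 * after every device that was mapped, and the updated args block is
 * copied back to user space. A caller that hits an error it considers
 * recoverable can therefore fix the cause and reissue the same ioctl
 * without remapping devices that already succeeded. kfd_fd, mm (a
 * populated kfd_ioctl_map_memory_to_gpu_args) and recoverable() are
 * assumptions of the sketch.
 *
 *	while (ioctl(kfd_fd, AMDKFD_IOC_MAP_MEMORY_TO_GPU, &mm)) {
 *		if (!recoverable(errno))
 *			return -errno;
 *	}
 */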
   1245
   1246static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
   1247					struct kfd_process *p, void *data)
   1248{
   1249	struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
   1250	struct kfd_process_device *pdd, *peer_pdd;
   1251	void *mem;
   1252	long err = 0;
   1253	uint32_t *devices_arr = NULL, i;
   1254
   1255	if (!args->n_devices) {
   1256		pr_debug("Device IDs array empty\n");
   1257		return -EINVAL;
   1258	}
   1259	if (args->n_success > args->n_devices) {
   1260		pr_debug("n_success exceeds n_devices\n");
   1261		return -EINVAL;
   1262	}
   1263
   1264	devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
   1265				    GFP_KERNEL);
   1266	if (!devices_arr)
   1267		return -ENOMEM;
   1268
   1269	err = copy_from_user(devices_arr,
   1270			     (void __user *)args->device_ids_array_ptr,
   1271			     args->n_devices * sizeof(*devices_arr));
   1272	if (err != 0) {
   1273		err = -EFAULT;
   1274		goto copy_from_user_failed;
   1275	}
   1276
   1277	mutex_lock(&p->mutex);
   1278	pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle));
   1279	if (!pdd) {
   1280		err = -EINVAL;
   1281		goto bind_process_to_device_failed;
   1282	}
   1283
   1284	mem = kfd_process_device_translate_handle(pdd,
   1285						GET_IDR_HANDLE(args->handle));
   1286	if (!mem) {
   1287		err = -ENOMEM;
   1288		goto get_mem_obj_from_handle_failed;
   1289	}
   1290
   1291	for (i = args->n_success; i < args->n_devices; i++) {
   1292		peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
   1293		if (!peer_pdd) {
   1294			err = -EINVAL;
   1295			goto get_mem_obj_from_handle_failed;
   1296		}
   1297		err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
   1298			peer_pdd->dev->adev, (struct kgd_mem *)mem, peer_pdd->drm_priv);
   1299		if (err) {
   1300			pr_err("Failed to unmap from gpu %d/%d\n",
   1301			       i, args->n_devices);
   1302			goto unmap_memory_from_gpu_failed;
   1303		}
   1304		args->n_success = i+1;
   1305	}
   1306	mutex_unlock(&p->mutex);
   1307
   1308	if (kfd_flush_tlb_after_unmap(pdd->dev)) {
   1309		err = amdgpu_amdkfd_gpuvm_sync_memory(pdd->dev->adev,
   1310				(struct kgd_mem *) mem, true);
   1311		if (err) {
   1312			pr_debug("Sync memory failed, wait interrupted by user signal\n");
   1313			goto sync_memory_failed;
   1314		}
   1315
   1316		/* Flush TLBs after waiting for the page table updates to complete */
   1317		for (i = 0; i < args->n_devices; i++) {
   1318			peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
   1319			if (WARN_ON_ONCE(!peer_pdd))
   1320				continue;
   1321			kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
   1322		}
   1323	}
   1324	kfree(devices_arr);
   1325
   1326	return 0;
   1327
   1328bind_process_to_device_failed:
   1329get_mem_obj_from_handle_failed:
   1330unmap_memory_from_gpu_failed:
   1331	mutex_unlock(&p->mutex);
   1332copy_from_user_failed:
   1333sync_memory_failed:
   1334	kfree(devices_arr);
   1335	return err;
   1336}
   1337
   1338static int kfd_ioctl_alloc_queue_gws(struct file *filep,
   1339		struct kfd_process *p, void *data)
   1340{
   1341	int retval;
   1342	struct kfd_ioctl_alloc_queue_gws_args *args = data;
   1343	struct queue *q;
   1344	struct kfd_dev *dev;
   1345
   1346	mutex_lock(&p->mutex);
   1347	q = pqm_get_user_queue(&p->pqm, args->queue_id);
   1348
   1349	if (q) {
   1350		dev = q->device;
   1351	} else {
   1352		retval = -EINVAL;
   1353		goto out_unlock;
   1354	}
   1355
   1356	if (!dev->gws) {
   1357		retval = -ENODEV;
   1358		goto out_unlock;
   1359	}
   1360
   1361	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
   1362		retval = -ENODEV;
   1363		goto out_unlock;
   1364	}
   1365
   1366	retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL);
   1367	mutex_unlock(&p->mutex);
   1368
   1369	args->first_gws = 0;
   1370	return retval;
   1371
   1372out_unlock:
   1373	mutex_unlock(&p->mutex);
   1374	return retval;
   1375}
   1376
   1377static int kfd_ioctl_get_dmabuf_info(struct file *filep,
   1378		struct kfd_process *p, void *data)
   1379{
   1380	struct kfd_ioctl_get_dmabuf_info_args *args = data;
   1381	struct kfd_dev *dev = NULL;
   1382	struct amdgpu_device *dmabuf_adev;
   1383	void *metadata_buffer = NULL;
   1384	uint32_t flags;
   1385	unsigned int i;
   1386	int r;
   1387
   1388	/* Find a KFD GPU device that supports the get_dmabuf_info query */
   1389	for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++)
   1390		if (dev)
   1391			break;
   1392	if (!dev)
   1393		return -EINVAL;
   1394
   1395	if (args->metadata_ptr) {
   1396		metadata_buffer = kzalloc(args->metadata_size, GFP_KERNEL);
   1397		if (!metadata_buffer)
   1398			return -ENOMEM;
   1399	}
   1400
   1401	/* Get dmabuf info from KGD */
   1402	r = amdgpu_amdkfd_get_dmabuf_info(dev->adev, args->dmabuf_fd,
   1403					  &dmabuf_adev, &args->size,
   1404					  metadata_buffer, args->metadata_size,
   1405					  &args->metadata_size, &flags);
   1406	if (r)
   1407		goto exit;
   1408
   1409	/* Reverse-lookup gpu_id from kgd pointer */
   1410	dev = kfd_device_by_adev(dmabuf_adev);
   1411	if (!dev) {
   1412		r = -EINVAL;
   1413		goto exit;
   1414	}
   1415	args->gpu_id = dev->id;
   1416	args->flags = flags;
   1417
   1418	/* Copy metadata buffer to user mode */
   1419	if (metadata_buffer) {
   1420		r = copy_to_user((void __user *)args->metadata_ptr,
   1421				 metadata_buffer, args->metadata_size);
   1422		if (r != 0)
   1423			r = -EFAULT;
   1424	}
   1425
   1426exit:
   1427	kfree(metadata_buffer);
   1428
   1429	return r;
   1430}
   1431
   1432static int kfd_ioctl_import_dmabuf(struct file *filep,
   1433				   struct kfd_process *p, void *data)
   1434{
   1435	struct kfd_ioctl_import_dmabuf_args *args = data;
   1436	struct kfd_process_device *pdd;
   1437	struct dma_buf *dmabuf;
   1438	int idr_handle;
   1439	uint64_t size;
   1440	void *mem;
   1441	int r;
   1442
   1443	dmabuf = dma_buf_get(args->dmabuf_fd);
   1444	if (IS_ERR(dmabuf))
   1445		return PTR_ERR(dmabuf);
   1446
   1447	mutex_lock(&p->mutex);
   1448	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
   1449	if (!pdd) {
   1450		r = -EINVAL;
   1451		goto err_unlock;
   1452	}
   1453
   1454	pdd = kfd_bind_process_to_device(pdd->dev, p);
   1455	if (IS_ERR(pdd)) {
   1456		r = PTR_ERR(pdd);
   1457		goto err_unlock;
   1458	}
   1459
   1460	r = amdgpu_amdkfd_gpuvm_import_dmabuf(pdd->dev->adev, dmabuf,
   1461					      args->va_addr, pdd->drm_priv,
   1462					      (struct kgd_mem **)&mem, &size,
   1463					      NULL);
   1464	if (r)
   1465		goto err_unlock;
   1466
   1467	idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
   1468	if (idr_handle < 0) {
   1469		r = -EFAULT;
   1470		goto err_free;
   1471	}
   1472
   1473	mutex_unlock(&p->mutex);
   1474	dma_buf_put(dmabuf);
   1475
   1476	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
   1477
   1478	return 0;
   1479
   1480err_free:
   1481	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, (struct kgd_mem *)mem,
   1482					       pdd->drm_priv, NULL);
   1483err_unlock:
   1484	mutex_unlock(&p->mutex);
   1485	dma_buf_put(dmabuf);
   1486	return r;
   1487}
   1488
   1489/* Handle requests for watching SMI events */
   1490static int kfd_ioctl_smi_events(struct file *filep,
   1491				struct kfd_process *p, void *data)
   1492{
   1493	struct kfd_ioctl_smi_events_args *args = data;
   1494	struct kfd_process_device *pdd;
   1495
   1496	mutex_lock(&p->mutex);
   1497
   1498	pdd = kfd_process_device_data_by_id(p, args->gpuid);
   1499	mutex_unlock(&p->mutex);
   1500	if (!pdd)
   1501		return -EINVAL;
   1502
   1503	return kfd_smi_event_open(pdd->dev, &args->anon_fd);
   1504}
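/*
 * Illustrative sketch, not part of the driver: the ioctl hands back an
 * anonymous file descriptor that delivers SMI event lines for the
 * selected GPU; how events are enabled and formatted is handled in
 * kfd_smi_events.c rather than here. kfd_fd and gpu_id are assumptions
 * of the sketch.
 *
 *	struct kfd_ioctl_smi_events_args sa = { .gpuid = gpu_id };
 *
 *	if (ioctl(kfd_fd, AMDKFD_IOC_SMI_EVENTS, &sa))
 *		return -errno;
 *
 * sa.anon_fd can then be handed to poll(2) and read(2).
 */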
   1505
   1506static int kfd_ioctl_set_xnack_mode(struct file *filep,
   1507				    struct kfd_process *p, void *data)
   1508{
   1509	struct kfd_ioctl_set_xnack_mode_args *args = data;
   1510	int r = 0;
   1511
   1512	mutex_lock(&p->mutex);
   1513	if (args->xnack_enabled >= 0) {
   1514		if (!list_empty(&p->pqm.queues)) {
   1515			pr_debug("Process has user queues running\n");
   1516			mutex_unlock(&p->mutex);
   1517			return -EBUSY;
   1518		}
   1519		if (args->xnack_enabled && !kfd_process_xnack_mode(p, true))
   1520			r = -EPERM;
   1521		else
   1522			p->xnack_enabled = args->xnack_enabled;
   1523	} else {
   1524		args->xnack_enabled = p->xnack_enabled;
   1525	}
   1526	mutex_unlock(&p->mutex);
   1527
   1528	return r;
   1529}
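/*
 * Illustrative sketch, not part of the driver: the same ioctl both
 * queries and sets the XNACK mode. A negative xnack_enabled only reads
 * the current mode back; 0 or 1 change it and are rejected with -EBUSY
 * once user queues exist. kfd_fd is an assumption of the sketch.
 *
 *	struct kfd_ioctl_set_xnack_mode_args xa = { .xnack_enabled = -1 };
 *
 *	ioctl(kfd_fd, AMDKFD_IOC_SET_XNACK_MODE, &xa);
 *	if (!xa.xnack_enabled) {
 *		xa.xnack_enabled = 1;
 *		if (ioctl(kfd_fd, AMDKFD_IOC_SET_XNACK_MODE, &xa))
 *			return -errno;
 *	}
 */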
   1530
   1531#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
   1532static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
   1533{
   1534	struct kfd_ioctl_svm_args *args = data;
   1535	int r = 0;
   1536
   1537	pr_debug("start 0x%llx size 0x%llx op 0x%x nattr 0x%x\n",
   1538		 args->start_addr, args->size, args->op, args->nattr);
   1539
   1540	if ((args->start_addr & ~PAGE_MASK) || (args->size & ~PAGE_MASK))
   1541		return -EINVAL;
   1542	if (!args->start_addr || !args->size)
   1543		return -EINVAL;
   1544
   1545	r = svm_ioctl(p, args->op, args->start_addr, args->size, args->nattr,
   1546		      args->attrs);
   1547
   1548	return r;
   1549}
   1550#else
   1551static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
   1552{
   1553	return -EPERM;
   1554}
   1555#endif
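/*
 * Illustrative sketch, not part of the driver: prefetching an SVM range
 * to a GPU through the variable-size argument block this ioctl takes.
 * start_addr and size must be page aligned; kfd_fd, gpu_id, buf and
 * size are assumptions of the sketch, and the attribute names come
 * from the uapi header.
 *
 *	struct kfd_ioctl_svm_args *sva;
 *
 *	sva = calloc(1, sizeof(*sva) +
 *		     sizeof(struct kfd_ioctl_svm_attribute));
 *	sva->start_addr = (uint64_t)buf;
 *	sva->size = size;
 *	sva->op = KFD_IOCTL_SVM_OP_SET_ATTR;
 *	sva->nattr = 1;
 *	sva->attrs[0].type = KFD_IOCTL_SVM_ATTR_PREFETCH_LOC;
 *	sva->attrs[0].value = gpu_id;
 *	if (ioctl(kfd_fd, AMDKFD_IOC_SVM, sva))
 *		perror("AMDKFD_IOC_SVM");
 *	free(sva);
 */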
   1556
   1557static int criu_checkpoint_process(struct kfd_process *p,
   1558			     uint8_t __user *user_priv_data,
   1559			     uint64_t *priv_offset)
   1560{
   1561	struct kfd_criu_process_priv_data process_priv;
   1562	int ret;
   1563
   1564	memset(&process_priv, 0, sizeof(process_priv));
   1565
   1566	process_priv.version = KFD_CRIU_PRIV_VERSION;
    1567	/* For CR, we don't consider the negative xnack mode, which is only
    1568	 * used for querying without changing it. Here 0 simply means disabled
    1569	 * and 1 means enabled (i.e. retry faults to find a valid PTE).
    1570	 */
   1571	process_priv.xnack_mode = p->xnack_enabled ? 1 : 0;
   1572
   1573	ret = copy_to_user(user_priv_data + *priv_offset,
   1574				&process_priv, sizeof(process_priv));
   1575
   1576	if (ret) {
   1577		pr_err("Failed to copy process information to user\n");
   1578		ret = -EFAULT;
   1579	}
   1580
   1581	*priv_offset += sizeof(process_priv);
   1582	return ret;
   1583}
   1584
   1585static int criu_checkpoint_devices(struct kfd_process *p,
   1586			     uint32_t num_devices,
   1587			     uint8_t __user *user_addr,
   1588			     uint8_t __user *user_priv_data,
   1589			     uint64_t *priv_offset)
   1590{
   1591	struct kfd_criu_device_priv_data *device_priv = NULL;
   1592	struct kfd_criu_device_bucket *device_buckets = NULL;
   1593	int ret = 0, i;
   1594
   1595	device_buckets = kvzalloc(num_devices * sizeof(*device_buckets), GFP_KERNEL);
   1596	if (!device_buckets) {
   1597		ret = -ENOMEM;
   1598		goto exit;
   1599	}
   1600
   1601	device_priv = kvzalloc(num_devices * sizeof(*device_priv), GFP_KERNEL);
   1602	if (!device_priv) {
   1603		ret = -ENOMEM;
   1604		goto exit;
   1605	}
   1606
   1607	for (i = 0; i < num_devices; i++) {
   1608		struct kfd_process_device *pdd = p->pdds[i];
   1609
   1610		device_buckets[i].user_gpu_id = pdd->user_gpu_id;
   1611		device_buckets[i].actual_gpu_id = pdd->dev->id;
   1612
   1613		/*
   1614		 * priv_data does not contain useful information for now and is reserved for
   1615		 * future use, so we do not set its contents.
   1616		 */
   1617	}
   1618
   1619	ret = copy_to_user(user_addr, device_buckets, num_devices * sizeof(*device_buckets));
   1620	if (ret) {
   1621		pr_err("Failed to copy device information to user\n");
   1622		ret = -EFAULT;
   1623		goto exit;
   1624	}
   1625
   1626	ret = copy_to_user(user_priv_data + *priv_offset,
   1627			   device_priv,
   1628			   num_devices * sizeof(*device_priv));
   1629	if (ret) {
   1630		pr_err("Failed to copy device information to user\n");
   1631		ret = -EFAULT;
   1632	}
   1633	*priv_offset += num_devices * sizeof(*device_priv);
   1634
   1635exit:
   1636	kvfree(device_buckets);
   1637	kvfree(device_priv);
   1638	return ret;
   1639}
   1640
   1641static uint32_t get_process_num_bos(struct kfd_process *p)
   1642{
   1643	uint32_t num_of_bos = 0;
   1644	int i;
   1645
   1646	/* Run over all PDDs of the process */
   1647	for (i = 0; i < p->n_pdds; i++) {
   1648		struct kfd_process_device *pdd = p->pdds[i];
   1649		void *mem;
   1650		int id;
   1651
   1652		idr_for_each_entry(&pdd->alloc_idr, mem, id) {
   1653			struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;
   1654
   1655			if ((uint64_t)kgd_mem->va > pdd->gpuvm_base)
   1656				num_of_bos++;
   1657		}
   1658	}
   1659	return num_of_bos;
   1660}
   1661
   1662static int criu_get_prime_handle(struct drm_gem_object *gobj, int flags,
   1663				      u32 *shared_fd)
   1664{
   1665	struct dma_buf *dmabuf;
   1666	int ret;
   1667
   1668	dmabuf = amdgpu_gem_prime_export(gobj, flags);
   1669	if (IS_ERR(dmabuf)) {
   1670		ret = PTR_ERR(dmabuf);
   1671		pr_err("dmabuf export failed for the BO\n");
   1672		return ret;
   1673	}
   1674
   1675	ret = dma_buf_fd(dmabuf, flags);
   1676	if (ret < 0) {
   1677		pr_err("dmabuf create fd failed, ret:%d\n", ret);
   1678		goto out_free_dmabuf;
   1679	}
   1680
   1681	*shared_fd = ret;
   1682	return 0;
   1683
   1684out_free_dmabuf:
   1685	dma_buf_put(dmabuf);
   1686	return ret;
   1687}
   1688
   1689static int criu_checkpoint_bos(struct kfd_process *p,
   1690			       uint32_t num_bos,
   1691			       uint8_t __user *user_bos,
   1692			       uint8_t __user *user_priv_data,
   1693			       uint64_t *priv_offset)
   1694{
   1695	struct kfd_criu_bo_bucket *bo_buckets;
   1696	struct kfd_criu_bo_priv_data *bo_privs;
   1697	int ret = 0, pdd_index, bo_index = 0, id;
   1698	void *mem;
   1699
   1700	bo_buckets = kvzalloc(num_bos * sizeof(*bo_buckets), GFP_KERNEL);
   1701	if (!bo_buckets)
   1702		return -ENOMEM;
   1703
   1704	bo_privs = kvzalloc(num_bos * sizeof(*bo_privs), GFP_KERNEL);
   1705	if (!bo_privs) {
   1706		ret = -ENOMEM;
   1707		goto exit;
   1708	}
   1709
   1710	for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) {
   1711		struct kfd_process_device *pdd = p->pdds[pdd_index];
   1712		struct amdgpu_bo *dumper_bo;
   1713		struct kgd_mem *kgd_mem;
   1714
   1715		idr_for_each_entry(&pdd->alloc_idr, mem, id) {
   1716			struct kfd_criu_bo_bucket *bo_bucket;
   1717			struct kfd_criu_bo_priv_data *bo_priv;
   1718			int i, dev_idx = 0;
   1719
   1720			if (!mem) {
   1721				ret = -ENOMEM;
   1722				goto exit;
   1723			}
   1724
   1725			kgd_mem = (struct kgd_mem *)mem;
   1726			dumper_bo = kgd_mem->bo;
   1727
   1728			if ((uint64_t)kgd_mem->va <= pdd->gpuvm_base)
   1729				continue;
   1730
   1731			bo_bucket = &bo_buckets[bo_index];
   1732			bo_priv = &bo_privs[bo_index];
   1733
   1734			bo_bucket->gpu_id = pdd->user_gpu_id;
   1735			bo_bucket->addr = (uint64_t)kgd_mem->va;
   1736			bo_bucket->size = amdgpu_bo_size(dumper_bo);
   1737			bo_bucket->alloc_flags = (uint32_t)kgd_mem->alloc_flags;
   1738			bo_priv->idr_handle = id;
   1739
   1740			if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
   1741				ret = amdgpu_ttm_tt_get_userptr(&dumper_bo->tbo,
   1742								&bo_priv->user_addr);
   1743				if (ret) {
   1744					pr_err("Failed to obtain user address for user-pointer bo\n");
   1745					goto exit;
   1746				}
   1747			}
   1748			if (bo_bucket->alloc_flags
   1749			    & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {
   1750				ret = criu_get_prime_handle(&dumper_bo->tbo.base,
   1751						bo_bucket->alloc_flags &
   1752						KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? DRM_RDWR : 0,
   1753						&bo_bucket->dmabuf_fd);
   1754				if (ret)
   1755					goto exit;
   1756			} else {
   1757				bo_bucket->dmabuf_fd = KFD_INVALID_FD;
   1758			}
   1759
   1760			if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL)
   1761				bo_bucket->offset = KFD_MMAP_TYPE_DOORBELL |
   1762					KFD_MMAP_GPU_ID(pdd->dev->id);
   1763			else if (bo_bucket->alloc_flags &
   1764				KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)
   1765				bo_bucket->offset = KFD_MMAP_TYPE_MMIO |
   1766					KFD_MMAP_GPU_ID(pdd->dev->id);
   1767			else
   1768				bo_bucket->offset = amdgpu_bo_mmap_offset(dumper_bo);
   1769
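			/* Record every GPU this BO is mapped on so restore can re-map it there */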
   1770			for (i = 0; i < p->n_pdds; i++) {
   1771				if (amdgpu_amdkfd_bo_mapped_to_dev(p->pdds[i]->dev->adev, kgd_mem))
   1772					bo_priv->mapped_gpuids[dev_idx++] = p->pdds[i]->user_gpu_id;
   1773			}
   1774
   1775			pr_debug("bo_size = 0x%llx, bo_addr = 0x%llx bo_offset = 0x%llx\n"
    1776					"gpu_id = 0x%x alloc_flags = 0x%x idr_handle = 0x%x\n",
   1777					bo_bucket->size,
   1778					bo_bucket->addr,
   1779					bo_bucket->offset,
   1780					bo_bucket->gpu_id,
   1781					bo_bucket->alloc_flags,
   1782					bo_priv->idr_handle);
   1783			bo_index++;
   1784		}
   1785	}
   1786
   1787	ret = copy_to_user(user_bos, bo_buckets, num_bos * sizeof(*bo_buckets));
   1788	if (ret) {
   1789		pr_err("Failed to copy BO information to user\n");
   1790		ret = -EFAULT;
   1791		goto exit;
   1792	}
   1793
   1794	ret = copy_to_user(user_priv_data + *priv_offset, bo_privs, num_bos * sizeof(*bo_privs));
   1795	if (ret) {
   1796		pr_err("Failed to copy BO priv information to user\n");
   1797		ret = -EFAULT;
   1798		goto exit;
   1799	}
   1800
   1801	*priv_offset += num_bos * sizeof(*bo_privs);
   1802
   1803exit:
   1804	while (ret && bo_index--) {
   1805		if (bo_buckets[bo_index].alloc_flags
   1806		    & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT))
   1807			close_fd(bo_buckets[bo_index].dmabuf_fd);
   1808	}
   1809
   1810	kvfree(bo_buckets);
   1811	kvfree(bo_privs);
   1812	return ret;
   1813}
   1814
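/*
 * Compute the object counts and, optionally, the total private-data size a
 * checkpoint will need: devices, BOs, queues, events and SVM ranges plus the
 * per-process header. Reported to userspace by the PROCESS_INFO op and
 * re-checked against the sizes passed back for the CHECKPOINT op.
 */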
   1815static int criu_get_process_object_info(struct kfd_process *p,
   1816					uint32_t *num_devices,
   1817					uint32_t *num_bos,
   1818					uint32_t *num_objects,
   1819					uint64_t *objs_priv_size)
   1820{
   1821	uint64_t queues_priv_data_size, svm_priv_data_size, priv_size;
   1822	uint32_t num_queues, num_events, num_svm_ranges;
   1823	int ret;
   1824
   1825	*num_devices = p->n_pdds;
   1826	*num_bos = get_process_num_bos(p);
   1827
   1828	ret = kfd_process_get_queue_info(p, &num_queues, &queues_priv_data_size);
   1829	if (ret)
   1830		return ret;
   1831
   1832	num_events = kfd_get_num_events(p);
   1833
   1834	ret = svm_range_get_info(p, &num_svm_ranges, &svm_priv_data_size);
   1835	if (ret)
   1836		return ret;
   1837
   1838	*num_objects = num_queues + num_events + num_svm_ranges;
   1839
   1840	if (objs_priv_size) {
   1841		priv_size = sizeof(struct kfd_criu_process_priv_data);
   1842		priv_size += *num_devices * sizeof(struct kfd_criu_device_priv_data);
   1843		priv_size += *num_bos * sizeof(struct kfd_criu_bo_priv_data);
   1844		priv_size += queues_priv_data_size;
   1845		priv_size += num_events * sizeof(struct kfd_criu_event_priv_data);
   1846		priv_size += svm_priv_data_size;
   1847		*objs_priv_size = priv_size;
   1848	}
   1849	return 0;
   1850}
   1851
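/*
 * CRIU checkpoint op. Requires a prior PROCESS_INFO op, which evicts the
 * queues and reports the buffer sizes that are re-validated here. Writes the
 * device and BO buckets plus one contiguous private-data blob (process,
 * devices, BOs, queues, events, SVM ranges) into the user buffers in @args.
 */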
   1852static int criu_checkpoint(struct file *filep,
   1853			   struct kfd_process *p,
   1854			   struct kfd_ioctl_criu_args *args)
   1855{
   1856	int ret;
   1857	uint32_t num_devices, num_bos, num_objects;
   1858	uint64_t priv_size, priv_offset = 0;
   1859
   1860	if (!args->devices || !args->bos || !args->priv_data)
   1861		return -EINVAL;
   1862
   1863	mutex_lock(&p->mutex);
   1864
   1865	if (!p->n_pdds) {
   1866		pr_err("No pdd for given process\n");
   1867		ret = -ENODEV;
   1868		goto exit_unlock;
   1869	}
   1870
   1871	/* Confirm all process queues are evicted */
   1872	if (!p->queues_paused) {
   1873		pr_err("Cannot dump process when queues are not in evicted state\n");
   1874		/* CRIU plugin did not call op PROCESS_INFO before checkpointing */
   1875		ret = -EINVAL;
   1876		goto exit_unlock;
   1877	}
   1878
   1879	ret = criu_get_process_object_info(p, &num_devices, &num_bos, &num_objects, &priv_size);
   1880	if (ret)
   1881		goto exit_unlock;
   1882
   1883	if (num_devices != args->num_devices ||
   1884	    num_bos != args->num_bos ||
   1885	    num_objects != args->num_objects ||
   1886	    priv_size != args->priv_data_size) {
   1887
   1888		ret = -EINVAL;
   1889		goto exit_unlock;
   1890	}
   1891
   1892	/* each function will store private data inside priv_data and adjust priv_offset */
   1893	ret = criu_checkpoint_process(p, (uint8_t __user *)args->priv_data, &priv_offset);
   1894	if (ret)
   1895		goto exit_unlock;
   1896
   1897	ret = criu_checkpoint_devices(p, num_devices, (uint8_t __user *)args->devices,
   1898				(uint8_t __user *)args->priv_data, &priv_offset);
   1899	if (ret)
   1900		goto exit_unlock;
   1901
   1902	ret = criu_checkpoint_bos(p, num_bos, (uint8_t __user *)args->bos,
   1903			    (uint8_t __user *)args->priv_data, &priv_offset);
   1904	if (ret)
   1905		goto exit_unlock;
   1906
   1907	if (num_objects) {
   1908		ret = kfd_criu_checkpoint_queues(p, (uint8_t __user *)args->priv_data,
   1909						 &priv_offset);
   1910		if (ret)
   1911			goto close_bo_fds;
   1912
   1913		ret = kfd_criu_checkpoint_events(p, (uint8_t __user *)args->priv_data,
   1914						 &priv_offset);
   1915		if (ret)
   1916			goto close_bo_fds;
   1917
   1918		ret = kfd_criu_checkpoint_svm(p, (uint8_t __user *)args->priv_data, &priv_offset);
   1919		if (ret)
   1920			goto close_bo_fds;
   1921	}
   1922
   1923close_bo_fds:
   1924	if (ret) {
    1925		/* On error, userspace assumes all FDs opened by criu_checkpoint_bos are closed */
    1926		uint32_t i;
    1927		struct kfd_criu_bo_bucket *bo_buckets = (struct kfd_criu_bo_bucket *) args->bos;
    1928
    1929		for (i = 0; i < num_bos; i++) {
    1930			if (bo_buckets[i].alloc_flags & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT))
    1931				close_fd(bo_buckets[i].dmabuf_fd);
   1932		}
   1933	}
   1934
   1935exit_unlock:
   1936	mutex_unlock(&p->mutex);
   1937	if (ret)
    1938		pr_err("CRIU checkpoint failed, ret:%d\n", ret);
    1939	else
    1940		pr_debug("CRIU checkpoint successful\n");
   1941
   1942	return ret;
   1943}
   1944
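/*
 * Restore the per-process private data: verify the checkpointed private-data
 * version matches KFD_CRIU_PRIV_VERSION and re-apply the XNACK mode.
 */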
   1945static int criu_restore_process(struct kfd_process *p,
   1946				struct kfd_ioctl_criu_args *args,
   1947				uint64_t *priv_offset,
   1948				uint64_t max_priv_data_size)
   1949{
   1950	int ret = 0;
   1951	struct kfd_criu_process_priv_data process_priv;
   1952
   1953	if (*priv_offset + sizeof(process_priv) > max_priv_data_size)
   1954		return -EINVAL;
   1955
   1956	ret = copy_from_user(&process_priv,
   1957				(void __user *)(args->priv_data + *priv_offset),
   1958				sizeof(process_priv));
   1959	if (ret) {
   1960		pr_err("Failed to copy process private information from user\n");
   1961		ret = -EFAULT;
   1962		goto exit;
   1963	}
   1964	*priv_offset += sizeof(process_priv);
   1965
   1966	if (process_priv.version != KFD_CRIU_PRIV_VERSION) {
   1967		pr_err("Invalid CRIU API version (checkpointed:%d current:%d)\n",
   1968			process_priv.version, KFD_CRIU_PRIV_VERSION);
   1969		return -EINVAL;
   1970	}
   1971
   1972	pr_debug("Setting XNACK mode\n");
    1973	if (process_priv.xnack_mode && !kfd_process_xnack_mode(p, true)) {
    1974		pr_err("xnack mode cannot be set\n");
    1975		ret = -EPERM;
    1976		goto exit;
    1977	}
    1978
    1979	pr_debug("set xnack mode: %d\n", process_priv.xnack_mode);
    1980	p->xnack_enabled = process_priv.xnack_mode;
   1981
   1982exit:
   1983	return ret;
   1984}
   1985
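/*
 * Restore per-device state: for each device bucket, map the checkpointed
 * user_gpu_id onto the pdd of the GPU it is restored on and initialize the
 * pdd's VM from the render-node fd supplied by the CRIU plugin.
 */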
   1986static int criu_restore_devices(struct kfd_process *p,
   1987				struct kfd_ioctl_criu_args *args,
   1988				uint64_t *priv_offset,
   1989				uint64_t max_priv_data_size)
   1990{
   1991	struct kfd_criu_device_bucket *device_buckets;
   1992	struct kfd_criu_device_priv_data *device_privs;
   1993	int ret = 0;
   1994	uint32_t i;
   1995
   1996	if (args->num_devices != p->n_pdds)
   1997		return -EINVAL;
   1998
   1999	if (*priv_offset + (args->num_devices * sizeof(*device_privs)) > max_priv_data_size)
   2000		return -EINVAL;
   2001
   2002	device_buckets = kmalloc_array(args->num_devices, sizeof(*device_buckets), GFP_KERNEL);
   2003	if (!device_buckets)
   2004		return -ENOMEM;
   2005
   2006	ret = copy_from_user(device_buckets, (void __user *)args->devices,
   2007				args->num_devices * sizeof(*device_buckets));
   2008	if (ret) {
   2009		pr_err("Failed to copy devices buckets from user\n");
   2010		ret = -EFAULT;
   2011		goto exit;
   2012	}
   2013
   2014	for (i = 0; i < args->num_devices; i++) {
   2015		struct kfd_dev *dev;
   2016		struct kfd_process_device *pdd;
   2017		struct file *drm_file;
   2018
   2019		/* device private data is not currently used */
   2020
   2021		if (!device_buckets[i].user_gpu_id) {
   2022			pr_err("Invalid user gpu_id\n");
   2023			ret = -EINVAL;
   2024			goto exit;
   2025		}
   2026
   2027		dev = kfd_device_by_id(device_buckets[i].actual_gpu_id);
   2028		if (!dev) {
   2029			pr_err("Failed to find device with gpu_id = %x\n",
   2030				device_buckets[i].actual_gpu_id);
   2031			ret = -EINVAL;
   2032			goto exit;
   2033		}
   2034
   2035		pdd = kfd_get_process_device_data(dev, p);
   2036		if (!pdd) {
   2037			pr_err("Failed to get pdd for gpu_id = %x\n",
   2038					device_buckets[i].actual_gpu_id);
   2039			ret = -EINVAL;
   2040			goto exit;
   2041		}
   2042		pdd->user_gpu_id = device_buckets[i].user_gpu_id;
   2043
   2044		drm_file = fget(device_buckets[i].drm_fd);
   2045		if (!drm_file) {
   2046			pr_err("Invalid render node file descriptor sent from plugin (%d)\n",
   2047				device_buckets[i].drm_fd);
   2048			ret = -EINVAL;
   2049			goto exit;
   2050		}
   2051
   2052		if (pdd->drm_file) {
   2053			ret = -EINVAL;
   2054			goto exit;
   2055		}
   2056
   2057		/* create the vm using render nodes for kfd pdd */
   2058		if (kfd_process_device_init_vm(pdd, drm_file)) {
   2059			pr_err("could not init vm for given pdd\n");
    2060			/* Only drop the reference here; on success the pdd keeps the drm_file reference */
   2061			fput(drm_file);
   2062			ret = -EINVAL;
   2063			goto exit;
   2064		}
    2065		/*
    2066		 * The pdd already has its VM bound to the render node, so the call below
    2067		 * will not create a new exclusive KFD mapping but reuse the existing
    2068		 * renderDXXX one. It is still needed for IOMMUv2 binding and runtime PM.
    2069		 */
   2070		pdd = kfd_bind_process_to_device(dev, p);
   2071		if (IS_ERR(pdd)) {
   2072			ret = PTR_ERR(pdd);
   2073			goto exit;
   2074		}
   2075	}
   2076
    2077	/*
    2078	 * Device private data is not copied from user since it is not used yet,
    2079	 * but priv_offset must still be advanced past it.
    2080	 */
   2081	*priv_offset += args->num_devices * sizeof(*device_privs);
   2082
   2083exit:
   2084	kfree(device_buckets);
   2085	return ret;
   2086}
   2087
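/*
 * Recreate a single BO on @pdd at its checkpointed VA, size and flags, restore
 * its original IDR handle, and report the offset userspace should mmap it at
 * through bo_bucket->restored_offset. Doorbell and MMIO BOs get a KFD mmap
 * cookie; VRAM and GTT BOs get the offset returned by the allocator.
 */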
   2088static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
   2089				      struct kfd_criu_bo_bucket *bo_bucket,
   2090				      struct kfd_criu_bo_priv_data *bo_priv,
   2091				      struct kgd_mem **kgd_mem)
   2092{
   2093	int idr_handle;
   2094	int ret;
   2095	const bool criu_resume = true;
   2096	u64 offset;
   2097
   2098	if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
   2099		if (bo_bucket->size != kfd_doorbell_process_slice(pdd->dev))
   2100			return -EINVAL;
   2101
   2102		offset = kfd_get_process_doorbells(pdd);
   2103	} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
   2104		/* MMIO BOs need remapped bus address */
   2105		if (bo_bucket->size != PAGE_SIZE) {
   2106			pr_err("Invalid page size\n");
   2107			return -EINVAL;
   2108		}
   2109		offset = pdd->dev->adev->rmmio_remap.bus_addr;
   2110		if (!offset) {
    2111			pr_err("failed to get remapped mmio bus address\n");
   2112			return -ENOMEM;
   2113		}
   2114	} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
   2115		offset = bo_priv->user_addr;
   2116	}
   2117	/* Create the BO */
   2118	ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(pdd->dev->adev, bo_bucket->addr,
   2119						      bo_bucket->size, pdd->drm_priv, kgd_mem,
   2120						      &offset, bo_bucket->alloc_flags, criu_resume);
   2121	if (ret) {
   2122		pr_err("Could not create the BO\n");
   2123		return ret;
   2124	}
   2125	pr_debug("New BO created: size:0x%llx addr:0x%llx offset:0x%llx\n",
   2126		 bo_bucket->size, bo_bucket->addr, offset);
   2127
   2128	/* Restore previous IDR handle */
    2129	pr_debug("Restoring old IDR handle for the BO\n");
   2130	idr_handle = idr_alloc(&pdd->alloc_idr, *kgd_mem, bo_priv->idr_handle,
   2131			       bo_priv->idr_handle + 1, GFP_KERNEL);
   2132
   2133	if (idr_handle < 0) {
   2134		pr_err("Could not allocate idr\n");
   2135		amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, *kgd_mem, pdd->drm_priv,
   2136						       NULL);
   2137		return -ENOMEM;
   2138	}
   2139
    2140	if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
    2141		bo_bucket->restored_offset = KFD_MMAP_TYPE_DOORBELL | KFD_MMAP_GPU_ID(pdd->dev->id);
    2142	} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
   2143		bo_bucket->restored_offset = KFD_MMAP_TYPE_MMIO | KFD_MMAP_GPU_ID(pdd->dev->id);
   2144	} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
   2145		bo_bucket->restored_offset = offset;
   2146	} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
   2147		bo_bucket->restored_offset = offset;
   2148		/* Update the VRAM usage count */
   2149		WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + bo_bucket->size);
   2150	}
   2151	return 0;
   2152}
   2153
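/*
 * Restore one BO: recreate it on its owning GPU, map it on every GPU it was
 * mapped on at checkpoint time, and export VRAM/GTT BOs as dma-buf fds so the
 * CRIU plugin can restore their contents.
 */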
   2154static int criu_restore_bo(struct kfd_process *p,
   2155			   struct kfd_criu_bo_bucket *bo_bucket,
   2156			   struct kfd_criu_bo_priv_data *bo_priv)
   2157{
   2158	struct kfd_process_device *pdd;
   2159	struct kgd_mem *kgd_mem;
   2160	int ret;
   2161	int j;
   2162
   2163	pr_debug("Restoring BO size:0x%llx addr:0x%llx gpu_id:0x%x flags:0x%x idr_handle:0x%x\n",
   2164		 bo_bucket->size, bo_bucket->addr, bo_bucket->gpu_id, bo_bucket->alloc_flags,
   2165		 bo_priv->idr_handle);
   2166
   2167	pdd = kfd_process_device_data_by_id(p, bo_bucket->gpu_id);
   2168	if (!pdd) {
   2169		pr_err("Failed to get pdd\n");
   2170		return -ENODEV;
   2171	}
   2172
   2173	ret = criu_restore_memory_of_gpu(pdd, bo_bucket, bo_priv, &kgd_mem);
   2174	if (ret)
   2175		return ret;
   2176
   2177	/* now map these BOs to GPU/s */
   2178	for (j = 0; j < p->n_pdds; j++) {
   2179		struct kfd_dev *peer;
   2180		struct kfd_process_device *peer_pdd;
   2181
   2182		if (!bo_priv->mapped_gpuids[j])
   2183			break;
   2184
   2185		peer_pdd = kfd_process_device_data_by_id(p, bo_priv->mapped_gpuids[j]);
   2186		if (!peer_pdd)
   2187			return -EINVAL;
   2188
   2189		peer = peer_pdd->dev;
   2190
   2191		peer_pdd = kfd_bind_process_to_device(peer, p);
   2192		if (IS_ERR(peer_pdd))
   2193			return PTR_ERR(peer_pdd);
   2194
   2195		ret = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(peer->adev, kgd_mem,
   2196							    peer_pdd->drm_priv);
   2197		if (ret) {
   2198			pr_err("Failed to map to gpu %d/%d\n", j, p->n_pdds);
   2199			return ret;
   2200		}
   2201	}
   2202
   2203	pr_debug("map memory was successful for the BO\n");
   2204	/* create the dmabuf object and export the bo */
   2205	if (bo_bucket->alloc_flags
   2206	    & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {
   2207		ret = criu_get_prime_handle(&kgd_mem->bo->tbo.base, DRM_RDWR,
   2208					    &bo_bucket->dmabuf_fd);
   2209		if (ret)
   2210			return ret;
   2211	} else {
   2212		bo_bucket->dmabuf_fd = KFD_INVALID_FD;
   2213	}
   2214
   2215	return 0;
   2216}
   2217
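/*
 * Restore all checkpointed BOs for the process from the bucket array and the
 * private-data blob, then copy the buckets back so userspace can read each
 * BO's restored_offset and dmabuf_fd. On error, dma-buf fds opened so far are
 * closed again.
 */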
   2218static int criu_restore_bos(struct kfd_process *p,
   2219			    struct kfd_ioctl_criu_args *args,
   2220			    uint64_t *priv_offset,
   2221			    uint64_t max_priv_data_size)
   2222{
   2223	struct kfd_criu_bo_bucket *bo_buckets = NULL;
   2224	struct kfd_criu_bo_priv_data *bo_privs = NULL;
   2225	int ret = 0;
   2226	uint32_t i = 0;
   2227
   2228	if (*priv_offset + (args->num_bos * sizeof(*bo_privs)) > max_priv_data_size)
   2229		return -EINVAL;
   2230
   2231	/* Prevent MMU notifications until stage-4 IOCTL (CRIU_RESUME) is received */
   2232	amdgpu_amdkfd_block_mmu_notifications(p->kgd_process_info);
   2233
   2234	bo_buckets = kvmalloc_array(args->num_bos, sizeof(*bo_buckets), GFP_KERNEL);
   2235	if (!bo_buckets)
   2236		return -ENOMEM;
   2237
   2238	ret = copy_from_user(bo_buckets, (void __user *)args->bos,
   2239			     args->num_bos * sizeof(*bo_buckets));
   2240	if (ret) {
   2241		pr_err("Failed to copy BOs information from user\n");
   2242		ret = -EFAULT;
   2243		goto exit;
   2244	}
   2245
   2246	bo_privs = kvmalloc_array(args->num_bos, sizeof(*bo_privs), GFP_KERNEL);
   2247	if (!bo_privs) {
   2248		ret = -ENOMEM;
   2249		goto exit;
   2250	}
   2251
   2252	ret = copy_from_user(bo_privs, (void __user *)args->priv_data + *priv_offset,
   2253			     args->num_bos * sizeof(*bo_privs));
   2254	if (ret) {
   2255		pr_err("Failed to copy BOs information from user\n");
   2256		ret = -EFAULT;
   2257		goto exit;
   2258	}
   2259	*priv_offset += args->num_bos * sizeof(*bo_privs);
   2260
   2261	/* Create and map new BOs */
   2262	for (; i < args->num_bos; i++) {
   2263		ret = criu_restore_bo(p, &bo_buckets[i], &bo_privs[i]);
   2264		if (ret) {
   2265			pr_debug("Failed to restore BO[%d] ret%d\n", i, ret);
   2266			goto exit;
   2267		}
   2268	} /* done */
   2269
   2270	/* Copy only the buckets back so user can read bo_buckets[N].restored_offset */
   2271	ret = copy_to_user((void __user *)args->bos,
   2272				bo_buckets,
   2273				(args->num_bos * sizeof(*bo_buckets)));
   2274	if (ret)
   2275		ret = -EFAULT;
   2276
   2277exit:
   2278	while (ret && i--) {
   2279		if (bo_buckets[i].alloc_flags
   2280		   & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT))
   2281			close_fd(bo_buckets[i].dmabuf_fd);
   2282	}
   2283	kvfree(bo_buckets);
   2284	kvfree(bo_privs);
   2285	return ret;
   2286}
   2287
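/*
 * Walk the remaining private-data blob and restore each object in order. The
 * first u32 of every object's private data is its object_type, which selects
 * the queue, event or SVM-range restore path.
 */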
   2288static int criu_restore_objects(struct file *filep,
   2289				struct kfd_process *p,
   2290				struct kfd_ioctl_criu_args *args,
   2291				uint64_t *priv_offset,
   2292				uint64_t max_priv_data_size)
   2293{
   2294	int ret = 0;
   2295	uint32_t i;
   2296
   2297	BUILD_BUG_ON(offsetof(struct kfd_criu_queue_priv_data, object_type));
   2298	BUILD_BUG_ON(offsetof(struct kfd_criu_event_priv_data, object_type));
   2299	BUILD_BUG_ON(offsetof(struct kfd_criu_svm_range_priv_data, object_type));
   2300
   2301	for (i = 0; i < args->num_objects; i++) {
   2302		uint32_t object_type;
   2303
   2304		if (*priv_offset + sizeof(object_type) > max_priv_data_size) {
   2305			pr_err("Invalid private data size\n");
   2306			return -EINVAL;
   2307		}
   2308
   2309		ret = get_user(object_type, (uint32_t __user *)(args->priv_data + *priv_offset));
   2310		if (ret) {
   2311			pr_err("Failed to copy private information from user\n");
   2312			goto exit;
   2313		}
   2314
   2315		switch (object_type) {
   2316		case KFD_CRIU_OBJECT_TYPE_QUEUE:
   2317			ret = kfd_criu_restore_queue(p, (uint8_t __user *)args->priv_data,
   2318						     priv_offset, max_priv_data_size);
   2319			if (ret)
   2320				goto exit;
   2321			break;
   2322		case KFD_CRIU_OBJECT_TYPE_EVENT:
   2323			ret = kfd_criu_restore_event(filep, p, (uint8_t __user *)args->priv_data,
   2324						     priv_offset, max_priv_data_size);
   2325			if (ret)
   2326				goto exit;
   2327			break;
   2328		case KFD_CRIU_OBJECT_TYPE_SVM_RANGE:
   2329			ret = kfd_criu_restore_svm(p, (uint8_t __user *)args->priv_data,
   2330						     priv_offset, max_priv_data_size);
   2331			if (ret)
   2332				goto exit;
   2333			break;
   2334		default:
   2335			pr_err("Invalid object type:%u at index:%d\n", object_type, i);
   2336			ret = -EINVAL;
   2337			goto exit;
   2338		}
   2339	}
   2340exit:
   2341	return ret;
   2342}
   2343
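/*
 * CRIU restore op: evict the new process's queues, then restore process
 * state, devices, BOs and the remaining objects from the private-data blob,
 * which must be consumed exactly from start to end.
 */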
   2344static int criu_restore(struct file *filep,
   2345			struct kfd_process *p,
   2346			struct kfd_ioctl_criu_args *args)
   2347{
   2348	uint64_t priv_offset = 0;
   2349	int ret = 0;
   2350
   2351	pr_debug("CRIU restore (num_devices:%u num_bos:%u num_objects:%u priv_data_size:%llu)\n",
   2352		 args->num_devices, args->num_bos, args->num_objects, args->priv_data_size);
   2353
   2354	if (!args->bos || !args->devices || !args->priv_data || !args->priv_data_size ||
   2355	    !args->num_devices || !args->num_bos)
   2356		return -EINVAL;
   2357
   2358	mutex_lock(&p->mutex);
   2359
   2360	/*
   2361	 * Set the process to evicted state to avoid running any new queues before all the memory
   2362	 * mappings are ready.
   2363	 */
   2364	ret = kfd_process_evict_queues(p);
   2365	if (ret)
   2366		goto exit_unlock;
   2367
   2368	/* Each function will adjust priv_offset based on how many bytes they consumed */
   2369	ret = criu_restore_process(p, args, &priv_offset, args->priv_data_size);
   2370	if (ret)
   2371		goto exit_unlock;
   2372
   2373	ret = criu_restore_devices(p, args, &priv_offset, args->priv_data_size);
   2374	if (ret)
   2375		goto exit_unlock;
   2376
   2377	ret = criu_restore_bos(p, args, &priv_offset, args->priv_data_size);
   2378	if (ret)
   2379		goto exit_unlock;
   2380
   2381	ret = criu_restore_objects(filep, p, args, &priv_offset, args->priv_data_size);
   2382	if (ret)
   2383		goto exit_unlock;
   2384
   2385	if (priv_offset != args->priv_data_size) {
   2386		pr_err("Invalid private data size\n");
   2387		ret = -EINVAL;
   2388	}
   2389
   2390exit_unlock:
   2391	mutex_unlock(&p->mutex);
   2392	if (ret)
    2393		pr_err("CRIU restore failed, ret:%d\n", ret);
   2394	else
   2395		pr_debug("CRIU restore successful\n");
   2396
   2397	return ret;
   2398}
   2399
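/*
 * CRIU unpause op: restart the queues that were evicted by PROCESS_INFO,
 * typically issued by the CRIU plugin once checkpointing is complete.
 */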
   2400static int criu_unpause(struct file *filep,
   2401			struct kfd_process *p,
   2402			struct kfd_ioctl_criu_args *args)
   2403{
   2404	int ret;
   2405
   2406	mutex_lock(&p->mutex);
   2407
   2408	if (!p->queues_paused) {
   2409		mutex_unlock(&p->mutex);
   2410		return -EINVAL;
   2411	}
   2412
   2413	ret = kfd_process_restore_queues(p);
   2414	if (ret)
   2415		pr_err("Failed to unpause queues ret:%d\n", ret);
   2416	else
   2417		p->queues_paused = false;
   2418
   2419	mutex_unlock(&p->mutex);
   2420
   2421	return ret;
   2422}
   2423
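/*
 * CRIU resume op: final restore stage. Looks up the target process by
 * args->pid (not necessarily the caller) and lets kfd_criu_resume_svm() and
 * amdgpu_amdkfd_criu_resume() complete the work deferred during restore.
 */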
   2424static int criu_resume(struct file *filep,
   2425			struct kfd_process *p,
   2426			struct kfd_ioctl_criu_args *args)
   2427{
   2428	struct kfd_process *target = NULL;
   2429	struct pid *pid = NULL;
   2430	int ret = 0;
   2431
   2432	pr_debug("Inside %s, target pid for criu restore: %d\n", __func__,
   2433		 args->pid);
   2434
   2435	pid = find_get_pid(args->pid);
   2436	if (!pid) {
   2437		pr_err("Cannot find pid info for %i\n", args->pid);
   2438		return -ESRCH;
   2439	}
   2440
   2441	pr_debug("calling kfd_lookup_process_by_pid\n");
   2442	target = kfd_lookup_process_by_pid(pid);
   2443
   2444	put_pid(pid);
   2445
   2446	if (!target) {
   2447		pr_debug("Cannot find process info for %i\n", args->pid);
   2448		return -ESRCH;
   2449	}
   2450
   2451	mutex_lock(&target->mutex);
   2452	ret = kfd_criu_resume_svm(target);
   2453	if (ret) {
   2454		pr_err("kfd_criu_resume_svm failed for %i\n", args->pid);
   2455		goto exit;
   2456	}
   2457
    2458	ret = amdgpu_amdkfd_criu_resume(target->kgd_process_info);
   2459	if (ret)
   2460		pr_err("amdgpu_amdkfd_criu_resume failed for %i\n", args->pid);
   2461
   2462exit:
   2463	mutex_unlock(&target->mutex);
   2464
   2465	kfd_unref_process(target);
   2466	return ret;
   2467}
   2468
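/*
 * CRIU process-info op: evict all queues and mark them paused, then report
 * the target pid, object counts and required private-data size so the CRIU
 * plugin can size its buffers for the CHECKPOINT op.
 */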
   2469static int criu_process_info(struct file *filep,
   2470				struct kfd_process *p,
   2471				struct kfd_ioctl_criu_args *args)
   2472{
   2473	int ret = 0;
   2474
   2475	mutex_lock(&p->mutex);
   2476
   2477	if (!p->n_pdds) {
   2478		pr_err("No pdd for given process\n");
   2479		ret = -ENODEV;
   2480		goto err_unlock;
   2481	}
   2482
   2483	ret = kfd_process_evict_queues(p);
   2484	if (ret)
   2485		goto err_unlock;
   2486
   2487	p->queues_paused = true;
   2488
   2489	args->pid = task_pid_nr_ns(p->lead_thread,
   2490					task_active_pid_ns(p->lead_thread));
   2491
   2492	ret = criu_get_process_object_info(p, &args->num_devices, &args->num_bos,
   2493					   &args->num_objects, &args->priv_data_size);
   2494	if (ret)
   2495		goto err_unlock;
   2496
   2497	dev_dbg(kfd_device, "Num of devices:%u bos:%u objects:%u priv_data_size:%lld\n",
   2498				args->num_devices, args->num_bos, args->num_objects,
   2499				args->priv_data_size);
   2500
   2501err_unlock:
   2502	if (ret) {
   2503		kfd_process_restore_queues(p);
   2504		p->queues_paused = false;
   2505	}
   2506	mutex_unlock(&p->mutex);
   2507	return ret;
   2508}
   2509
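/*
 * Dispatch AMDKFD_IOC_CRIU_OP. The CRIU amdgpu plugin typically drives the
 * ops in this order:
 *   checkpoint: PROCESS_INFO -> CHECKPOINT -> UNPAUSE
 *   restore:    RESTORE -> RESUME
 * Access requires CAP_CHECKPOINT_RESTORE or CAP_SYS_ADMIN, enforced in
 * kfd_ioctl().
 */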
   2510static int kfd_ioctl_criu(struct file *filep, struct kfd_process *p, void *data)
   2511{
   2512	struct kfd_ioctl_criu_args *args = data;
   2513	int ret;
   2514
   2515	dev_dbg(kfd_device, "CRIU operation: %d\n", args->op);
   2516	switch (args->op) {
   2517	case KFD_CRIU_OP_PROCESS_INFO:
   2518		ret = criu_process_info(filep, p, args);
   2519		break;
   2520	case KFD_CRIU_OP_CHECKPOINT:
   2521		ret = criu_checkpoint(filep, p, args);
   2522		break;
   2523	case KFD_CRIU_OP_UNPAUSE:
   2524		ret = criu_unpause(filep, p, args);
   2525		break;
   2526	case KFD_CRIU_OP_RESTORE:
   2527		ret = criu_restore(filep, p, args);
   2528		break;
   2529	case KFD_CRIU_OP_RESUME:
   2530		ret = criu_resume(filep, p, args);
   2531		break;
   2532	default:
   2533		dev_dbg(kfd_device, "Unsupported CRIU operation:%d\n", args->op);
   2534		ret = -EINVAL;
   2535		break;
   2536	}
   2537
   2538	if (ret)
   2539		dev_dbg(kfd_device, "CRIU operation:%d err:%d\n", args->op, ret);
   2540
   2541	return ret;
   2542}
   2543
   2544#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
   2545	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
   2546			    .cmd_drv = 0, .name = #ioctl}
   2547
   2548/** Ioctl table */
   2549static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
   2550	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION,
   2551			kfd_ioctl_get_version, 0),
   2552
   2553	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE,
   2554			kfd_ioctl_create_queue, 0),
   2555
   2556	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE,
   2557			kfd_ioctl_destroy_queue, 0),
   2558
   2559	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY,
   2560			kfd_ioctl_set_memory_policy, 0),
   2561
   2562	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS,
   2563			kfd_ioctl_get_clock_counters, 0),
   2564
   2565	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES,
   2566			kfd_ioctl_get_process_apertures, 0),
   2567
   2568	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE,
   2569			kfd_ioctl_update_queue, 0),
   2570
   2571	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT,
   2572			kfd_ioctl_create_event, 0),
   2573
   2574	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT,
   2575			kfd_ioctl_destroy_event, 0),
   2576
   2577	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT,
   2578			kfd_ioctl_set_event, 0),
   2579
   2580	AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT,
   2581			kfd_ioctl_reset_event, 0),
   2582
   2583	AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
   2584			kfd_ioctl_wait_events, 0),
   2585
   2586	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER_DEPRECATED,
   2587			kfd_ioctl_dbg_register, 0),
   2588
   2589	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER_DEPRECATED,
   2590			kfd_ioctl_dbg_unregister, 0),
   2591
   2592	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH_DEPRECATED,
   2593			kfd_ioctl_dbg_address_watch, 0),
   2594
   2595	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL_DEPRECATED,
   2596			kfd_ioctl_dbg_wave_control, 0),
   2597
   2598	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA,
   2599			kfd_ioctl_set_scratch_backing_va, 0),
   2600
   2601	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG,
   2602			kfd_ioctl_get_tile_config, 0),
   2603
   2604	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
   2605			kfd_ioctl_set_trap_handler, 0),
   2606
   2607	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
   2608			kfd_ioctl_get_process_apertures_new, 0),
   2609
   2610	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
   2611			kfd_ioctl_acquire_vm, 0),
   2612
   2613	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
   2614			kfd_ioctl_alloc_memory_of_gpu, 0),
   2615
   2616	AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
   2617			kfd_ioctl_free_memory_of_gpu, 0),
   2618
   2619	AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
   2620			kfd_ioctl_map_memory_to_gpu, 0),
   2621
   2622	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
   2623			kfd_ioctl_unmap_memory_from_gpu, 0),
   2624
   2625	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK,
   2626			kfd_ioctl_set_cu_mask, 0),
   2627
   2628	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_QUEUE_WAVE_STATE,
   2629			kfd_ioctl_get_queue_wave_state, 0),
   2630
   2631	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_DMABUF_INFO,
   2632				kfd_ioctl_get_dmabuf_info, 0),
   2633
   2634	AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
   2635				kfd_ioctl_import_dmabuf, 0),
   2636
   2637	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS,
   2638			kfd_ioctl_alloc_queue_gws, 0),
   2639
   2640	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS,
   2641			kfd_ioctl_smi_events, 0),
   2642
   2643	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SVM, kfd_ioctl_svm, 0),
   2644
   2645	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE,
   2646			kfd_ioctl_set_xnack_mode, 0),
   2647
   2648	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_OP,
   2649			kfd_ioctl_criu, KFD_IOC_FLAG_CHECKPOINT_RESTORE),
   2650
   2651};
   2652
   2653#define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
   2654
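/*
 * Central ioctl dispatcher for /dev/kfd: looks up the handler in
 * amdkfd_ioctls[], copies the argument struct in and out using the larger of
 * the kernel's and the caller's _IOC_SIZE (zero-filling any extension), and
 * restricts use of the fd to the process that opened it, except that a
 * ptrace-attached process may issue checkpoint/restore ioctls.
 */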
   2655static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
   2656{
   2657	struct kfd_process *process;
   2658	amdkfd_ioctl_t *func;
   2659	const struct amdkfd_ioctl_desc *ioctl = NULL;
   2660	unsigned int nr = _IOC_NR(cmd);
   2661	char stack_kdata[128];
   2662	char *kdata = NULL;
   2663	unsigned int usize, asize;
   2664	int retcode = -EINVAL;
   2665	bool ptrace_attached = false;
   2666
   2667	if (nr >= AMDKFD_CORE_IOCTL_COUNT)
   2668		goto err_i1;
   2669
   2670	if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) {
   2671		u32 amdkfd_size;
   2672
   2673		ioctl = &amdkfd_ioctls[nr];
   2674
   2675		amdkfd_size = _IOC_SIZE(ioctl->cmd);
   2676		usize = asize = _IOC_SIZE(cmd);
   2677		if (amdkfd_size > asize)
   2678			asize = amdkfd_size;
   2679
   2680		cmd = ioctl->cmd;
   2681	} else
   2682		goto err_i1;
   2683
   2684	dev_dbg(kfd_device, "ioctl cmd 0x%x (#0x%x), arg 0x%lx\n", cmd, nr, arg);
   2685
   2686	/* Get the process struct from the filep. Only the process
   2687	 * that opened /dev/kfd can use the file descriptor. Child
   2688	 * processes need to create their own KFD device context.
   2689	 */
   2690	process = filep->private_data;
   2691
   2692	rcu_read_lock();
   2693	if ((ioctl->flags & KFD_IOC_FLAG_CHECKPOINT_RESTORE) &&
   2694	    ptrace_parent(process->lead_thread) == current)
   2695		ptrace_attached = true;
   2696	rcu_read_unlock();
   2697
   2698	if (process->lead_thread != current->group_leader
   2699	    && !ptrace_attached) {
   2700		dev_dbg(kfd_device, "Using KFD FD in wrong process\n");
   2701		retcode = -EBADF;
   2702		goto err_i1;
   2703	}
   2704
   2705	/* Do not trust userspace, use our own definition */
   2706	func = ioctl->func;
   2707
   2708	if (unlikely(!func)) {
   2709		dev_dbg(kfd_device, "no function\n");
   2710		retcode = -EINVAL;
   2711		goto err_i1;
   2712	}
   2713
    2714	/*
    2715	 * Versions of docker shipped in Ubuntu 18.xx and 20.xx do not support
    2716	 * CAP_CHECKPOINT_RESTORE, so access is also allowed with CAP_SYS_ADMIN,
    2717	 * which is the more privileged capability.
    2718	 */
   2719	if (unlikely(ioctl->flags & KFD_IOC_FLAG_CHECKPOINT_RESTORE)) {
   2720		if (!capable(CAP_CHECKPOINT_RESTORE) &&
   2721						!capable(CAP_SYS_ADMIN)) {
   2722			retcode = -EACCES;
   2723			goto err_i1;
   2724		}
   2725	}
   2726
   2727	if (cmd & (IOC_IN | IOC_OUT)) {
   2728		if (asize <= sizeof(stack_kdata)) {
   2729			kdata = stack_kdata;
   2730		} else {
   2731			kdata = kmalloc(asize, GFP_KERNEL);
   2732			if (!kdata) {
   2733				retcode = -ENOMEM;
   2734				goto err_i1;
   2735			}
   2736		}
   2737		if (asize > usize)
   2738			memset(kdata + usize, 0, asize - usize);
   2739	}
   2740
   2741	if (cmd & IOC_IN) {
   2742		if (copy_from_user(kdata, (void __user *)arg, usize) != 0) {
   2743			retcode = -EFAULT;
   2744			goto err_i1;
   2745		}
   2746	} else if (cmd & IOC_OUT) {
   2747		memset(kdata, 0, usize);
   2748	}
   2749
   2750	retcode = func(filep, process, kdata);
   2751
   2752	if (cmd & IOC_OUT)
   2753		if (copy_to_user((void __user *)arg, kdata, usize) != 0)
   2754			retcode = -EFAULT;
   2755
   2756err_i1:
   2757	if (!ioctl)
   2758		dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
   2759			  task_pid_nr(current), cmd, nr);
   2760
   2761	if (kdata != stack_kdata)
   2762		kfree(kdata);
   2763
   2764	if (retcode)
   2765		dev_dbg(kfd_device, "ioctl cmd (#0x%x), arg 0x%lx, ret = %d\n",
   2766				nr, arg, retcode);
   2767
   2768	return retcode;
   2769}
   2770
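/* Map the device's single remapped MMIO page into the process, uncached */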
   2771static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process,
   2772		      struct vm_area_struct *vma)
   2773{
   2774	phys_addr_t address;
   2775	int ret;
   2776
   2777	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
   2778		return -EINVAL;
   2779
   2780	address = dev->adev->rmmio_remap.bus_addr;
   2781
   2782	vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
   2783				VM_DONTDUMP | VM_PFNMAP;
   2784
   2785	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
   2786
   2787	pr_debug("pasid 0x%x mapping mmio page\n"
   2788		 "     target user address == 0x%08llX\n"
   2789		 "     physical address    == 0x%08llX\n"
   2790		 "     vm_flags            == 0x%04lX\n"
   2791		 "     size                == 0x%04lX\n",
   2792		 process->pasid, (unsigned long long) vma->vm_start,
   2793		 address, vma->vm_flags, PAGE_SIZE);
   2794
   2795	ret = io_remap_pfn_range(vma,
   2796				vma->vm_start,
   2797				address >> PAGE_SHIFT,
   2798				PAGE_SIZE,
   2799				vma->vm_page_prot);
   2800	return ret;
   2801}
   2802
   2803
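/*
 * mmap handler for /dev/kfd: the mmap offset encodes the mapping type and,
 * where relevant, the GPU id, and is dispatched to the doorbell, event,
 * reserved-memory or MMIO mapping helpers.
 */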
   2804static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
   2805{
   2806	struct kfd_process *process;
   2807	struct kfd_dev *dev = NULL;
   2808	unsigned long mmap_offset;
   2809	unsigned int gpu_id;
   2810
   2811	process = kfd_get_process(current);
   2812	if (IS_ERR(process))
   2813		return PTR_ERR(process);
   2814
   2815	mmap_offset = vma->vm_pgoff << PAGE_SHIFT;
   2816	gpu_id = KFD_MMAP_GET_GPU_ID(mmap_offset);
   2817	if (gpu_id)
   2818		dev = kfd_device_by_id(gpu_id);
   2819
   2820	switch (mmap_offset & KFD_MMAP_TYPE_MASK) {
   2821	case KFD_MMAP_TYPE_DOORBELL:
   2822		if (!dev)
   2823			return -ENODEV;
   2824		return kfd_doorbell_mmap(dev, process, vma);
   2825
   2826	case KFD_MMAP_TYPE_EVENTS:
   2827		return kfd_event_mmap(process, vma);
   2828
   2829	case KFD_MMAP_TYPE_RESERVED_MEM:
   2830		if (!dev)
   2831			return -ENODEV;
   2832		return kfd_reserved_mem_mmap(dev, process, vma);
   2833	case KFD_MMAP_TYPE_MMIO:
   2834		if (!dev)
   2835			return -ENODEV;
   2836		return kfd_mmio_mmap(dev, process, vma);
   2837	}
   2838
   2839	return -EFAULT;
   2840}