cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

kfd_doorbell.c (9335B)


      1// SPDX-License-Identifier: GPL-2.0 OR MIT
      2/*
      3 * Copyright 2014-2022 Advanced Micro Devices, Inc.
      4 *
      5 * Permission is hereby granted, free of charge, to any person obtaining a
      6 * copy of this software and associated documentation files (the "Software"),
      7 * to deal in the Software without restriction, including without limitation
      8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      9 * and/or sell copies of the Software, and to permit persons to whom the
     10 * Software is furnished to do so, subject to the following conditions:
     11 *
     12 * The above copyright notice and this permission notice shall be included in
     13 * all copies or substantial portions of the Software.
     14 *
     15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
     19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     21 * OTHER DEALINGS IN THE SOFTWARE.
     22 */
     23#include "kfd_priv.h"
     24#include <linux/mm.h>
     25#include <linux/mman.h>
     26#include <linux/slab.h>
     27#include <linux/io.h>
     28#include <linux/idr.h>
     29
     30/*
     31 * This extension supports kernel-level doorbell management for the
     32 * kernel queues using the first doorbell page reserved for the kernel.
     33 */
     34
     35/*
     36 * Each device exposes a doorbell aperture, a PCI MMIO aperture that
     37 * receives 32-bit writes that are passed to queues as wptr values.
     38 * The doorbells are intended to be written by applications as part
     39 * of queueing work on user-mode queues.
     40 * We assign doorbells to applications in PAGE_SIZE-sized and aligned chunks.
     41 * We map the doorbell address space into user-mode when a process creates
     42 * its first queue on each device.
     43 * Although the mapping is done by KFD, it is equivalent to an mmap of
     44 * the /dev/kfd with the particular device encoded in the mmap offset.
     45 * There will be other uses for mmap of /dev/kfd, so only a range of
     46 * offsets (KFD_MMAP_DOORBELL_START-END) is used for doorbells.
     47 */
     48
     49/* # of doorbell bytes allocated for each process. */
     50size_t kfd_doorbell_process_slice(struct kfd_dev *kfd)
     51{
     52	if (!kfd->shared_resources.enable_mes)
     53		return roundup(kfd->device_info.doorbell_size *
     54				KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
     55				PAGE_SIZE);
     56	else
     57		return amdgpu_mes_doorbell_process_slice(
     58					(struct amdgpu_device *)kfd->adev);
     59}
     60
     61/* Doorbell calculations for device init. */
     62int kfd_doorbell_init(struct kfd_dev *kfd)
     63{
     64	size_t doorbell_start_offset;
     65	size_t doorbell_aperture_size;
     66	size_t doorbell_process_limit;
     67
     68	/*
     69	 * With MES enabled, just set the doorbell base as it is needed
     70	 * to calculate doorbell physical address.
     71	 */
     72	if (kfd->shared_resources.enable_mes) {
     73		kfd->doorbell_base =
     74			kfd->shared_resources.doorbell_physical_address;
     75		return 0;
     76	}
     77
     78	/*
     79	 * We start with calculations in bytes because the input data might
     80	 * only be byte-aligned.
     81	 * Only after we have done the rounding can we assume any alignment.
     82	 */
     83
     84	doorbell_start_offset =
     85			roundup(kfd->shared_resources.doorbell_start_offset,
     86					kfd_doorbell_process_slice(kfd));
     87
     88	doorbell_aperture_size =
     89			rounddown(kfd->shared_resources.doorbell_aperture_size,
     90					kfd_doorbell_process_slice(kfd));
     91
     92	if (doorbell_aperture_size > doorbell_start_offset)
     93		doorbell_process_limit =
     94			(doorbell_aperture_size - doorbell_start_offset) /
     95						kfd_doorbell_process_slice(kfd);
     96	else
     97		return -ENOSPC;
     98
     99	if (!kfd->max_doorbell_slices ||
    100	    doorbell_process_limit < kfd->max_doorbell_slices)
    101		kfd->max_doorbell_slices = doorbell_process_limit;
    102
    103	kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address +
    104				doorbell_start_offset;
    105
    106	kfd->doorbell_base_dw_offset = doorbell_start_offset / sizeof(u32);
    107
    108	kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base,
    109					   kfd_doorbell_process_slice(kfd));
    110
    111	if (!kfd->doorbell_kernel_ptr)
    112		return -ENOMEM;
    113
    114	pr_debug("Doorbell initialization:\n");
    115	pr_debug("doorbell base           == 0x%08lX\n",
    116			(uintptr_t)kfd->doorbell_base);
    117
    118	pr_debug("doorbell_base_dw_offset      == 0x%08lX\n",
    119			kfd->doorbell_base_dw_offset);
    120
    121	pr_debug("doorbell_process_limit  == 0x%08lX\n",
    122			doorbell_process_limit);
    123
    124	pr_debug("doorbell_kernel_offset  == 0x%08lX\n",
    125			(uintptr_t)kfd->doorbell_base);
    126
    127	pr_debug("doorbell aperture size  == 0x%08lX\n",
    128			kfd->shared_resources.doorbell_aperture_size);
    129
    130	pr_debug("doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr);
    131
    132	return 0;
    133}
    134
    135void kfd_doorbell_fini(struct kfd_dev *kfd)
    136{
    137	if (kfd->doorbell_kernel_ptr)
    138		iounmap(kfd->doorbell_kernel_ptr);
    139}
    140
    141int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
    142		      struct vm_area_struct *vma)
    143{
    144	phys_addr_t address;
    145	struct kfd_process_device *pdd;
    146
    147	/*
    148	 * For simplicitly we only allow mapping of the entire doorbell
    149	 * allocation of a single device & process.
    150	 */
    151	if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev))
    152		return -EINVAL;
    153
    154	pdd = kfd_get_process_device_data(dev, process);
    155	if (!pdd)
    156		return -EINVAL;
    157
    158	/* Calculate physical address of doorbell */
    159	address = kfd_get_process_doorbells(pdd);
    160	vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
    161				VM_DONTDUMP | VM_PFNMAP;
    162
    163	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
    164
    165	pr_debug("Mapping doorbell page\n"
    166		 "     target user address == 0x%08llX\n"
    167		 "     physical address    == 0x%08llX\n"
    168		 "     vm_flags            == 0x%04lX\n"
    169		 "     size                == 0x%04lX\n",
    170		 (unsigned long long) vma->vm_start, address, vma->vm_flags,
    171		 kfd_doorbell_process_slice(dev));
    172
    173
    174	return io_remap_pfn_range(vma,
    175				vma->vm_start,
    176				address >> PAGE_SHIFT,
    177				kfd_doorbell_process_slice(dev),
    178				vma->vm_page_prot);
    179}
    180
    181
    182/* get kernel iomem pointer for a doorbell */
    183void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
    184					unsigned int *doorbell_off)
    185{
    186	u32 inx;
    187
    188	mutex_lock(&kfd->doorbell_mutex);
    189	inx = find_first_zero_bit(kfd->doorbell_available_index,
    190					KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
    191
    192	__set_bit(inx, kfd->doorbell_available_index);
    193	mutex_unlock(&kfd->doorbell_mutex);
    194
    195	if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
    196		return NULL;
    197
    198	inx *= kfd->device_info.doorbell_size / sizeof(u32);
    199
    200	/*
    201	 * Calculating the kernel doorbell offset using the first
    202	 * doorbell page.
    203	 */
    204	*doorbell_off = kfd->doorbell_base_dw_offset + inx;
    205
    206	pr_debug("Get kernel queue doorbell\n"
    207			"     doorbell offset   == 0x%08X\n"
    208			"     doorbell index    == 0x%x\n",
    209		*doorbell_off, inx);
    210
    211	return kfd->doorbell_kernel_ptr + inx;
    212}
    213
    214void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
    215{
    216	unsigned int inx;
    217
    218	inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr)
    219		* sizeof(u32) / kfd->device_info.doorbell_size;
    220
    221	mutex_lock(&kfd->doorbell_mutex);
    222	__clear_bit(inx, kfd->doorbell_available_index);
    223	mutex_unlock(&kfd->doorbell_mutex);
    224}
    225
    226void write_kernel_doorbell(void __iomem *db, u32 value)
    227{
    228	if (db) {
    229		writel(value, db);
    230		pr_debug("Writing %d to doorbell address %p\n", value, db);
    231	}
    232}
    233
    234void write_kernel_doorbell64(void __iomem *db, u64 value)
    235{
    236	if (db) {
    237		WARN(((unsigned long)db & 7) != 0,
    238		     "Unaligned 64-bit doorbell");
    239		writeq(value, (u64 __iomem *)db);
    240		pr_debug("writing %llu to doorbell address %p\n", value, db);
    241	}
    242}
    243
    244unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
    245					struct kfd_process_device *pdd,
    246					unsigned int doorbell_id)
    247{
    248	/*
    249	 * doorbell_base_dw_offset accounts for doorbells taken by KGD.
    250	 * index * kfd_doorbell_process_slice/sizeof(u32) adjusts to
    251	 * the process's doorbells. The offset returned is in dword
    252	 * units regardless of the ASIC-dependent doorbell size.
    253	 */
    254	if (!kfd->shared_resources.enable_mes)
    255		return kfd->doorbell_base_dw_offset +
    256			pdd->doorbell_index
    257			* kfd_doorbell_process_slice(kfd) / sizeof(u32) +
    258			doorbell_id *
    259			kfd->device_info.doorbell_size / sizeof(u32);
    260	else
    261		return amdgpu_mes_get_doorbell_dw_offset_in_bar(
    262				(struct amdgpu_device *)kfd->adev,
    263				pdd->doorbell_index, doorbell_id);
    264}
    265
    266uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
    267{
    268	uint64_t num_of_elems = (kfd->shared_resources.doorbell_aperture_size -
    269				kfd->shared_resources.doorbell_start_offset) /
    270					kfd_doorbell_process_slice(kfd) + 1;
    271
    272	return num_of_elems;
    273
    274}
    275
    276phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd)
    277{
    278	return pdd->dev->doorbell_base +
    279		pdd->doorbell_index * kfd_doorbell_process_slice(pdd->dev);
    280}
    281
    282int kfd_alloc_process_doorbells(struct kfd_dev *kfd, unsigned int *doorbell_index)
    283{
    284	int r = 0;
    285
    286	if (!kfd->shared_resources.enable_mes)
    287		r = ida_simple_get(&kfd->doorbell_ida, 1,
    288				   kfd->max_doorbell_slices, GFP_KERNEL);
    289	else
    290		r = amdgpu_mes_alloc_process_doorbells(
    291				(struct amdgpu_device *)kfd->adev,
    292				doorbell_index);
    293
    294	if (r > 0)
    295		*doorbell_index = r;
    296
    297	return r;
    298}
    299
    300void kfd_free_process_doorbells(struct kfd_dev *kfd, unsigned int doorbell_index)
    301{
    302	if (doorbell_index) {
    303		if (!kfd->shared_resources.enable_mes)
    304			ida_simple_remove(&kfd->doorbell_ida, doorbell_index);
    305		else
    306			amdgpu_mes_free_process_doorbells(
    307					(struct amdgpu_device *)kfd->adev,
    308					doorbell_index);
    309	}
    310}