cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

amdgpu_amdkfd_arcturus.c (9872B)


/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include <linux/module.h>
#include <linux/fdtable.h>
#include <linux/uaccess.h>
#include <linux/firmware.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_amdkfd_arcturus.h"
#include "sdma0/sdma0_4_2_2_offset.h"
#include "sdma0/sdma0_4_2_2_sh_mask.h"
#include "sdma1/sdma1_4_2_2_offset.h"
#include "sdma1/sdma1_4_2_2_sh_mask.h"
#include "sdma2/sdma2_4_2_2_offset.h"
#include "sdma2/sdma2_4_2_2_sh_mask.h"
#include "sdma3/sdma3_4_2_2_offset.h"
#include "sdma3/sdma3_4_2_2_sh_mask.h"
#include "sdma4/sdma4_4_2_2_offset.h"
#include "sdma4/sdma4_4_2_2_sh_mask.h"
#include "sdma5/sdma5_4_2_2_offset.h"
#include "sdma5/sdma5_4_2_2_sh_mask.h"
#include "sdma6/sdma6_4_2_2_offset.h"
#include "sdma6/sdma6_4_2_2_sh_mask.h"
#include "sdma7/sdma7_4_2_2_offset.h"
#include "sdma7/sdma7_4_2_2_sh_mask.h"
#include "v9_structs.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_amdkfd_gfx_v9.h"
#include "gfxhub_v1_0.h"
#include "mmhub_v9_4.h"

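/*
 * DUMP_REG records one register into the caller-provided dump array as a
 * (byte offset, current value) pair; HQD_N_REGS bounds the array size.
 */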
#define HQD_N_REGS 56
#define DUMP_REG(addr) do {				\
		if (WARN_ON_ONCE(i >= HQD_N_REGS))	\
			break;				\
		(*dump)[i][0] = (addr) << 2;		\
		(*dump)[i++][1] = RREG32(addr);		\
	} while (0)

static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
{
	return (struct v9_sdma_mqd *)mqd;
}

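/*
 * Return the dword offset of the RLC queue register block for the given
 * SDMA engine (Arcturus has eight) and queue; the per-queue
 * mmSDMA0_RLC0_* register names below are added to this base.
 */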
static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
				unsigned int engine_id,
				unsigned int queue_id)
{
	uint32_t sdma_engine_reg_base = 0;
	uint32_t sdma_rlc_reg_offset;

	switch (engine_id) {
	default:
		dev_warn(adev->dev,
			 "Invalid sdma engine id (%d), using engine id 0\n",
			 engine_id);
		fallthrough;
	case 0:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
				mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
		break;
	case 1:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0,
				mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL;
		break;
	case 2:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA2, 0,
				mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL;
		break;
	case 3:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA3, 0,
				mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL;
		break;
	case 4:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA4, 0,
				mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL;
		break;
	case 5:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA5, 0,
				mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL;
		break;
	case 6:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA6, 0,
				mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL;
		break;
	case 7:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA7, 0,
				mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL;
		break;
	}

	sdma_rlc_reg_offset = sdma_engine_reg_base
		+ queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);

	pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
			queue_id, sdma_rlc_reg_offset);

	return sdma_rlc_reg_offset;
}

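/*
 * Load an SDMA user queue: disable the ring buffer, wait for the RLC
 * context to report idle, restore doorbell and ring-buffer state from the
 * MQD, pick up the user-space write pointer if it is readable, then
 * re-enable the ring buffer.
 */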
int kgd_arcturus_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
			     uint32_t __user *wptr, struct mm_struct *mm)
{
	struct v9_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	unsigned long end_jiffies;
	uint32_t data;
	uint64_t data64;
	uint64_t __user *wptr64 = (uint64_t __user *)wptr;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
		m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));

	end_jiffies = msecs_to_jiffies(2000) + jiffies;
	while (true) {
		data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET,
	       m->sdmax_rlcx_doorbell_offset);

	data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
			     ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
				m->sdmax_rlcx_rb_rptr);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI,
				m->sdmax_rlcx_rb_rptr_hi);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
	if (read_user_wptr(mm, wptr64, data64)) {
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
		       lower_32_bits(data64));
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
		       upper_32_bits(data64));
	} else {
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
		       m->sdmax_rlcx_rb_rptr);
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
		       m->sdmax_rlcx_rb_rptr_hi);
	}
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
			m->sdmax_rlcx_rb_base_hi);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
			m->sdmax_rlcx_rb_rptr_addr_lo);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
			m->sdmax_rlcx_rb_rptr_addr_hi);

	data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
			     RB_ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);

	return 0;
}

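/*
 * Dump the queue's RLC register ranges into a kmalloc'd array of
 * (byte offset, value) pairs; the caller is responsible for freeing it.
 */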
int kgd_arcturus_hqd_sdma_dump(struct amdgpu_device *adev,
			     uint32_t engine_id, uint32_t queue_id,
			     uint32_t (**dump)[2], uint32_t *n_regs)
{
	uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
			engine_id, queue_id);
	uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+10)

	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
	     reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
	     reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

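/* A queue counts as occupied while its ring buffer is still enabled. */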
bool kgd_arcturus_hqd_sdma_is_occupied(struct amdgpu_device *adev,
				void *mqd)
{
	struct v9_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t sdma_rlc_rb_cntl;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);

	if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
		return true;

	return false;
}

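/*
 * Tear down an SDMA queue: request the ring buffer to stop, poll the
 * context status for idle up to utimeout ms, then clear the doorbell and
 * save the current read pointer back into the MQD.
 */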
int kgd_arcturus_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
				unsigned int utimeout)
{
	struct v9_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t temp;
	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
	temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);

	while (true) {
		temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
		SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);

	m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
	m->sdmax_rlcx_rb_rptr_hi =
		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI);

	return 0;
}

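/*
 * KFD->KGD interface for Arcturus: reuse the common GFX v9 callbacks and
 * override only the SDMA queue operations, which differ because Arcturus
 * has eight SDMA engines with their own register layout.
 */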
const struct kfd2kgd_calls arcturus_kfd2kgd = {
	.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
	.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
	.init_interrupts = kgd_gfx_v9_init_interrupts,
	.hqd_load = kgd_gfx_v9_hqd_load,
	.hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
	.hqd_sdma_load = kgd_arcturus_hqd_sdma_load,
	.hqd_dump = kgd_gfx_v9_hqd_dump,
	.hqd_sdma_dump = kgd_arcturus_hqd_sdma_dump,
	.hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied,
	.hqd_sdma_is_occupied = kgd_arcturus_hqd_sdma_is_occupied,
	.hqd_destroy = kgd_gfx_v9_hqd_destroy,
	.hqd_sdma_destroy = kgd_arcturus_hqd_sdma_destroy,
	.wave_control_execute = kgd_gfx_v9_wave_control_execute,
	.get_atc_vmid_pasid_mapping_info =
				kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
	.set_vm_context_page_table_base =
				kgd_gfx_v9_set_vm_context_page_table_base,
	.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
	.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings
};