cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

amdgpu_vcn.h (12558B)


/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __AMDGPU_VCN_H__
#define __AMDGPU_VCN_H__

#include "amdgpu_ras.h"

#define AMDGPU_VCN_STACK_SIZE		(128*1024)
#define AMDGPU_VCN_CONTEXT_SIZE		(512*1024)

#define AMDGPU_VCN_FIRMWARE_OFFSET	256
#define AMDGPU_VCN_MAX_ENC_RINGS	3

#define AMDGPU_MAX_VCN_INSTANCES	2
#define AMDGPU_MAX_VCN_ENC_RINGS	(AMDGPU_VCN_MAX_ENC_RINGS * AMDGPU_MAX_VCN_INSTANCES)

#define AMDGPU_VCN_HARVEST_VCN0 (1 << 0)
#define AMDGPU_VCN_HARVEST_VCN1 (1 << 1)
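
/*
 * Harvest bits accumulate in adev->vcn.harvest_config (declared below);
 * a fused-off instance is typically skipped with a check like
 *
 *	if (adev->vcn.harvest_config & (1 << inst_idx))
 *		continue;
 *
 * (illustrative usage only)
 */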

#define VCN_DEC_KMD_CMD			0x80000000
#define VCN_DEC_CMD_FENCE		0x00000000
#define VCN_DEC_CMD_TRAP		0x00000001
#define VCN_DEC_CMD_WRITE_REG		0x00000004
#define VCN_DEC_CMD_REG_READ_COND_WAIT	0x00000006
#define VCN_DEC_CMD_PACKET_START	0x0000000a
#define VCN_DEC_CMD_PACKET_END		0x0000000b

#define VCN_DEC_SW_CMD_NO_OP		0x00000000
#define VCN_DEC_SW_CMD_END		0x00000001
#define VCN_DEC_SW_CMD_IB		0x00000002
#define VCN_DEC_SW_CMD_FENCE		0x00000003
#define VCN_DEC_SW_CMD_TRAP		0x00000004
#define VCN_DEC_SW_CMD_IB_AUTO		0x00000005
#define VCN_DEC_SW_CMD_SEMAPHORE	0x00000006
#define VCN_DEC_SW_CMD_PREEMPT_FENCE	0x00000009
#define VCN_DEC_SW_CMD_REG_WRITE	0x0000000b
#define VCN_DEC_SW_CMD_REG_WAIT		0x0000000c

#define VCN_ENC_CMD_NO_OP		0x00000000
#define VCN_ENC_CMD_END			0x00000001
#define VCN_ENC_CMD_IB			0x00000002
#define VCN_ENC_CMD_FENCE		0x00000003
#define VCN_ENC_CMD_TRAP		0x00000004
#define VCN_ENC_CMD_REG_WRITE		0x0000000b
#define VCN_ENC_CMD_REG_WAIT		0x0000000c

#define VCN_AON_SOC_ADDRESS_2_0		0x1f800
#define VCN1_AON_SOC_ADDRESS_3_0	0x48000
#define VCN_VID_IP_ADDRESS_2_0		0x0
#define VCN_AON_IP_ADDRESS_2_0		0x30000

#define mmUVD_RBC_XX_IB_REG_CHECK					0x026b
#define mmUVD_RBC_XX_IB_REG_CHECK_BASE_IDX				1
#define mmUVD_REG_XX_MASK						0x026c
#define mmUVD_REG_XX_MASK_BASE_IDX					1

/* 1 second timeout */
#define VCN_IDLE_TIMEOUT	msecs_to_jiffies(1000)

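/*
 * DPG (dynamic power gating) register access for VCN 1.0: registers are
 * reached indirectly through the UVD_DPG_LMA_* window rather than by direct
 * MMIO.  The read macro below is a GNU statement expression, so it evaluates
 * to the value returned by the final RREG32_SOC15(); the write macro loads
 * LMA_DATA first and then triggers the transfer via LMA_CTL.
 */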
#define RREG32_SOC15_DPG_MODE_1_0(ip, inst_idx, reg, mask, sram_sel)			\
	({	WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_MASK, mask);			\
		WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_CTL,				\
			UVD_DPG_LMA_CTL__MASK_EN_MASK |					\
			((adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg)	\
			<< UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) |			\
			(sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT));		\
		RREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_DATA);				\
	})

#define WREG32_SOC15_DPG_MODE_1_0(ip, inst_idx, reg, value, mask, sram_sel)		\
	do {										\
		WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_DATA, value);			\
		WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_MASK, mask);			\
		WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_CTL,				\
			UVD_DPG_LMA_CTL__READ_WRITE_MASK |				\
			((adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg)	\
			<< UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) |			\
			(sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT));		\
	} while (0)

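/*
 * Translate a SOC15 register offset into the VCN-internal offset expected by
 * the LMA window: the dword offset is widened to a byte address (<< 2),
 * matched against the VID/AON aperture of either instance, rebased onto the
 * corresponding internal IP address, and narrowed back to dwords (>> 2).
 * E.g. an address inside the AON window becomes
 * (addr - VCN_AON_SOC_ADDRESS_2_0) + VCN_AON_IP_ADDRESS_2_0.
 */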
#define SOC15_DPG_MODE_OFFSET(ip, inst_idx, reg)					\
	({										\
		uint32_t internal_reg_offset, addr;					\
		bool video_range, video1_range, aon_range, aon1_range;			\
											\
		addr = (adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg);	\
		addr <<= 2;								\
		video_range = ((((0xFFFFF & addr) >= (VCN_VID_SOC_ADDRESS_2_0)) &&	\
				((0xFFFFF & addr) < ((VCN_VID_SOC_ADDRESS_2_0 + 0x2600)))));	\
		video1_range = ((((0xFFFFF & addr) >= (VCN1_VID_SOC_ADDRESS_3_0)) &&	\
				((0xFFFFF & addr) < ((VCN1_VID_SOC_ADDRESS_3_0 + 0x2600)))));	\
		aon_range = ((((0xFFFFF & addr) >= (VCN_AON_SOC_ADDRESS_2_0)) &&	\
				((0xFFFFF & addr) < ((VCN_AON_SOC_ADDRESS_2_0 + 0x600)))));	\
		aon1_range = ((((0xFFFFF & addr) >= (VCN1_AON_SOC_ADDRESS_3_0)) &&	\
				((0xFFFFF & addr) < ((VCN1_AON_SOC_ADDRESS_3_0 + 0x600)))));	\
		if (video_range)							\
			internal_reg_offset = ((0xFFFFF & addr) - (VCN_VID_SOC_ADDRESS_2_0) +	\
				(VCN_VID_IP_ADDRESS_2_0));				\
		else if (aon_range)							\
			internal_reg_offset = ((0xFFFFF & addr) - (VCN_AON_SOC_ADDRESS_2_0) +	\
				(VCN_AON_IP_ADDRESS_2_0));				\
		else if (video1_range)							\
			internal_reg_offset = ((0xFFFFF & addr) - (VCN1_VID_SOC_ADDRESS_3_0) +	\
				(VCN_VID_IP_ADDRESS_2_0));				\
		else if (aon1_range)							\
			internal_reg_offset = ((0xFFFFF & addr) - (VCN1_AON_SOC_ADDRESS_3_0) +	\
				(VCN_AON_IP_ADDRESS_2_0));				\
		else									\
			internal_reg_offset = (0xFFFFF & addr);				\
											\
		internal_reg_offset >>= 2;						\
	})

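/*
 * DPG register access for VCN 2.0+.  Reads always go through the LMA window.
 * Writes take an extra @indirect argument: when it is clear the value goes
 * out immediately via LMA_DATA/LMA_CTL; when it is set the (offset, value)
 * pair is instead appended to the per-instance dpg_sram buffer (see
 * dpg_sram_curr_addr in struct amdgpu_vcn_inst below) for deferred
 * programming in indirect-SRAM mode.
 */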
#define RREG32_SOC15_DPG_MODE(inst_idx, offset, mask_en)				\
	({										\
		WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_CTL,				\
			(0x0 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT |			\
			mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT |			\
			offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT));		\
		RREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA);			\
	})

#define WREG32_SOC15_DPG_MODE(inst_idx, offset, value, mask_en, indirect)		\
	do {										\
		if (!indirect) {							\
			WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA, value);		\
			WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_CTL,			\
				(0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT |		\
				 mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT |		\
				 offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT));	\
		} else {								\
			*adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = offset;	\
			*adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = value;	\
		}									\
	} while (0)

#define AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE (1 << 2)
#define AMDGPU_VCN_FW_SHARED_FLAG_0_RB	(1 << 6)
#define AMDGPU_VCN_MULTI_QUEUE_FLAG	(1 << 8)
#define AMDGPU_VCN_SW_RING_FLAG		(1 << 9)
#define AMDGPU_VCN_FW_LOGGING_FLAG	(1 << 10)
#define AMDGPU_VCN_SMU_VERSION_INFO_FLAG (1 << 11)

#define AMDGPU_VCN_IB_FLAG_DECODE_BUFFER	0x00000001
#define AMDGPU_VCN_CMD_FLAG_MSG_BUFFER		0x00000001

#define VCN_CODEC_DISABLE_MASK_AV1  (1 << 0)
#define VCN_CODEC_DISABLE_MASK_VP9  (1 << 1)
#define VCN_CODEC_DISABLE_MASK_HEVC (1 << 2)
#define VCN_CODEC_DISABLE_MASK_H264 (1 << 3)
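
/*
 * Per-instance codec disable bits, exposed through
 * adev->vcn.vcn_codec_disable_mask[] below; a plausible (illustrative)
 * check looks like
 *
 *	if (adev->vcn.vcn_codec_disable_mask[inst_idx] &
 *	    VCN_CODEC_DISABLE_MASK_AV1)
 *		return false;
 */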

enum fw_queue_mode {
	FW_QUEUE_RING_RESET = 1,
	FW_QUEUE_DPG_HOLD_OFF = 2,
};

enum engine_status_constants {
	UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON = 0x2AAAA0,
	UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON_2_0 = 0xAAAA0,
	UVD_PGFSM_STATUS__UVDM_UVDU_UVDLM_PWR_ON_3_0 = 0x2A2A8AA0,
	UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON = 0x00000002,
	UVD_STATUS__UVD_BUSY = 0x00000004,
	GB_ADDR_CONFIG_DEFAULT = 0x26010011,
	UVD_STATUS__IDLE = 0x2,
	UVD_STATUS__BUSY = 0x5,
	UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF = 0x1,
	UVD_STATUS__RBC_BUSY = 0x1,
	UVD_PGFSM_STATUS_UVDJ_PWR_ON = 0,
};

enum internal_dpg_state {
	VCN_DPG_STATE__UNPAUSE = 0,
	VCN_DPG_STATE__PAUSE,
};

struct dpg_pause_state {
	enum internal_dpg_state fw_based;
	enum internal_dpg_state jpeg;
};

struct amdgpu_vcn_reg {
	unsigned	data0;
	unsigned	data1;
	unsigned	cmd;
	unsigned	nop;
	unsigned	context_id;
	unsigned	ib_vmid;
	unsigned	ib_bar_low;
	unsigned	ib_bar_high;
	unsigned	ib_size;
	unsigned	gp_scratch8;
	unsigned	scratch9;
};

struct amdgpu_vcn_fw_shared {
	void		*cpu_addr;
	uint64_t	gpu_addr;
	uint32_t	mem_size;
	uint32_t	log_offset;
};

struct amdgpu_vcn_inst {
	struct amdgpu_bo	*vcpu_bo;
	void			*cpu_addr;
	uint64_t		gpu_addr;
	void			*saved_bo;
	struct amdgpu_ring	ring_dec;
	struct amdgpu_ring	ring_enc[AMDGPU_VCN_MAX_ENC_RINGS];
	atomic_t		sched_score;
	struct amdgpu_irq_src	irq;
	struct amdgpu_vcn_reg	external;
	struct amdgpu_bo	*dpg_sram_bo;
	struct dpg_pause_state	pause_state;
	void			*dpg_sram_cpu_addr;
	uint64_t		dpg_sram_gpu_addr;
	uint32_t		*dpg_sram_curr_addr;
	atomic_t		dpg_enc_submission_cnt;
	struct amdgpu_vcn_fw_shared fw_shared;
};

struct amdgpu_vcn_ras {
	struct amdgpu_ras_block_object ras_block;
};

struct amdgpu_vcn {
	unsigned		fw_version;
	struct delayed_work	idle_work;
	const struct firmware	*fw;	/* VCN firmware */
	unsigned		num_enc_rings;
	enum amd_powergating_state cur_state;
	bool			indirect_sram;

	uint8_t	num_vcn_inst;
	struct amdgpu_vcn_inst	 inst[AMDGPU_MAX_VCN_INSTANCES];
	uint8_t			 vcn_config[AMDGPU_MAX_VCN_INSTANCES];
	uint32_t		 vcn_codec_disable_mask[AMDGPU_MAX_VCN_INSTANCES];
	struct amdgpu_vcn_reg	 internal;
	struct mutex		 vcn_pg_lock;
	struct mutex		 vcn1_jpeg1_workaround;
	atomic_t		 total_submission_cnt;

	unsigned	harvest_config;
	int (*pause_dpg_mode)(struct amdgpu_device *adev,
		int inst_idx, struct dpg_pause_state *new_state);

	struct ras_common_if	*ras_if;
	struct amdgpu_vcn_ras	*ras;
};

struct amdgpu_fw_shared_rb_ptrs_struct {
	/* to WA DPG R/W ptr issues. */
	uint32_t  rptr;
	uint32_t  wptr;
};

struct amdgpu_fw_shared_multi_queue {
	uint8_t decode_queue_mode;
	uint8_t encode_generalpurpose_queue_mode;
	uint8_t encode_lowlatency_queue_mode;
	uint8_t encode_realtime_queue_mode;
	uint8_t padding[4];
};

struct amdgpu_fw_shared_sw_ring {
	uint8_t is_enabled;
	uint8_t padding[3];
};

struct amdgpu_fw_shared_unified_queue_struct {
	uint8_t is_enabled;
	uint8_t queue_mode;
	uint8_t queue_status;
	uint8_t padding[5];
};

struct amdgpu_fw_shared_fw_logging {
	uint8_t is_enabled;
	uint32_t addr_lo;
	uint32_t addr_hi;
	uint32_t size;
};

struct amdgpu_fw_shared_smu_interface_info {
	uint8_t smu_interface_type;
	uint8_t padding[3];
};

struct amdgpu_fw_shared {
	uint32_t present_flag_0;
	uint8_t pad[44];
	struct amdgpu_fw_shared_rb_ptrs_struct rb;
	uint8_t pad1[1];
	struct amdgpu_fw_shared_multi_queue multi_queue;
	struct amdgpu_fw_shared_sw_ring sw_ring;
	struct amdgpu_fw_shared_fw_logging fw_log;
	struct amdgpu_fw_shared_smu_interface_info smu_interface_info;
};
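
/*
 * present_flag_0 tells firmware which optional sub-structures are valid,
 * using the AMDGPU_VCN_*_FLAG bits defined above.  The structure lives in
 * little-endian GPU-visible memory, so a typical (illustrative) setup
 * would be
 *
 *	fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG);
 */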

struct amdgpu_vcn4_fw_shared {
	uint32_t present_flag_0;
	uint8_t pad[12];
	struct amdgpu_fw_shared_unified_queue_struct sq;
	uint8_t pad1[8];
	struct amdgpu_fw_shared_fw_logging fw_log;
};

struct amdgpu_vcn_fwlog {
	uint32_t rptr;
	uint32_t wptr;
	uint32_t buffer_size;
	uint32_t header_size;
	uint8_t wrapped;
};

struct amdgpu_vcn_decode_buffer {
	uint32_t valid_buf_flag;
	uint32_t msg_buffer_address_hi;
	uint32_t msg_buffer_address_lo;
	uint32_t pad[30];
};

#define VCN_BLOCK_ENCODE_DISABLE_MASK 0x80
#define VCN_BLOCK_DECODE_DISABLE_MASK 0x40
#define VCN_BLOCK_QUEUE_DISABLE_MASK 0xC0
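
/*
 * VCN_BLOCK_QUEUE_DISABLE_MASK is the union of the encode and decode bits
 * (0x80 | 0x40 == 0xC0).  These masks are matched against vcn_config[] for
 * a given instance; see amdgpu_vcn_is_disabled_vcn() below.
 */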

enum vcn_ring_type {
	VCN_ENCODE_RING,
	VCN_DECODE_RING,
	VCN_UNIFIED_RING,
};

int amdgpu_vcn_sw_init(struct amdgpu_device *adev);
int amdgpu_vcn_sw_fini(struct amdgpu_device *adev);
int amdgpu_vcn_suspend(struct amdgpu_device *adev);
int amdgpu_vcn_resume(struct amdgpu_device *adev);
void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring);
void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring);
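
/*
 * begin_use/end_use bracket ring activity for power management; a
 * simplified submission path (illustrative only) would be
 *
 *	amdgpu_vcn_ring_begin_use(ring);
 *	...emit commands...
 *	amdgpu_vcn_ring_end_use(ring);
 *
 * where end_use re-arms the idle handler after VCN_IDLE_TIMEOUT.
 */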

bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev,
				enum vcn_ring_type type, uint32_t vcn_instance);

int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring);
int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout);
int amdgpu_vcn_dec_sw_ring_test_ring(struct amdgpu_ring *ring);
int amdgpu_vcn_dec_sw_ring_test_ib(struct amdgpu_ring *ring, long timeout);

int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring);
int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout);

enum amdgpu_ring_priority_level amdgpu_vcn_get_enc_ring_prio(int ring);

void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev);

void amdgpu_vcn_fwlog_init(struct amdgpu_vcn_inst *vcn);
void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev,
				   uint8_t i, struct amdgpu_vcn_inst *vcn);

int amdgpu_vcn_process_poison_irq(struct amdgpu_device *adev,
			struct amdgpu_irq_src *source,
			struct amdgpu_iv_entry *entry);

#endif