cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

sdma_v4_4.c (9062B)


      1/*
      2 * Copyright 2020 Advanced Micro Devices, Inc.
      3 *
      4 * Permission is hereby granted, free of charge, to any person obtaining a
      5 * copy of this software and associated documentation files (the "Software"),
      6 * to deal in the Software without restriction, including without limitation
      7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8 * and/or sell copies of the Software, and to permit persons to whom the
      9 * Software is furnished to do so, subject to the following conditions:
     10 *
     11 * The above copyright notice and this permission notice shall be included in
     12 * all copies or substantial portions of the Software.
     13 *
     14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
     18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     20 * OTHER DEALINGS IN THE SOFTWARE.
     21 *
     22 */
     23#include "amdgpu.h"
     24#include "sdma/sdma_4_4_0_offset.h"
     25#include "sdma/sdma_4_4_0_sh_mask.h"
     26#include "soc15.h"
     27#include "amdgpu_ras.h"
     28
/*
 * Per-instance register deltas. sdma_v4_4_get_reg_offset() adds the
 * matching value to the SDMA0 base for SDMA instances 1 through 4;
 * instance 0 uses the base unchanged.
 */
#define SDMA1_REG_OFFSET 0x600
#define SDMA2_REG_OFFSET 0x1cda0
#define SDMA3_REG_OFFSET 0x1d1a0
#define SDMA4_REG_OFFSET 0x1d5a0
     33
     34/* helper function that allow only use sdma0 register offset
     35 * to calculate register offset for all the sdma instances */
     36static uint32_t sdma_v4_4_get_reg_offset(struct amdgpu_device *adev,
     37					 uint32_t instance,
     38					 uint32_t offset)
     39{
     40	uint32_t sdma_base = adev->reg_offset[SDMA0_HWIP][0][0];
     41
     42	switch (instance) {
     43	case 0:
     44		return (sdma_base + offset);
     45	case 1:
     46		return (sdma_base + SDMA1_REG_OFFSET + offset);
     47	case 2:
     48		return (sdma_base + SDMA2_REG_OFFSET + offset);
     49	case 3:
     50		return (sdma_base + SDMA3_REG_OFFSET + offset);
     51	case 4:
     52		return (sdma_base + SDMA4_REG_OFFSET + offset);
     53	default:
     54		break;
     55	}
     56	return 0;
     57}
     58
     59static const struct soc15_ras_field_entry sdma_v4_4_ras_fields[] = {
     60	{ "SDMA_MBANK_DATA_BUF0_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
     61	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF0_SED),
     62	0, 0,
     63	},
     64	{ "SDMA_MBANK_DATA_BUF1_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
     65	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF1_SED),
     66	0, 0,
     67	},
     68	{ "SDMA_MBANK_DATA_BUF2_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
     69	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF2_SED),
     70	0, 0,
     71	},
     72	{ "SDMA_MBANK_DATA_BUF3_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
     73	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF3_SED),
     74	0, 0,
     75	},
     76	{ "SDMA_MBANK_DATA_BUF4_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
     77	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF4_SED),
     78	0, 0,
     79	},
     80	{ "SDMA_MBANK_DATA_BUF5_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
     81	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF5_SED),
     82	0, 0,
     83	},
     84	{ "SDMA_MBANK_DATA_BUF6_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
     85	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF6_SED),
     86	0, 0,
     87	},
     88	{ "SDMA_MBANK_DATA_BUF7_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
     89	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF7_SED),
     90	0, 0,
     91	},
     92	{ "SDMA_MBANK_DATA_BUF8_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
     93	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF8_SED),
     94	0, 0,
     95	},
     96	{ "SDMA_MBANK_DATA_BUF9_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
     97	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF9_SED),
     98	0, 0,
     99	},
    100	{ "SDMA_MBANK_DATA_BUF10_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
    101	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF10_SED),
    102	0, 0,
    103	},
    104	{ "SDMA_MBANK_DATA_BUF11_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
    105	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF11_SED),
    106	0, 0,
    107	},
    108	{ "SDMA_MBANK_DATA_BUF12_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
    109	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF12_SED),
    110	0, 0,
    111	},
    112	{ "SDMA_MBANK_DATA_BUF13_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
    113	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF13_SED),
    114	0, 0,
    115	},
    116	{ "SDMA_MBANK_DATA_BUF14_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
    117	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF14_SED),
    118	0, 0,
    119	},
    120	{ "SDMA_MBANK_DATA_BUF15_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
    121	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF15_SED),
    122	0, 0,
    123	},
    124	{ "SDMA_UCODE_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
    125	SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_UCODE_BUF_SED),
    126	0, 0,
    127	},
    128	{ "SDMA_RB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
    129	SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_RB_CMD_BUF_SED),
    130	0, 0,
    131	},
    132	{ "SDMA_IB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
    133	SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_IB_CMD_BUF_SED),
    134	0, 0,
    135	},
    136	{ "SDMA_UTCL1_RD_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
    137	SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_UTCL1_RD_FIFO_SED),
    138	0, 0,
    139	},
    140	{ "SDMA_UTCL1_RDBST_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
    141	SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_UTCL1_RDBST_FIFO_SED),
    142	0, 0,
    143	},
    144	{ "SDMA_DATA_LUT_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
    145	SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_DATA_LUT_FIFO_SED),
    146	0, 0,
    147	},
    148	{ "SDMA_SPLIT_DATA_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
    149	SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_SPLIT_DATA_BUF_SED),
    150	0, 0,
    151	},
    152	{ "SDMA_MC_WR_ADDR_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
    153	SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_MC_WR_ADDR_FIFO_SED),
    154	0, 0,
    155	},
    156	{ "SDMA_MC_RDRET_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
    157	SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_MC_WR_ADDR_FIFO_SED),
    158	0, 0,
    159	},
    160};
    161
    162static void sdma_v4_4_get_ras_error_count(struct amdgpu_device *adev,
    163					  uint32_t reg_offset,
    164					  uint32_t value,
    165					  uint32_t instance,
    166					  uint32_t *sec_count)
    167{
    168	uint32_t i;
    169	uint32_t sec_cnt;
    170
    171	/* double bits error (multiple bits) error detection is not supported */
    172	for (i = 0; i < ARRAY_SIZE(sdma_v4_4_ras_fields); i++) {
    173		if (sdma_v4_4_ras_fields[i].reg_offset != reg_offset)
    174			continue;
    175
    176		/* the SDMA_EDC_COUNTER register in each sdma instance
    177		 * shares the same sed shift_mask
    178		 * */
    179		sec_cnt = (value &
    180			sdma_v4_4_ras_fields[i].sec_count_mask) >>
    181			sdma_v4_4_ras_fields[i].sec_count_shift;
    182		if (sec_cnt) {
    183			dev_info(adev->dev, "Detected %s in SDMA%d, SED %d\n",
    184				 sdma_v4_4_ras_fields[i].name,
    185				 instance, sec_cnt);
    186			*sec_count += sec_cnt;
    187		}
    188	}
    189}
    190
    191static int sdma_v4_4_query_ras_error_count_by_instance(struct amdgpu_device *adev,
    192					   uint32_t instance,
    193					   void *ras_error_status)
    194{
    195	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
    196	uint32_t sec_count = 0;
    197	uint32_t reg_value = 0;
    198	uint32_t reg_offset = 0;
    199
    200	reg_offset = sdma_v4_4_get_reg_offset(adev, instance, regSDMA0_EDC_COUNTER);
    201	reg_value = RREG32(reg_offset);
    202	/* double bit error is not supported */
    203	if (reg_value)
    204		sdma_v4_4_get_ras_error_count(adev, regSDMA0_EDC_COUNTER, reg_value,
    205					      instance, &sec_count);
    206
    207	reg_offset = sdma_v4_4_get_reg_offset(adev, instance, regSDMA0_EDC_COUNTER2);
    208	reg_value = RREG32(reg_offset);
    209	/* double bit error is not supported */
    210	if (reg_value)
    211		sdma_v4_4_get_ras_error_count(adev, regSDMA0_EDC_COUNTER2, reg_value,
    212					      instance, &sec_count);
    213
    214	/*
    215	 * err_data->ue_count should be initialized to 0
    216	 * before calling into this function
    217	 *
    218	 * SDMA RAS supports single bit uncorrectable error detection.
    219	 * So, increment uncorrectable error count.
    220	 */
    221	err_data->ue_count += sec_count;
    222
    223	/*
    224	 * SDMA RAS does not support correctable errors.
    225	 * Set ce count to 0.
    226	 */
    227	err_data->ce_count = 0;
    228
    229	return 0;
    230};
    231
    232static void sdma_v4_4_reset_ras_error_count(struct amdgpu_device *adev)
    233{
    234	int i;
    235	uint32_t reg_offset;
    236
    237	/* write 0 to EDC_COUNTER reg to clear sdma edc counters */
    238	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
    239		for (i = 0; i < adev->sdma.num_instances; i++) {
    240			reg_offset = sdma_v4_4_get_reg_offset(adev, i, regSDMA0_EDC_COUNTER);
    241			WREG32(reg_offset, 0);
    242			reg_offset = sdma_v4_4_get_reg_offset(adev, i, regSDMA0_EDC_COUNTER2);
    243			WREG32(reg_offset, 0);
    244		}
    245	}
    246}
    247
    248static void sdma_v4_4_query_ras_error_count(struct amdgpu_device *adev,  void *ras_error_status)
    249{
    250	int i = 0;
    251
    252	for (i = 0; i < adev->sdma.num_instances; i++) {
    253		if (sdma_v4_4_query_ras_error_count_by_instance(adev, i, ras_error_status)) {
    254			dev_err(adev->dev, "Query ras error count failed in SDMA%d\n", i);
    255			return;
    256		}
    257	}
    258
    259}
    260
    261const struct amdgpu_ras_block_hw_ops sdma_v4_4_ras_hw_ops = {
    262	.query_ras_error_count = sdma_v4_4_query_ras_error_count,
    263	.reset_ras_error_count = sdma_v4_4_reset_ras_error_count,
    264};
    265
    266struct amdgpu_sdma_ras sdma_v4_4_ras = {
    267	.ras_block = {
    268		.hw_ops = &sdma_v4_4_ras_hw_ops,
    269	},
    270};