cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

amdgpu_ras.c (79042B)


      1/*
      2 * Copyright 2018 Advanced Micro Devices, Inc.
      3 *
      4 * Permission is hereby granted, free of charge, to any person obtaining a
      5 * copy of this software and associated documentation files (the "Software"),
      6 * to deal in the Software without restriction, including without limitation
      7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8 * and/or sell copies of the Software, and to permit persons to whom the
      9 * Software is furnished to do so, subject to the following conditions:
     10 *
     11 * The above copyright notice and this permission notice shall be included in
     12 * all copies or substantial portions of the Software.
     13 *
     14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
     18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     20 * OTHER DEALINGS IN THE SOFTWARE.
     21 *
     22 *
     23 */
     24#include <linux/debugfs.h>
     25#include <linux/list.h>
     26#include <linux/module.h>
     27#include <linux/uaccess.h>
     28#include <linux/reboot.h>
     29#include <linux/syscalls.h>
     30#include <linux/pm_runtime.h>
     31
     32#include "amdgpu.h"
     33#include "amdgpu_ras.h"
     34#include "amdgpu_atomfirmware.h"
     35#include "amdgpu_xgmi.h"
     36#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
     37#include "atom.h"
     38#ifdef CONFIG_X86_MCE_AMD
     39#include <asm/mce.h>
     40
     41static bool notifier_registered;
     42#endif
     43static const char *RAS_FS_NAME = "ras";
     44
     45const char *ras_error_string[] = {
     46	"none",
     47	"parity",
     48	"single_correctable",
     49	"multi_uncorrectable",
     50	"poison",
     51};
     52
     53const char *ras_block_string[] = {
     54	"umc",
     55	"sdma",
     56	"gfx",
     57	"mmhub",
     58	"athub",
     59	"pcie_bif",
     60	"hdp",
     61	"xgmi_wafl",
     62	"df",
     63	"smn",
     64	"sem",
     65	"mp0",
     66	"mp1",
     67	"fuse",
     68	"mca",
     69	"vcn",
     70	"jpeg",
     71};
     72
     73const char *ras_mca_block_string[] = {
     74	"mca_mp0",
     75	"mca_mp1",
     76	"mca_mpio",
     77	"mca_iohc",
     78};
     79
     80struct amdgpu_ras_block_list {
     81	/* ras block link */
     82	struct list_head node;
     83
     84	struct amdgpu_ras_block_object *ras_obj;
     85};
     86
     87const char *get_ras_block_str(struct ras_common_if *ras_block)
     88{
     89	if (!ras_block)
     90		return "NULL";
     91
     92	if (ras_block->block >= AMDGPU_RAS_BLOCK_COUNT)
     93		return "OUT OF RANGE";
     94
     95	if (ras_block->block == AMDGPU_RAS_BLOCK__MCA)
     96		return ras_mca_block_string[ras_block->sub_block_index];
     97
     98	return ras_block_string[ras_block->block];
     99}
    100
    101#define ras_block_str(_BLOCK_) \
    102	(((_BLOCK_) < ARRAY_SIZE(ras_block_string)) ? ras_block_string[_BLOCK_] : "Out Of Range")
    103
    104#define ras_err_str(i) (ras_error_string[ffs(i)])
    105
    106#define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS)
    107
    108/* inject address is 52 bits */
    109#define	RAS_UMC_INJECT_ADDR_LIMIT	(0x1ULL << 52)
    110
    111/* typical ECC bad page rate is 1 bad page per 100MB VRAM */
    112#define RAS_BAD_PAGE_COVER              (100 * 1024 * 1024ULL)
    113
    114enum amdgpu_ras_retire_page_reservation {
    115	AMDGPU_RAS_RETIRE_PAGE_RESERVED,
    116	AMDGPU_RAS_RETIRE_PAGE_PENDING,
    117	AMDGPU_RAS_RETIRE_PAGE_FAULT,
    118};
    119
    120atomic_t amdgpu_ras_in_intr = ATOMIC_INIT(0);
    121
    122static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
    123				uint64_t addr);
    124static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
    125				uint64_t addr);
    126#ifdef CONFIG_X86_MCE_AMD
    127static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev);
    128struct mce_notifier_adev_list {
    129	struct amdgpu_device *devs[MAX_GPU_INSTANCE];
    130	int num_gpu;
    131};
    132static struct mce_notifier_adev_list mce_adev_list;
    133#endif
    134
    135void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready)
    136{
    137	if (adev && amdgpu_ras_get_context(adev))
    138		amdgpu_ras_get_context(adev)->error_query_ready = ready;
    139}
    140
    141static bool amdgpu_ras_get_error_query_ready(struct amdgpu_device *adev)
    142{
    143	if (adev && amdgpu_ras_get_context(adev))
    144		return amdgpu_ras_get_context(adev)->error_query_ready;
    145
    146	return false;
    147}
    148
    149static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t address)
    150{
    151	struct ras_err_data err_data = {0, 0, 0, NULL};
    152	struct eeprom_table_record err_rec;
    153
    154	if ((address >= adev->gmc.mc_vram_size) ||
    155	    (address >= RAS_UMC_INJECT_ADDR_LIMIT)) {
    156		dev_warn(adev->dev,
    157		         "RAS WARN: input address 0x%llx is invalid.\n",
    158		         address);
    159		return -EINVAL;
    160	}
    161
    162	if (amdgpu_ras_check_bad_page(adev, address)) {
    163		dev_warn(adev->dev,
    164			 "RAS WARN: 0x%llx has already been marked as bad page!\n",
    165			 address);
    166		return 0;
    167	}
    168
    169	memset(&err_rec, 0x0, sizeof(struct eeprom_table_record));
    170	err_data.err_addr = &err_rec;
    171	amdgpu_umc_fill_error_record(&err_data, address,
    172			(address >> AMDGPU_GPU_PAGE_SHIFT), 0, 0);
    173
    174	if (amdgpu_bad_page_threshold != 0) {
    175		amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
    176					 err_data.err_addr_cnt);
    177		amdgpu_ras_save_bad_pages(adev);
    178	}
    179
    180	dev_warn(adev->dev, "WARNING: THIS IS ONLY FOR TEST PURPOSES AND WILL CORRUPT RAS EEPROM\n");
    181	dev_warn(adev->dev, "Clear EEPROM:\n");
    182	dev_warn(adev->dev, "    echo 1 > /sys/kernel/debug/dri/0/ras/ras_eeprom_reset\n");
    183
    184	return 0;
    185}
    186
    187static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf,
    188					size_t size, loff_t *pos)
    189{
    190	struct ras_manager *obj = (struct ras_manager *)file_inode(f)->i_private;
    191	struct ras_query_if info = {
    192		.head = obj->head,
    193	};
    194	ssize_t s;
    195	char val[128];
    196
    197	if (amdgpu_ras_query_error_status(obj->adev, &info))
    198		return -EINVAL;
    199
    200	/* Hardware counter will be reset automatically after the query on Vega20 and Arcturus */
    201	if (obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) &&
    202	    obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) {
    203		if (amdgpu_ras_reset_error_status(obj->adev, info.head.block))
    204			dev_warn(obj->adev->dev, "Failed to reset error counter and error status");
    205	}
    206
    207	s = snprintf(val, sizeof(val), "%s: %lu\n%s: %lu\n",
    208			"ue", info.ue_count,
    209			"ce", info.ce_count);
    210	if (*pos >= s)
    211		return 0;
    212
    213	s -= *pos;
    214	s = min_t(u64, s, size);
    215
    216
    217	if (copy_to_user(buf, &val[*pos], s))
    218		return -EINVAL;
    219
    220	*pos += s;
    221
    222	return s;
    223}
    224
    225static const struct file_operations amdgpu_ras_debugfs_ops = {
    226	.owner = THIS_MODULE,
    227	.read = amdgpu_ras_debugfs_read,
    228	.write = NULL,
    229	.llseek = default_llseek
    230};
    231
    232static int amdgpu_ras_find_block_id_by_name(const char *name, int *block_id)
    233{
    234	int i;
    235
    236	for (i = 0; i < ARRAY_SIZE(ras_block_string); i++) {
    237		*block_id = i;
    238		if (strcmp(name, ras_block_string[i]) == 0)
    239			return 0;
    240	}
    241	return -EINVAL;
    242}
    243
    244static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
    245		const char __user *buf, size_t size,
    246		loff_t *pos, struct ras_debug_if *data)
    247{
    248	ssize_t s = min_t(u64, 64, size);
    249	char str[65];
    250	char block_name[33];
    251	char err[9] = "ue";
    252	int op = -1;
    253	int block_id;
    254	uint32_t sub_block;
    255	u64 address, value;
    256
    257	if (*pos)
    258		return -EINVAL;
    259	*pos = size;
    260
    261	memset(str, 0, sizeof(str));
    262	memset(data, 0, sizeof(*data));
    263
    264	if (copy_from_user(str, buf, s))
    265		return -EINVAL;
    266
    267	if (sscanf(str, "disable %32s", block_name) == 1)
    268		op = 0;
    269	else if (sscanf(str, "enable %32s %8s", block_name, err) == 2)
    270		op = 1;
    271	else if (sscanf(str, "inject %32s %8s", block_name, err) == 2)
    272		op = 2;
    273	else if (strstr(str, "retire_page") != NULL)
    274		op = 3;
    275	else if (str[0] && str[1] && str[2] && str[3])
    276		/* ascii string, but commands are not matched. */
    277		return -EINVAL;
    278
    279	if (op != -1) {
    280		if (op == 3) {
    281			if (sscanf(str, "%*s 0x%llx", &address) != 1 &&
    282			    sscanf(str, "%*s %llu", &address) != 1)
    283				return -EINVAL;
    284
    285			data->op = op;
    286			data->inject.address = address;
    287
    288			return 0;
    289		}
    290
    291		if (amdgpu_ras_find_block_id_by_name(block_name, &block_id))
    292			return -EINVAL;
    293
    294		data->head.block = block_id;
    295		/* only ue and ce errors are supported */
    296		if (!memcmp("ue", err, 2))
    297			data->head.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
    298		else if (!memcmp("ce", err, 2))
    299			data->head.type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE;
    300		else
    301			return -EINVAL;
    302
    303		data->op = op;
    304
    305		if (op == 2) {
    306			if (sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx",
    307				   &sub_block, &address, &value) != 3 &&
    308			    sscanf(str, "%*s %*s %*s %u %llu %llu",
    309				   &sub_block, &address, &value) != 3)
    310				return -EINVAL;
    311			data->head.sub_block_index = sub_block;
    312			data->inject.address = address;
    313			data->inject.value = value;
    314		}
    315	} else {
    316		if (size < sizeof(*data))
    317			return -EINVAL;
    318
    319		if (copy_from_user(data, buf, sizeof(*data)))
    320			return -EINVAL;
    321	}
    322
    323	return 0;
    324}
    325
    326/**
    327 * DOC: AMDGPU RAS debugfs control interface
    328 *
    329 * The control interface accepts struct ras_debug_if which has two members.
    330 *
    331 * First member: ras_debug_if::head or ras_debug_if::inject.
    332 *
    333 * head is used to indicate which IP block will be under control.
    334 *
     335 * head has four members: block, type, sub_block_index, and name.
     336 * block: which IP will be under control.
     337 * type: what kind of error will be enabled/disabled/injected.
     338 * sub_block_index: some IPs have subcomponents, e.g. GFX, SDMA.
     339 * name: the name of the IP.
     340 *
     341 * inject has two more members than head: address and value.
    342 * As their names indicate, inject operation will write the
    343 * value to the address.
    344 *
    345 * The second member: struct ras_debug_if::op.
    346 * It has three kinds of operations.
    347 *
    348 * - 0: disable RAS on the block. Take ::head as its data.
    349 * - 1: enable RAS on the block. Take ::head as its data.
    350 * - 2: inject errors on the block. Take ::inject as its data.
    351 *
    352 * How to use the interface?
    353 *
    354 * In a program
    355 *
    356 * Copy the struct ras_debug_if in your code and initialize it.
    357 * Write the struct to the control interface.
    358 *
    359 * From shell
    360 *
    361 * .. code-block:: bash
    362 *
    363 *	echo "disable <block>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
    364 *	echo "enable  <block> <error>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
     365 *	echo "inject  <block> <error> <sub-block> <address> <value>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
    366 *
     367 * Where N is the card which you want to affect.
    368 *
    369 * "disable" requires only the block.
    370 * "enable" requires the block and error type.
    371 * "inject" requires the block, error type, address, and value.
    372 *
    373 * The block is one of: umc, sdma, gfx, etc.
    374 *	see ras_block_string[] for details
    375 *
    376 * The error type is one of: ue, ce, where,
    377 *	ue is multi-uncorrectable
    378 *	ce is single-correctable
    379 *
     380 * The sub-block is the sub-block index; pass 0 if there is no sub-block.
    381 * The address and value are hexadecimal numbers, leading 0x is optional.
    382 *
    383 * For instance,
    384 *
    385 * .. code-block:: bash
    386 *
    387 *	echo inject umc ue 0x0 0x0 0x0 > /sys/kernel/debug/dri/0/ras/ras_ctrl
    388 *	echo inject umc ce 0 0 0 > /sys/kernel/debug/dri/0/ras/ras_ctrl
    389 *	echo disable umc > /sys/kernel/debug/dri/0/ras/ras_ctrl
    390 *
    391 * How to check the result of the operation?
    392 *
    393 * To check disable/enable, see "ras" features at,
    394 * /sys/class/drm/card[0/1/2...]/device/ras/features
    395 *
    396 * To check inject, see the corresponding error count at,
    397 * /sys/class/drm/card[0/1/2...]/device/ras/[gfx|sdma|umc|...]_err_count
    398 *
    399 * .. note::
    400 *	Operations are only allowed on blocks which are supported.
    401 *	Check the "ras" mask at /sys/module/amdgpu/parameters/ras_mask
    402 *	to see which blocks support RAS on a particular asic.
    403 *
    404 */
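/*
 * A minimal user-space sketch of the programmatic path mentioned above
 * (illustrative only: it assumes a user-space copy of struct ras_debug_if
 * with the same layout as the kernel's and the usual <fcntl.h>/<unistd.h>
 * headers; writes shorter than sizeof(struct ras_debug_if) are rejected):
 *
 * .. code-block:: c
 *
 *	struct ras_debug_if data = { 0 };
 *	int fd;
 *
 *	data.head.block = AMDGPU_RAS_BLOCK__UMC;
 *	data.head.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
 *	// op: 0 = disable, 1 = enable, 2 = inject (see the list above)
 *	data.op = 1;
 *
 *	fd = open("/sys/kernel/debug/dri/0/ras/ras_ctrl", O_WRONLY);
 *	if (fd >= 0) {
 *		write(fd, &data, sizeof(data));
 *		close(fd);
 *	}
 */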
    405static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f,
    406					     const char __user *buf,
    407					     size_t size, loff_t *pos)
    408{
    409	struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
    410	struct ras_debug_if data;
    411	int ret = 0;
    412
    413	if (!amdgpu_ras_get_error_query_ready(adev)) {
    414		dev_warn(adev->dev, "RAS WARN: error injection "
    415				"currently inaccessible\n");
    416		return size;
    417	}
    418
    419	ret = amdgpu_ras_debugfs_ctrl_parse_data(f, buf, size, pos, &data);
    420	if (ret)
    421		return ret;
    422
    423	if (data.op == 3) {
    424		ret = amdgpu_reserve_page_direct(adev, data.inject.address);
    425		if (!ret)
    426			return size;
    427		else
    428			return ret;
    429	}
    430
    431	if (!amdgpu_ras_is_supported(adev, data.head.block))
    432		return -EINVAL;
    433
    434	switch (data.op) {
    435	case 0:
    436		ret = amdgpu_ras_feature_enable(adev, &data.head, 0);
    437		break;
    438	case 1:
    439		ret = amdgpu_ras_feature_enable(adev, &data.head, 1);
    440		break;
    441	case 2:
    442		if ((data.inject.address >= adev->gmc.mc_vram_size) ||
    443		    (data.inject.address >= RAS_UMC_INJECT_ADDR_LIMIT)) {
    444			dev_warn(adev->dev, "RAS WARN: input address "
    445					"0x%llx is invalid.",
    446					data.inject.address);
    447			ret = -EINVAL;
    448			break;
    449		}
    450
    451		/* umc ce/ue error injection for a bad page is not allowed */
    452		if ((data.head.block == AMDGPU_RAS_BLOCK__UMC) &&
    453		    amdgpu_ras_check_bad_page(adev, data.inject.address)) {
    454			dev_warn(adev->dev, "RAS WARN: inject: 0x%llx has "
    455				 "already been marked as bad!\n",
    456				 data.inject.address);
    457			break;
    458		}
    459
    460		/* data.inject.address is offset instead of absolute gpu address */
    461		ret = amdgpu_ras_error_inject(adev, &data.inject);
    462		break;
    463	default:
    464		ret = -EINVAL;
    465		break;
    466	}
    467
    468	if (ret)
    469		return ret;
    470
    471	return size;
    472}
    473
    474/**
    475 * DOC: AMDGPU RAS debugfs EEPROM table reset interface
    476 *
    477 * Some boards contain an EEPROM which is used to persistently store a list of
     478 * bad pages which have experienced ECC errors in vram.  This interface provides
    479 * a way to reset the EEPROM, e.g., after testing error injection.
    480 *
    481 * Usage:
    482 *
    483 * .. code-block:: bash
    484 *
    485 *	echo 1 > ../ras/ras_eeprom_reset
    486 *
    487 * will reset EEPROM table to 0 entries.
    488 *
    489 */
    490static ssize_t amdgpu_ras_debugfs_eeprom_write(struct file *f,
    491					       const char __user *buf,
    492					       size_t size, loff_t *pos)
    493{
    494	struct amdgpu_device *adev =
    495		(struct amdgpu_device *)file_inode(f)->i_private;
    496	int ret;
    497
    498	ret = amdgpu_ras_eeprom_reset_table(
    499		&(amdgpu_ras_get_context(adev)->eeprom_control));
    500
    501	if (!ret) {
    502		/* Something was written to EEPROM.
    503		 */
    504		amdgpu_ras_get_context(adev)->flags = RAS_DEFAULT_FLAGS;
    505		return size;
    506	} else {
    507		return ret;
    508	}
    509}
    510
    511static const struct file_operations amdgpu_ras_debugfs_ctrl_ops = {
    512	.owner = THIS_MODULE,
    513	.read = NULL,
    514	.write = amdgpu_ras_debugfs_ctrl_write,
    515	.llseek = default_llseek
    516};
    517
    518static const struct file_operations amdgpu_ras_debugfs_eeprom_ops = {
    519	.owner = THIS_MODULE,
    520	.read = NULL,
    521	.write = amdgpu_ras_debugfs_eeprom_write,
    522	.llseek = default_llseek
    523};
    524
    525/**
    526 * DOC: AMDGPU RAS sysfs Error Count Interface
    527 *
    528 * It allows the user to read the error count for each IP block on the gpu through
    529 * /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count
    530 *
     531 * It outputs multiple lines which report the uncorrected (ue) and corrected
    532 * (ce) error counts.
    533 *
    534 * The format of one line is below,
    535 *
    536 * [ce|ue]: count
    537 *
    538 * Example:
    539 *
    540 * .. code-block:: bash
    541 *
    542 *	ue: 0
    543 *	ce: 1
    544 *
    545 */
    546static ssize_t amdgpu_ras_sysfs_read(struct device *dev,
    547		struct device_attribute *attr, char *buf)
    548{
    549	struct ras_manager *obj = container_of(attr, struct ras_manager, sysfs_attr);
    550	struct ras_query_if info = {
    551		.head = obj->head,
    552	};
    553
    554	if (!amdgpu_ras_get_error_query_ready(obj->adev))
    555		return sysfs_emit(buf, "Query currently inaccessible\n");
    556
    557	if (amdgpu_ras_query_error_status(obj->adev, &info))
    558		return -EINVAL;
    559
    560	if (obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) &&
    561	    obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) {
    562		if (amdgpu_ras_reset_error_status(obj->adev, info.head.block))
    563			dev_warn(obj->adev->dev, "Failed to reset error counter and error status");
    564	}
    565
    566	return sysfs_emit(buf, "%s: %lu\n%s: %lu\n", "ue", info.ue_count,
    567			  "ce", info.ce_count);
    568}
    569
    570/* obj begin */
    571
    572#define get_obj(obj) do { (obj)->use++; } while (0)
    573#define alive_obj(obj) ((obj)->use)
    574
    575static inline void put_obj(struct ras_manager *obj)
    576{
    577	if (obj && (--obj->use == 0))
    578		list_del(&obj->node);
    579	if (obj && (obj->use < 0))
    580		DRM_ERROR("RAS ERROR: Unbalance obj(%s) use\n", get_ras_block_str(&obj->head));
    581}
    582
    583/* make one obj and return it. */
    584static struct ras_manager *amdgpu_ras_create_obj(struct amdgpu_device *adev,
    585		struct ras_common_if *head)
    586{
    587	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
    588	struct ras_manager *obj;
    589
    590	if (!adev->ras_enabled || !con)
    591		return NULL;
    592
    593	if (head->block >= AMDGPU_RAS_BLOCK_COUNT)
    594		return NULL;
    595
    596	if (head->block == AMDGPU_RAS_BLOCK__MCA) {
    597		if (head->sub_block_index >= AMDGPU_RAS_MCA_BLOCK__LAST)
    598			return NULL;
    599
    600		obj = &con->objs[AMDGPU_RAS_BLOCK__LAST + head->sub_block_index];
    601	} else
    602		obj = &con->objs[head->block];
    603
     604	/* already exists, return obj? */
    605	if (alive_obj(obj))
    606		return NULL;
    607
    608	obj->head = *head;
    609	obj->adev = adev;
    610	list_add(&obj->node, &con->head);
    611	get_obj(obj);
    612
    613	return obj;
    614}
    615
    616/* return an obj equal to head, or the first when head is NULL */
    617struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
    618		struct ras_common_if *head)
    619{
    620	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
    621	struct ras_manager *obj;
    622	int i;
    623
    624	if (!adev->ras_enabled || !con)
    625		return NULL;
    626
    627	if (head) {
    628		if (head->block >= AMDGPU_RAS_BLOCK_COUNT)
    629			return NULL;
    630
    631		if (head->block == AMDGPU_RAS_BLOCK__MCA) {
    632			if (head->sub_block_index >= AMDGPU_RAS_MCA_BLOCK__LAST)
    633				return NULL;
    634
    635			obj = &con->objs[AMDGPU_RAS_BLOCK__LAST + head->sub_block_index];
    636		} else
    637			obj = &con->objs[head->block];
    638
    639		if (alive_obj(obj))
    640			return obj;
    641	} else {
    642		for (i = 0; i < AMDGPU_RAS_BLOCK_COUNT + AMDGPU_RAS_MCA_BLOCK_COUNT; i++) {
    643			obj = &con->objs[i];
    644			if (alive_obj(obj))
    645				return obj;
    646		}
    647	}
    648
    649	return NULL;
    650}
    651/* obj end */
    652
    653/* feature ctl begin */
    654static int amdgpu_ras_is_feature_allowed(struct amdgpu_device *adev,
    655					 struct ras_common_if *head)
    656{
    657	return adev->ras_hw_enabled & BIT(head->block);
    658}
    659
    660static int amdgpu_ras_is_feature_enabled(struct amdgpu_device *adev,
    661		struct ras_common_if *head)
    662{
    663	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
    664
    665	return con->features & BIT(head->block);
    666}
    667
    668/*
    669 * if obj is not created, then create one.
    670 * set feature enable flag.
    671 */
    672static int __amdgpu_ras_feature_enable(struct amdgpu_device *adev,
    673		struct ras_common_if *head, int enable)
    674{
    675	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
    676	struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
    677
     678	/* If the hardware does not support ras, then do not create the obj.
     679	 * But if the hardware does support ras, we can create the obj.
     680	 * The ras framework checks con->hw_supported to see if it needs to do
     681	 * the corresponding initialization.
     682	 * The IP checks con->support to see if it needs to disable ras.
     683	 */
    684	if (!amdgpu_ras_is_feature_allowed(adev, head))
    685		return 0;
    686
    687	if (enable) {
    688		if (!obj) {
    689			obj = amdgpu_ras_create_obj(adev, head);
    690			if (!obj)
    691				return -EINVAL;
    692		} else {
    693			/* In case we create obj somewhere else */
    694			get_obj(obj);
    695		}
    696		con->features |= BIT(head->block);
    697	} else {
    698		if (obj && amdgpu_ras_is_feature_enabled(adev, head)) {
    699			con->features &= ~BIT(head->block);
    700			put_obj(obj);
    701		}
    702	}
    703
    704	return 0;
    705}
    706
    707/* wrapper of psp_ras_enable_features */
    708int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
    709		struct ras_common_if *head, bool enable)
    710{
    711	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
    712	union ta_ras_cmd_input *info;
    713	int ret;
    714
    715	if (!con)
    716		return -EINVAL;
    717
    718	info = kzalloc(sizeof(union ta_ras_cmd_input), GFP_KERNEL);
    719	if (!info)
    720		return -ENOMEM;
    721
    722	if (!enable) {
    723		info->disable_features = (struct ta_ras_disable_features_input) {
    724			.block_id =  amdgpu_ras_block_to_ta(head->block),
    725			.error_type = amdgpu_ras_error_to_ta(head->type),
    726		};
    727	} else {
    728		info->enable_features = (struct ta_ras_enable_features_input) {
    729			.block_id =  amdgpu_ras_block_to_ta(head->block),
    730			.error_type = amdgpu_ras_error_to_ta(head->type),
    731		};
    732	}
    733
    734	/* Do not enable if it is not allowed. */
    735	WARN_ON(enable && !amdgpu_ras_is_feature_allowed(adev, head));
    736
     737	/* Only enable ras feature operation handling on the host side */
    738	if (!amdgpu_sriov_vf(adev) &&
    739		!amdgpu_ras_intr_triggered()) {
    740		ret = psp_ras_enable_features(&adev->psp, info, enable);
    741		if (ret) {
    742			dev_err(adev->dev, "ras %s %s failed poison:%d ret:%d\n",
    743				enable ? "enable":"disable",
    744				get_ras_block_str(head),
    745				amdgpu_ras_is_poison_mode_supported(adev), ret);
    746			goto out;
    747		}
    748	}
    749
    750	/* setup the obj */
    751	__amdgpu_ras_feature_enable(adev, head, enable);
    752	ret = 0;
    753out:
    754	kfree(info);
    755	return ret;
    756}
    757
    758/* Only used in device probe stage and called only once. */
    759int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev,
    760		struct ras_common_if *head, bool enable)
    761{
    762	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
    763	int ret;
    764
    765	if (!con)
    766		return -EINVAL;
    767
    768	if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS) {
    769		if (enable) {
     770			/* There is no harm in issuing a ras TA cmd regardless of
     771			 * the current ras state.
     772			 * If current state == target state, it will do nothing.
     773			 * But sometimes it requests the driver to reset and repost
     774			 * with error code -EAGAIN.
     775			 */
    776			ret = amdgpu_ras_feature_enable(adev, head, 1);
     777			/* With an old ras TA, we might fail to enable ras.
     778			 * Log it and just set up the object.
     779			 * TODO: remove this WA in the future.
    780			 */
    781			if (ret == -EINVAL) {
    782				ret = __amdgpu_ras_feature_enable(adev, head, 1);
    783				if (!ret)
    784					dev_info(adev->dev,
    785						"RAS INFO: %s setup object\n",
    786						get_ras_block_str(head));
    787			}
    788		} else {
    789			/* setup the object then issue a ras TA disable cmd.*/
    790			ret = __amdgpu_ras_feature_enable(adev, head, 1);
    791			if (ret)
    792				return ret;
    793
     794			/* gfx block ras disable cmd must be sent to ras-ta */
    795			if (head->block == AMDGPU_RAS_BLOCK__GFX)
    796				con->features |= BIT(head->block);
    797
    798			ret = amdgpu_ras_feature_enable(adev, head, 0);
    799
    800			/* clean gfx block ras features flag */
    801			if (adev->ras_enabled && head->block == AMDGPU_RAS_BLOCK__GFX)
    802				con->features &= ~BIT(head->block);
    803		}
    804	} else
    805		ret = amdgpu_ras_feature_enable(adev, head, enable);
    806
    807	return ret;
    808}
    809
    810static int amdgpu_ras_disable_all_features(struct amdgpu_device *adev,
    811		bool bypass)
    812{
    813	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
    814	struct ras_manager *obj, *tmp;
    815
    816	list_for_each_entry_safe(obj, tmp, &con->head, node) {
    817		/* bypass psp.
    818		 * aka just release the obj and corresponding flags
    819		 */
    820		if (bypass) {
    821			if (__amdgpu_ras_feature_enable(adev, &obj->head, 0))
    822				break;
    823		} else {
    824			if (amdgpu_ras_feature_enable(adev, &obj->head, 0))
    825				break;
    826		}
    827	}
    828
    829	return con->features;
    830}
    831
    832static int amdgpu_ras_enable_all_features(struct amdgpu_device *adev,
    833		bool bypass)
    834{
    835	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
    836	int i;
    837	const enum amdgpu_ras_error_type default_ras_type = AMDGPU_RAS_ERROR__NONE;
    838
    839	for (i = 0; i < AMDGPU_RAS_BLOCK_COUNT; i++) {
    840		struct ras_common_if head = {
    841			.block = i,
    842			.type = default_ras_type,
    843			.sub_block_index = 0,
    844		};
    845
    846		if (i == AMDGPU_RAS_BLOCK__MCA)
    847			continue;
    848
    849		if (bypass) {
     850			/*
     851			 * bypass psp. vbios enables ras for us,
     852			 * so just create the obj
     853			 */
    854			if (__amdgpu_ras_feature_enable(adev, &head, 1))
    855				break;
    856		} else {
    857			if (amdgpu_ras_feature_enable(adev, &head, 1))
    858				break;
    859		}
    860	}
    861
    862	for (i = 0; i < AMDGPU_RAS_MCA_BLOCK_COUNT; i++) {
    863		struct ras_common_if head = {
    864			.block = AMDGPU_RAS_BLOCK__MCA,
    865			.type = default_ras_type,
    866			.sub_block_index = i,
    867		};
    868
    869		if (bypass) {
     870			/*
     871			 * bypass psp. vbios enables ras for us,
     872			 * so just create the obj
     873			 */
    874			if (__amdgpu_ras_feature_enable(adev, &head, 1))
    875				break;
    876		} else {
    877			if (amdgpu_ras_feature_enable(adev, &head, 1))
    878				break;
    879		}
    880	}
    881
    882	return con->features;
    883}
    884/* feature ctl end */
    885
    886static int amdgpu_ras_block_match_default(struct amdgpu_ras_block_object *block_obj,
    887		enum amdgpu_ras_block block)
    888{
    889	if (!block_obj)
    890		return -EINVAL;
    891
    892	if (block_obj->ras_comm.block == block)
    893		return 0;
    894
    895	return -EINVAL;
    896}
    897
    898static struct amdgpu_ras_block_object *amdgpu_ras_get_ras_block(struct amdgpu_device *adev,
    899					enum amdgpu_ras_block block, uint32_t sub_block_index)
    900{
    901	struct amdgpu_ras_block_list *node, *tmp;
    902	struct amdgpu_ras_block_object *obj;
    903
    904	if (block >= AMDGPU_RAS_BLOCK__LAST)
    905		return NULL;
    906
    907	if (!amdgpu_ras_is_supported(adev, block))
    908		return NULL;
    909
    910	list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {
    911		if (!node->ras_obj) {
    912			dev_warn(adev->dev, "Warning: abnormal ras list node.\n");
    913			continue;
    914		}
    915
    916		obj = node->ras_obj;
    917		if (obj->ras_block_match) {
    918			if (obj->ras_block_match(obj, block, sub_block_index) == 0)
    919				return obj;
    920		} else {
    921			if (amdgpu_ras_block_match_default(obj, block) == 0)
    922				return obj;
    923		}
    924	}
    925
    926	return NULL;
    927}
    928
    929static void amdgpu_ras_get_ecc_info(struct amdgpu_device *adev, struct ras_err_data *err_data)
    930{
    931	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
    932	int ret = 0;
    933
     934	/*
     935	 * choose the right query method according to
     936	 * whether the smu supports querying error information
     937	 */
    938	ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(ras->umc_ecc));
    939	if (ret == -EOPNOTSUPP) {
    940		if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
    941			adev->umc.ras->ras_block.hw_ops->query_ras_error_count)
    942			adev->umc.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data);
    943
    944		/* umc query_ras_error_address is also responsible for clearing
    945		 * error status
    946		 */
    947		if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
    948		    adev->umc.ras->ras_block.hw_ops->query_ras_error_address)
    949			adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev, err_data);
    950	} else if (!ret) {
    951		if (adev->umc.ras &&
    952			adev->umc.ras->ecc_info_query_ras_error_count)
    953			adev->umc.ras->ecc_info_query_ras_error_count(adev, err_data);
    954
    955		if (adev->umc.ras &&
    956			adev->umc.ras->ecc_info_query_ras_error_address)
    957			adev->umc.ras->ecc_info_query_ras_error_address(adev, err_data);
    958	}
    959}
    960
    961/* query/inject/cure begin */
    962int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
    963				  struct ras_query_if *info)
    964{
    965	struct amdgpu_ras_block_object *block_obj = NULL;
    966	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
    967	struct ras_err_data err_data = {0, 0, 0, NULL};
    968
    969	if (!obj)
    970		return -EINVAL;
    971
    972	if (info->head.block == AMDGPU_RAS_BLOCK__UMC) {
    973		amdgpu_ras_get_ecc_info(adev, &err_data);
    974	} else {
    975		block_obj = amdgpu_ras_get_ras_block(adev, info->head.block, 0);
    976		if (!block_obj || !block_obj->hw_ops)   {
    977			dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
    978				     get_ras_block_str(&info->head));
    979			return -EINVAL;
    980		}
    981
    982		if (block_obj->hw_ops->query_ras_error_count)
    983			block_obj->hw_ops->query_ras_error_count(adev, &err_data);
    984
    985		if ((info->head.block == AMDGPU_RAS_BLOCK__SDMA) ||
    986		    (info->head.block == AMDGPU_RAS_BLOCK__GFX) ||
    987		    (info->head.block == AMDGPU_RAS_BLOCK__MMHUB)) {
    988				if (block_obj->hw_ops->query_ras_error_status)
    989					block_obj->hw_ops->query_ras_error_status(adev);
    990			}
    991	}
    992
    993	obj->err_data.ue_count += err_data.ue_count;
    994	obj->err_data.ce_count += err_data.ce_count;
    995
    996	info->ue_count = obj->err_data.ue_count;
    997	info->ce_count = obj->err_data.ce_count;
    998
    999	if (err_data.ce_count) {
   1000		if (adev->smuio.funcs &&
   1001		    adev->smuio.funcs->get_socket_id &&
   1002		    adev->smuio.funcs->get_die_id) {
   1003			dev_info(adev->dev, "socket: %d, die: %d "
   1004					"%ld correctable hardware errors "
   1005					"detected in %s block, no user "
   1006					"action is needed.\n",
   1007					adev->smuio.funcs->get_socket_id(adev),
   1008					adev->smuio.funcs->get_die_id(adev),
   1009					obj->err_data.ce_count,
   1010					get_ras_block_str(&info->head));
   1011		} else {
   1012			dev_info(adev->dev, "%ld correctable hardware errors "
   1013					"detected in %s block, no user "
   1014					"action is needed.\n",
   1015					obj->err_data.ce_count,
   1016					get_ras_block_str(&info->head));
   1017		}
   1018	}
   1019	if (err_data.ue_count) {
   1020		if (adev->smuio.funcs &&
   1021		    adev->smuio.funcs->get_socket_id &&
   1022		    adev->smuio.funcs->get_die_id) {
   1023			dev_info(adev->dev, "socket: %d, die: %d "
   1024					"%ld uncorrectable hardware errors "
   1025					"detected in %s block\n",
   1026					adev->smuio.funcs->get_socket_id(adev),
   1027					adev->smuio.funcs->get_die_id(adev),
   1028					obj->err_data.ue_count,
   1029					get_ras_block_str(&info->head));
   1030		} else {
   1031			dev_info(adev->dev, "%ld uncorrectable hardware errors "
   1032					"detected in %s block\n",
   1033					obj->err_data.ue_count,
   1034					get_ras_block_str(&info->head));
   1035		}
   1036	}
   1037
   1038	return 0;
   1039}
   1040
   1041int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
   1042		enum amdgpu_ras_block block)
   1043{
   1044	struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0);
   1045
   1046	if (!amdgpu_ras_is_supported(adev, block))
   1047		return -EINVAL;
   1048
   1049	if (!block_obj || !block_obj->hw_ops)   {
   1050		dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
   1051			     ras_block_str(block));
   1052		return -EINVAL;
   1053	}
   1054
   1055	if (block_obj->hw_ops->reset_ras_error_count)
   1056		block_obj->hw_ops->reset_ras_error_count(adev);
   1057
   1058	if ((block == AMDGPU_RAS_BLOCK__GFX) ||
   1059	    (block == AMDGPU_RAS_BLOCK__MMHUB)) {
   1060		if (block_obj->hw_ops->reset_ras_error_status)
   1061			block_obj->hw_ops->reset_ras_error_status(adev);
   1062	}
   1063
   1064	return 0;
   1065}
   1066
   1067/* wrapper of psp_ras_trigger_error */
   1068int amdgpu_ras_error_inject(struct amdgpu_device *adev,
   1069		struct ras_inject_if *info)
   1070{
   1071	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
   1072	struct ta_ras_trigger_error_input block_info = {
   1073		.block_id =  amdgpu_ras_block_to_ta(info->head.block),
   1074		.inject_error_type = amdgpu_ras_error_to_ta(info->head.type),
   1075		.sub_block_index = info->head.sub_block_index,
   1076		.address = info->address,
   1077		.value = info->value,
   1078	};
   1079	int ret = -EINVAL;
   1080	struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev,
   1081							info->head.block,
   1082							info->head.sub_block_index);
   1083
   1084	if (!obj)
   1085		return -EINVAL;
   1086
   1087	if (!block_obj || !block_obj->hw_ops)	{
   1088		dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
   1089			     get_ras_block_str(&info->head));
   1090		return -EINVAL;
   1091	}
   1092
   1093	/* Calculate XGMI relative offset */
   1094	if (adev->gmc.xgmi.num_physical_nodes > 1) {
   1095		block_info.address =
   1096			amdgpu_xgmi_get_relative_phy_addr(adev,
   1097							  block_info.address);
   1098	}
   1099
   1100	if (info->head.block == AMDGPU_RAS_BLOCK__GFX) {
   1101		if (block_obj->hw_ops->ras_error_inject)
   1102			ret = block_obj->hw_ops->ras_error_inject(adev, info);
   1103	} else {
    1104		/* If a special ras_error_inject is defined (e.g. xgmi), use that special ras_error_inject */
    1105		if (block_obj->hw_ops->ras_error_inject)
    1106			ret = block_obj->hw_ops->ras_error_inject(adev, &block_info);
    1107		else  /* If .ras_error_inject is not defined, use the default ras_error_inject */
   1108			ret = psp_ras_trigger_error(&adev->psp, &block_info);
   1109	}
   1110
   1111	if (ret)
   1112		dev_err(adev->dev, "ras inject %s failed %d\n",
   1113			get_ras_block_str(&info->head), ret);
   1114
   1115	return ret;
   1116}
   1117
   1118/**
   1119 * amdgpu_ras_query_error_count -- Get error counts of all IPs
   1120 * @adev: pointer to AMD GPU device
    1121 * @ce_count: pointer to an integer to be set to the count of correctable errors.
    1122 * @ue_count: pointer to an integer to be set to the count of uncorrectable
    1123 * errors.
    1124 *
    1125 * If @ce_count or @ue_count is set, count and return the corresponding
    1126 * error counts in those integer pointers. Return 0 if the device
    1127 * supports RAS. Return -EOPNOTSUPP if the device doesn't support RAS.
   1128 */
   1129int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
   1130				 unsigned long *ce_count,
   1131				 unsigned long *ue_count)
   1132{
   1133	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
   1134	struct ras_manager *obj;
   1135	unsigned long ce, ue;
   1136
   1137	if (!adev->ras_enabled || !con)
   1138		return -EOPNOTSUPP;
   1139
   1140	/* Don't count since no reporting.
   1141	 */
   1142	if (!ce_count && !ue_count)
   1143		return 0;
   1144
   1145	ce = 0;
   1146	ue = 0;
   1147	list_for_each_entry(obj, &con->head, node) {
   1148		struct ras_query_if info = {
   1149			.head = obj->head,
   1150		};
   1151		int res;
   1152
   1153		res = amdgpu_ras_query_error_status(adev, &info);
   1154		if (res)
   1155			return res;
   1156
   1157		if (adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) &&
   1158		    adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) {
   1159			if (amdgpu_ras_reset_error_status(adev, info.head.block))
   1160				dev_warn(adev->dev, "Failed to reset error counter and error status");
   1161		}
   1162
   1163		ce += info.ce_count;
   1164		ue += info.ue_count;
   1165	}
   1166
   1167	if (ce_count)
   1168		*ce_count = ce;
   1169
   1170	if (ue_count)
   1171		*ue_count = ue;
   1172
   1173	return 0;
   1174}
   1175/* query/inject/cure end */
   1176
   1177
   1178/* sysfs begin */
   1179
   1180static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
   1181		struct ras_badpage **bps, unsigned int *count);
   1182
   1183static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
   1184{
   1185	switch (flags) {
   1186	case AMDGPU_RAS_RETIRE_PAGE_RESERVED:
   1187		return "R";
   1188	case AMDGPU_RAS_RETIRE_PAGE_PENDING:
   1189		return "P";
   1190	case AMDGPU_RAS_RETIRE_PAGE_FAULT:
   1191	default:
   1192		return "F";
   1193	}
   1194}
   1195
   1196/**
   1197 * DOC: AMDGPU RAS sysfs gpu_vram_bad_pages Interface
   1198 *
    1199 * It allows the user to read the bad pages of vram on the gpu through
   1200 * /sys/class/drm/card[0/1/2...]/device/ras/gpu_vram_bad_pages
   1201 *
   1202 * It outputs multiple lines, and each line stands for one gpu page.
   1203 *
   1204 * The format of one line is below,
   1205 * gpu pfn : gpu page size : flags
   1206 *
   1207 * gpu pfn and gpu page size are printed in hex format.
    1208 * flags is one of the characters below,
    1209 *
    1210 * R: reserved, this gpu page is reserved and not able to be used.
    1211 *
    1212 * P: pending for reserve, this gpu page is marked as bad and will be reserved
    1213 * in the next window of page_reserve.
    1214 *
    1215 * F: unable to reserve. this gpu page can't be reserved for some reason.
   1216 *
   1217 * Examples:
   1218 *
   1219 * .. code-block:: bash
   1220 *
   1221 *	0x00000001 : 0x00001000 : R
   1222 *	0x00000002 : 0x00001000 : P
   1223 *
   1224 */
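/*
 * A hypothetical user-space parsing sketch for the fixed-width records shown
 * above (each record matches the element_size used by
 * amdgpu_ras_sysfs_badpages_read() below; "line" is one record read from the
 * file):
 *
 * .. code-block:: c
 *
 *	unsigned int pfn, size;
 *	char flag;
 *
 *	if (sscanf(line, "0x%08x : 0x%08x : %c", &pfn, &size, &flag) == 3)
 *		printf("page 0x%x, %u bytes, state %c\n", pfn, size, flag);
 */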
   1225
   1226static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f,
   1227		struct kobject *kobj, struct bin_attribute *attr,
   1228		char *buf, loff_t ppos, size_t count)
   1229{
   1230	struct amdgpu_ras *con =
   1231		container_of(attr, struct amdgpu_ras, badpages_attr);
   1232	struct amdgpu_device *adev = con->adev;
   1233	const unsigned int element_size =
   1234		sizeof("0xabcdabcd : 0x12345678 : R\n") - 1;
   1235	unsigned int start = div64_ul(ppos + element_size - 1, element_size);
   1236	unsigned int end = div64_ul(ppos + count - 1, element_size);
   1237	ssize_t s = 0;
   1238	struct ras_badpage *bps = NULL;
   1239	unsigned int bps_count = 0;
   1240
   1241	memset(buf, 0, count);
   1242
   1243	if (amdgpu_ras_badpages_read(adev, &bps, &bps_count))
   1244		return 0;
   1245
   1246	for (; start < end && start < bps_count; start++)
   1247		s += scnprintf(&buf[s], element_size + 1,
   1248				"0x%08x : 0x%08x : %1s\n",
   1249				bps[start].bp,
   1250				bps[start].size,
   1251				amdgpu_ras_badpage_flags_str(bps[start].flags));
   1252
   1253	kfree(bps);
   1254
   1255	return s;
   1256}
   1257
   1258static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev,
   1259		struct device_attribute *attr, char *buf)
   1260{
   1261	struct amdgpu_ras *con =
   1262		container_of(attr, struct amdgpu_ras, features_attr);
   1263
   1264	return scnprintf(buf, PAGE_SIZE, "feature mask: 0x%x\n", con->features);
   1265}
   1266
   1267static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev)
   1268{
   1269	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
   1270
   1271	sysfs_remove_file_from_group(&adev->dev->kobj,
   1272				&con->badpages_attr.attr,
   1273				RAS_FS_NAME);
   1274}
   1275
   1276static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev)
   1277{
   1278	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
   1279	struct attribute *attrs[] = {
   1280		&con->features_attr.attr,
   1281		NULL
   1282	};
   1283	struct attribute_group group = {
   1284		.name = RAS_FS_NAME,
   1285		.attrs = attrs,
   1286	};
   1287
   1288	sysfs_remove_group(&adev->dev->kobj, &group);
   1289
   1290	return 0;
   1291}
   1292
   1293int amdgpu_ras_sysfs_create(struct amdgpu_device *adev,
   1294		struct ras_common_if *head)
   1295{
   1296	struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
   1297
   1298	if (!obj || obj->attr_inuse)
   1299		return -EINVAL;
   1300
   1301	get_obj(obj);
   1302
   1303	snprintf(obj->fs_data.sysfs_name, sizeof(obj->fs_data.sysfs_name),
   1304		"%s_err_count", head->name);
   1305
   1306	obj->sysfs_attr = (struct device_attribute){
   1307		.attr = {
   1308			.name = obj->fs_data.sysfs_name,
   1309			.mode = S_IRUGO,
   1310		},
   1311			.show = amdgpu_ras_sysfs_read,
   1312	};
   1313	sysfs_attr_init(&obj->sysfs_attr.attr);
   1314
   1315	if (sysfs_add_file_to_group(&adev->dev->kobj,
   1316				&obj->sysfs_attr.attr,
   1317				RAS_FS_NAME)) {
   1318		put_obj(obj);
   1319		return -EINVAL;
   1320	}
   1321
   1322	obj->attr_inuse = 1;
   1323
   1324	return 0;
   1325}
   1326
   1327int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev,
   1328		struct ras_common_if *head)
   1329{
   1330	struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
   1331
   1332	if (!obj || !obj->attr_inuse)
   1333		return -EINVAL;
   1334
   1335	sysfs_remove_file_from_group(&adev->dev->kobj,
   1336				&obj->sysfs_attr.attr,
   1337				RAS_FS_NAME);
   1338	obj->attr_inuse = 0;
   1339	put_obj(obj);
   1340
   1341	return 0;
   1342}
   1343
   1344static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev)
   1345{
   1346	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
   1347	struct ras_manager *obj, *tmp;
   1348
   1349	list_for_each_entry_safe(obj, tmp, &con->head, node) {
   1350		amdgpu_ras_sysfs_remove(adev, &obj->head);
   1351	}
   1352
   1353	if (amdgpu_bad_page_threshold != 0)
   1354		amdgpu_ras_sysfs_remove_bad_page_node(adev);
   1355
   1356	amdgpu_ras_sysfs_remove_feature_node(adev);
   1357
   1358	return 0;
   1359}
   1360/* sysfs end */
   1361
   1362/**
   1363 * DOC: AMDGPU RAS Reboot Behavior for Unrecoverable Errors
   1364 *
   1365 * Normally when there is an uncorrectable error, the driver will reset
   1366 * the GPU to recover.  However, in the event of an unrecoverable error,
    1367 * the driver provides an interface to automatically reboot the system
    1368 * instead.
   1369 *
   1370 * The following file in debugfs provides that interface:
   1371 * /sys/kernel/debug/dri/[0/1/2...]/ras/auto_reboot
   1372 *
   1373 * Usage:
   1374 *
   1375 * .. code-block:: bash
   1376 *
   1377 *	echo true > .../ras/auto_reboot
   1378 *
   1379 */
   1380/* debugfs begin */
   1381static struct dentry *amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
   1382{
   1383	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
   1384	struct drm_minor  *minor = adev_to_drm(adev)->primary;
   1385	struct dentry     *dir;
   1386
   1387	dir = debugfs_create_dir(RAS_FS_NAME, minor->debugfs_root);
   1388	debugfs_create_file("ras_ctrl", S_IWUGO | S_IRUGO, dir, adev,
   1389			    &amdgpu_ras_debugfs_ctrl_ops);
   1390	debugfs_create_file("ras_eeprom_reset", S_IWUGO | S_IRUGO, dir, adev,
   1391			    &amdgpu_ras_debugfs_eeprom_ops);
   1392	debugfs_create_u32("bad_page_cnt_threshold", 0444, dir,
   1393			   &con->bad_page_cnt_threshold);
   1394	debugfs_create_x32("ras_hw_enabled", 0444, dir, &adev->ras_hw_enabled);
   1395	debugfs_create_x32("ras_enabled", 0444, dir, &adev->ras_enabled);
   1396	debugfs_create_file("ras_eeprom_size", S_IRUGO, dir, adev,
   1397			    &amdgpu_ras_debugfs_eeprom_size_ops);
   1398	con->de_ras_eeprom_table = debugfs_create_file("ras_eeprom_table",
   1399						       S_IRUGO, dir, adev,
   1400						       &amdgpu_ras_debugfs_eeprom_table_ops);
   1401	amdgpu_ras_debugfs_set_ret_size(&con->eeprom_control);
   1402
    1403	/*
    1404	 * After an uncorrectable error happens, GPU recovery will usually
    1405	 * be scheduled. But since GPU recovery is known to sometimes fail to
    1406	 * bring the GPU back, the interface below gives the user a direct way
    1407	 * to reboot the system automatically when an ERREVENT_ATHUB_INTERRUPT
    1408	 * is generated. In that case the normal GPU recovery routine will
    1409	 * never be called.
    1410	 */
   1411	debugfs_create_bool("auto_reboot", S_IWUGO | S_IRUGO, dir, &con->reboot);
   1412
   1413	/*
    1414	 * The user can set this so that the error count registers of the RAS
    1415	 * IPs are not cleaned up during ras recovery.
   1416	 */
   1417	debugfs_create_bool("disable_ras_err_cnt_harvest", 0644, dir,
   1418			    &con->disable_ras_err_cnt_harvest);
   1419	return dir;
   1420}
   1421
   1422static void amdgpu_ras_debugfs_create(struct amdgpu_device *adev,
   1423				      struct ras_fs_if *head,
   1424				      struct dentry *dir)
   1425{
   1426	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head->head);
   1427
   1428	if (!obj || !dir)
   1429		return;
   1430
   1431	get_obj(obj);
   1432
   1433	memcpy(obj->fs_data.debugfs_name,
   1434			head->debugfs_name,
   1435			sizeof(obj->fs_data.debugfs_name));
   1436
   1437	debugfs_create_file(obj->fs_data.debugfs_name, S_IWUGO | S_IRUGO, dir,
   1438			    obj, &amdgpu_ras_debugfs_ops);
   1439}
   1440
   1441void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev)
   1442{
   1443	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
   1444	struct dentry *dir;
   1445	struct ras_manager *obj;
   1446	struct ras_fs_if fs_info;
   1447
   1448	/*
   1449	 * it won't be called in resume path, no need to check
   1450	 * suspend and gpu reset status
   1451	 */
   1452	if (!IS_ENABLED(CONFIG_DEBUG_FS) || !con)
   1453		return;
   1454
   1455	dir = amdgpu_ras_debugfs_create_ctrl_node(adev);
   1456
   1457	list_for_each_entry(obj, &con->head, node) {
   1458		if (amdgpu_ras_is_supported(adev, obj->head.block) &&
   1459			(obj->attr_inuse == 1)) {
   1460			sprintf(fs_info.debugfs_name, "%s_err_inject",
   1461					get_ras_block_str(&obj->head));
   1462			fs_info.head = obj->head;
   1463			amdgpu_ras_debugfs_create(adev, &fs_info, dir);
   1464		}
   1465	}
   1466}
   1467
   1468/* debugfs end */
   1469
   1470/* ras fs */
   1471static BIN_ATTR(gpu_vram_bad_pages, S_IRUGO,
   1472		amdgpu_ras_sysfs_badpages_read, NULL, 0);
   1473static DEVICE_ATTR(features, S_IRUGO,
   1474		amdgpu_ras_sysfs_features_read, NULL);
   1475static int amdgpu_ras_fs_init(struct amdgpu_device *adev)
   1476{
   1477	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
   1478	struct attribute_group group = {
   1479		.name = RAS_FS_NAME,
   1480	};
   1481	struct attribute *attrs[] = {
   1482		&con->features_attr.attr,
   1483		NULL
   1484	};
   1485	struct bin_attribute *bin_attrs[] = {
   1486		NULL,
   1487		NULL,
   1488	};
   1489	int r;
   1490
   1491	/* add features entry */
   1492	con->features_attr = dev_attr_features;
   1493	group.attrs = attrs;
   1494	sysfs_attr_init(attrs[0]);
   1495
   1496	if (amdgpu_bad_page_threshold != 0) {
   1497		/* add bad_page_features entry */
   1498		bin_attr_gpu_vram_bad_pages.private = NULL;
   1499		con->badpages_attr = bin_attr_gpu_vram_bad_pages;
   1500		bin_attrs[0] = &con->badpages_attr;
   1501		group.bin_attrs = bin_attrs;
   1502		sysfs_bin_attr_init(bin_attrs[0]);
   1503	}
   1504
   1505	r = sysfs_create_group(&adev->dev->kobj, &group);
   1506	if (r)
   1507		dev_err(adev->dev, "Failed to create RAS sysfs group!");
   1508
   1509	return 0;
   1510}
   1511
   1512static int amdgpu_ras_fs_fini(struct amdgpu_device *adev)
   1513{
   1514	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
   1515	struct ras_manager *con_obj, *ip_obj, *tmp;
   1516
   1517	if (IS_ENABLED(CONFIG_DEBUG_FS)) {
   1518		list_for_each_entry_safe(con_obj, tmp, &con->head, node) {
   1519			ip_obj = amdgpu_ras_find_obj(adev, &con_obj->head);
   1520			if (ip_obj)
   1521				put_obj(ip_obj);
   1522		}
   1523	}
   1524
   1525	amdgpu_ras_sysfs_remove_all(adev);
   1526	return 0;
   1527}
   1528/* ras fs end */
   1529
   1530/* ih begin */
   1531
    1532/* For hardware that cannot enable the bif ring for both the ras_controller_irq
    1533 * and ras_err_event_athub_irq ih cookies, the driver has to poll the status
    1534 * register to check whether the interrupt is triggered or not, and properly
    1535 * ack the interrupt if it is there.
    1536 */
   1537void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev)
   1538{
   1539	/* Fatal error events are handled on host side */
   1540	if (amdgpu_sriov_vf(adev) ||
   1541		!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF))
   1542		return;
   1543
   1544	if (adev->nbio.ras &&
   1545	    adev->nbio.ras->handle_ras_controller_intr_no_bifring)
   1546		adev->nbio.ras->handle_ras_controller_intr_no_bifring(adev);
   1547
   1548	if (adev->nbio.ras &&
   1549	    adev->nbio.ras->handle_ras_err_event_athub_intr_no_bifring)
   1550		adev->nbio.ras->handle_ras_err_event_athub_intr_no_bifring(adev);
   1551}
   1552
   1553static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *obj,
   1554				struct amdgpu_iv_entry *entry)
   1555{
   1556	bool poison_stat = false;
   1557	struct amdgpu_device *adev = obj->adev;
   1558	struct ras_err_data err_data = {0, 0, 0, NULL};
   1559	struct amdgpu_ras_block_object *block_obj =
   1560		amdgpu_ras_get_ras_block(adev, obj->head.block, 0);
   1561
   1562	if (!block_obj || !block_obj->hw_ops)
   1563		return;
   1564
    1565	/* both query_poison_status and handle_poison_consumption are optional,
    1566	 * but at least one of them should be implemented if we need a poison
    1567	 * consumption handler
    1568	 */
   1569	if (block_obj->hw_ops->query_poison_status) {
   1570		poison_stat = block_obj->hw_ops->query_poison_status(adev);
   1571		if (!poison_stat) {
   1572			/* Not poison consumption interrupt, no need to handle it */
   1573			dev_info(adev->dev, "No RAS poison status in %s poison IH.\n",
   1574					block_obj->ras_comm.name);
   1575
   1576			return;
   1577		}
   1578	}
   1579
   1580	if (!adev->gmc.xgmi.connected_to_cpu)
   1581		amdgpu_umc_poison_handler(adev, &err_data, false);
   1582
   1583	if (block_obj->hw_ops->handle_poison_consumption)
   1584		poison_stat = block_obj->hw_ops->handle_poison_consumption(adev);
   1585
   1586	/* gpu reset is fallback for failed and default cases */
   1587	if (poison_stat) {
   1588		dev_info(adev->dev, "GPU reset for %s RAS poison consumption is issued!\n",
   1589				block_obj->ras_comm.name);
   1590		amdgpu_ras_reset_gpu(adev);
   1591	}
   1592}
   1593
   1594static void amdgpu_ras_interrupt_poison_creation_handler(struct ras_manager *obj,
   1595				struct amdgpu_iv_entry *entry)
   1596{
   1597	dev_info(obj->adev->dev,
   1598		"Poison is created, no user action is needed.\n");
   1599}
   1600
   1601static void amdgpu_ras_interrupt_umc_handler(struct ras_manager *obj,
   1602				struct amdgpu_iv_entry *entry)
   1603{
   1604	struct ras_ih_data *data = &obj->ih_data;
   1605	struct ras_err_data err_data = {0, 0, 0, NULL};
   1606	int ret;
   1607
   1608	if (!data->cb)
   1609		return;
   1610
    1611	/* Let the IP handle its data; maybe we need to get the output
    1612	 * from the callback to update the error type/count, etc.
    1613	 */
   1614	ret = data->cb(obj->adev, &err_data, entry);
    1615	/* A ue will trigger an interrupt, and in that case
    1616	 * we need to do a reset to recover the whole system.
    1617	 * But leave the IP to do that recovery; here we just dispatch
    1618	 * the error.
    1619	 */
   1620	if (ret == AMDGPU_RAS_SUCCESS) {
    1621		/* these counts could be left as 0 if
    1622		 * some blocks do not count the number of errors
    1623		 */
   1624		obj->err_data.ue_count += err_data.ue_count;
   1625		obj->err_data.ce_count += err_data.ce_count;
   1626	}
   1627}
   1628
   1629static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
   1630{
   1631	struct ras_ih_data *data = &obj->ih_data;
   1632	struct amdgpu_iv_entry entry;
   1633
   1634	while (data->rptr != data->wptr) {
   1635		rmb();
   1636		memcpy(&entry, &data->ring[data->rptr],
   1637				data->element_size);
   1638
   1639		wmb();
   1640		data->rptr = (data->aligned_element_size +
   1641				data->rptr) % data->ring_size;
   1642
   1643		if (amdgpu_ras_is_poison_mode_supported(obj->adev)) {
   1644			if (obj->head.block == AMDGPU_RAS_BLOCK__UMC)
   1645				amdgpu_ras_interrupt_poison_creation_handler(obj, &entry);
   1646			else
   1647				amdgpu_ras_interrupt_poison_consumption_handler(obj, &entry);
   1648		} else {
   1649			if (obj->head.block == AMDGPU_RAS_BLOCK__UMC)
   1650				amdgpu_ras_interrupt_umc_handler(obj, &entry);
   1651			else
   1652				dev_warn(obj->adev->dev,
   1653					"No RAS interrupt handler for non-UMC block with poison disabled.\n");
   1654		}
   1655	}
   1656}
   1657
   1658static void amdgpu_ras_interrupt_process_handler(struct work_struct *work)
   1659{
   1660	struct ras_ih_data *data =
   1661		container_of(work, struct ras_ih_data, ih_work);
   1662	struct ras_manager *obj =
   1663		container_of(data, struct ras_manager, ih_data);
   1664
   1665	amdgpu_ras_interrupt_handler(obj);
   1666}
   1667
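       /* Queue an IV entry for deferred handling: copy it into the object's
        * ring buffer and schedule ih_work, so the entry is processed later
        * from the workqueue by amdgpu_ras_interrupt_handler().
        */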
   1668int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev,
   1669		struct ras_dispatch_if *info)
   1670{
   1671	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
   1672	struct ras_ih_data *data;
   1673
   1674	if (!obj)
   1675		return -EINVAL;
   1676
   1677	data = &obj->ih_data;
   1678	if (data->inuse == 0)
   1679		return 0;
   1680	/* Note: the ring may overflow and overwrite unprocessed entries if the worker falls behind. */
   1681	memcpy(&data->ring[data->wptr], info->entry,
   1682			data->element_size);
   1683
   1684	wmb();
   1685	data->wptr = (data->aligned_element_size +
   1686			data->wptr) % data->ring_size;
   1687
   1688	schedule_work(&data->ih_work);
   1689
   1690	return 0;
   1691}
   1692
   1693int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev,
   1694		struct ras_common_if *head)
   1695{
   1696	struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
   1697	struct ras_ih_data *data;
   1698
   1699	if (!obj)
   1700		return -EINVAL;
   1701
   1702	data = &obj->ih_data;
   1703	if (data->inuse == 0)
   1704		return 0;
   1705
   1706	cancel_work_sync(&data->ih_work);
   1707
   1708	kfree(data->ring);
   1709	memset(data, 0, sizeof(*data));
   1710	put_obj(obj);
   1711
   1712	return 0;
   1713}
   1714
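       /* Set up deferred interrupt handling for a RAS block: take a reference
        * on the ras_manager object, record the block's ras_cb, allocate a
        * 64-entry IV ring and initialize the worker that drains it.
        */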
   1715int amdgpu_ras_interrupt_add_handler(struct amdgpu_device *adev,
   1716		struct ras_common_if *head)
   1717{
   1718	struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
   1719	struct ras_ih_data *data;
   1720	struct amdgpu_ras_block_object *ras_obj;
   1721
   1722	if (!obj) {
   1723		/* in case we register the IH before enabling the ras feature */
   1724		obj = amdgpu_ras_create_obj(adev, head);
   1725		if (!obj)
   1726			return -EINVAL;
   1727	} else
   1728		get_obj(obj);
   1729
   1730	ras_obj = container_of(head, struct amdgpu_ras_block_object, ras_comm);
   1731
   1732	data = &obj->ih_data;
   1733	/* add the callback, etc. */
   1734	*data = (struct ras_ih_data) {
   1735		.inuse = 0,
   1736		.cb = ras_obj->ras_cb,
   1737		.element_size = sizeof(struct amdgpu_iv_entry),
   1738		.rptr = 0,
   1739		.wptr = 0,
   1740	};
   1741
   1742	INIT_WORK(&data->ih_work, amdgpu_ras_interrupt_process_handler);
   1743
   1744	data->aligned_element_size = ALIGN(data->element_size, 8);
   1745	/* the ring can store 64 iv entries. */
   1746	data->ring_size = 64 * data->aligned_element_size;
   1747	data->ring = kmalloc(data->ring_size, GFP_KERNEL);
   1748	if (!data->ring) {
   1749		put_obj(obj);
   1750		return -ENOMEM;
   1751	}
   1752
   1753	/* IH is ready */
   1754	data->inuse = 1;
   1755
   1756	return 0;
   1757}
   1758
   1759static int amdgpu_ras_interrupt_remove_all(struct amdgpu_device *adev)
   1760{
   1761	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
   1762	struct ras_manager *obj, *tmp;
   1763
   1764	list_for_each_entry_safe(obj, tmp, &con->head, node) {
   1765		amdgpu_ras_interrupt_remove_handler(adev, &obj->head);
   1766	}
   1767
   1768	return 0;
   1769}
   1770/* ih end */
   1771
   1772/* traverse all IPs except NBIO to query error counters */
   1773static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev)
   1774{
   1775	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
   1776	struct ras_manager *obj;
   1777
   1778	if (!adev->ras_enabled || !con)
   1779		return;
   1780
   1781	list_for_each_entry(obj, &con->head, node) {
   1782		struct ras_query_if info = {
   1783			.head = obj->head,
   1784		};
   1785
   1786		/*
   1787		 * The PCIE_BIF IP has a separate isr for ras controller
   1788		 * interrupts, and the block-specific ras counter query is
   1789		 * done in that isr. So skip this block in the common
   1790		 * sync flood interrupt isr path.
   1791		 */
   1792		if (info.head.block == AMDGPU_RAS_BLOCK__PCIE_BIF)
   1793			continue;
   1794
   1795		/*
   1796		 * This is a workaround for aldebaran: skip sending the msg to
   1797		 * smu to get the ecc_info table, because the smu currently
   1798		 * fails to handle that request.
   1799		 * Remove this once the smu handles the ecc_info table correctly.
   1800		 */
   1801		if ((info.head.block == AMDGPU_RAS_BLOCK__UMC) &&
   1802			(adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2)))
   1803			continue;
   1804
   1805		amdgpu_ras_query_error_status(adev, &info);
   1806
   1807		if (adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) &&
   1808		    adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) {
   1809			if (amdgpu_ras_reset_error_status(adev, info.head.block))
   1810				dev_warn(adev->dev, "Failed to reset error counter and error status");
   1811		}
   1812	}
   1813}
   1814
   1815/* Parse RdRspStatus and WrRspStatus */
   1816static void amdgpu_ras_error_status_query(struct amdgpu_device *adev,
   1817					  struct ras_query_if *info)
   1818{
   1819	struct amdgpu_ras_block_object *block_obj;
   1820	/*
   1821	 * Only two blocks need to query read/write
   1822	 * RspStatus at the current state
   1823	 */
   1824	if ((info->head.block != AMDGPU_RAS_BLOCK__GFX) &&
   1825		(info->head.block != AMDGPU_RAS_BLOCK__MMHUB))
   1826		return;
   1827
   1828	block_obj = amdgpu_ras_get_ras_block(adev,
   1829					info->head.block,
   1830					info->head.sub_block_index);
   1831
   1832	if (!block_obj || !block_obj->hw_ops) {
   1833		dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
   1834			     get_ras_block_str(&info->head));
   1835		return;
   1836	}
   1837
   1838	if (block_obj->hw_ops->query_ras_error_status)
   1839		block_obj->hw_ops->query_ras_error_status(adev);
   1840
   1841}
   1842
   1843static void amdgpu_ras_query_err_status(struct amdgpu_device *adev)
   1844{
   1845	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
   1846	struct ras_manager *obj;
   1847
   1848	if (!adev->ras_enabled || !con)
   1849		return;
   1850
   1851	list_for_each_entry(obj, &con->head, node) {
   1852		struct ras_query_if info = {
   1853			.head = obj->head,
   1854		};
   1855
   1856		amdgpu_ras_error_status_query(adev, &info);
   1857	}
   1858}
   1859
   1860/* recovery begin */
   1861
   1862/* return 0 on success.
   1863 * the caller needs to free bps.
   1864 */
   1865static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
   1866		struct ras_badpage **bps, unsigned int *count)
   1867{
   1868	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
   1869	struct ras_err_handler_data *data;
   1870	int i = 0;
   1871	int ret = 0, status;
   1872
   1873	if (!con || !con->eh_data || !bps || !count)
   1874		return -EINVAL;
   1875
   1876	mutex_lock(&con->recovery_lock);
   1877	data = con->eh_data;
   1878	if (!data || data->count == 0) {
   1879		*bps = NULL;
   1880		ret = -EINVAL;
   1881		goto out;
   1882	}
   1883
   1884	*bps = kmalloc(sizeof(struct ras_badpage) * data->count, GFP_KERNEL);
   1885	if (!*bps) {
   1886		ret = -ENOMEM;
   1887		goto out;
   1888	}
   1889
   1890	for (; i < data->count; i++) {
   1891		(*bps)[i] = (struct ras_badpage){
   1892			.bp = data->bps[i].retired_page,
   1893			.size = AMDGPU_GPU_PAGE_SIZE,
   1894			.flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED,
   1895		};
   1896		status = amdgpu_vram_mgr_query_page_status(&adev->mman.vram_mgr,
   1897				data->bps[i].retired_page);
   1898		if (status == -EBUSY)
   1899			(*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING;
   1900		else if (status == -ENOENT)
   1901			(*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_FAULT;
   1902	}
   1903
   1904	*count = data->count;
   1905out:
   1906	mutex_unlock(&con->recovery_lock);
   1907	return ret;
   1908}
   1909
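       /* Recovery worker scheduled by amdgpu_ras_reset_gpu(): unless error
        * counter harvesting is disabled, query error status and counters on
        * this device (or on every device in the XGMI hive), then trigger a
        * GPU reset if recovery is deemed necessary, and finally clear the
        * in_recovery flag.
        */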
   1910static void amdgpu_ras_do_recovery(struct work_struct *work)
   1911{
   1912	struct amdgpu_ras *ras =
   1913		container_of(work, struct amdgpu_ras, recovery_work);
   1914	struct amdgpu_device *remote_adev = NULL;
   1915	struct amdgpu_device *adev = ras->adev;
   1916	struct list_head device_list, *device_list_handle =  NULL;
   1917
   1918	if (!ras->disable_ras_err_cnt_harvest) {
   1919		struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
   1920
   1921		/* Build list of devices to query RAS related errors */
   1922		if  (hive && adev->gmc.xgmi.num_physical_nodes > 1) {
   1923			device_list_handle = &hive->device_list;
   1924		} else {
   1925			INIT_LIST_HEAD(&device_list);
   1926			list_add_tail(&adev->gmc.xgmi.head, &device_list);
   1927			device_list_handle = &device_list;
   1928		}
   1929
   1930		list_for_each_entry(remote_adev,
   1931				device_list_handle, gmc.xgmi.head) {
   1932			amdgpu_ras_query_err_status(remote_adev);
   1933			amdgpu_ras_log_on_err_counter(remote_adev);
   1934		}
   1935
   1936		amdgpu_put_xgmi_hive(hive);
   1937	}
   1938
   1939	if (amdgpu_device_should_recover_gpu(ras->adev))
   1940		amdgpu_device_gpu_recover(ras->adev, NULL);
   1941	atomic_set(&ras->in_recovery, 0);
   1942}
   1943
   1944/* alloc/realloc bps array */
   1945static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev,
   1946		struct ras_err_handler_data *data, int pages)
   1947{
   1948	unsigned int old_space = data->count + data->space_left;
   1949	unsigned int new_space = old_space + pages;
   1950	unsigned int align_space = ALIGN(new_space, 512);
   1951	void *bps = kmalloc(align_space * sizeof(*data->bps), GFP_KERNEL);
   1952
   1953	if (!bps) {
   1954		return -ENOMEM;
   1955	}
   1956
   1957	if (data->bps) {
   1958		memcpy(bps, data->bps,
   1959				data->count * sizeof(*data->bps));
   1960		kfree(data->bps);
   1961	}
   1962
   1963	data->bps = bps;
   1964	data->space_left += align_space - old_space;
   1965	return 0;
   1966}
   1967
   1968/* it deals with vram only. */
   1969int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
   1970		struct eeprom_table_record *bps, int pages)
   1971{
   1972	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
   1973	struct ras_err_handler_data *data;
   1974	int ret = 0;
   1975	uint32_t i;
   1976
   1977	if (!con || !con->eh_data || !bps || pages <= 0)
   1978		return 0;
   1979
   1980	mutex_lock(&con->recovery_lock);
   1981	data = con->eh_data;
   1982	if (!data)
   1983		goto out;
   1984
   1985	for (i = 0; i < pages; i++) {
   1986		if (amdgpu_ras_check_bad_page_unlock(con,
   1987			bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT))
   1988			continue;
   1989
   1990		if (!data->space_left &&
   1991			amdgpu_ras_realloc_eh_data_space(adev, data, 256)) {
   1992			ret = -ENOMEM;
   1993			goto out;
   1994		}
   1995
   1996		amdgpu_vram_mgr_reserve_range(&adev->mman.vram_mgr,
   1997			bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT,
   1998			AMDGPU_GPU_PAGE_SIZE);
   1999
   2000		memcpy(&data->bps[data->count], &bps[i], sizeof(*data->bps));
   2001		data->count++;
   2002		data->space_left--;
   2003	}
   2004out:
   2005	mutex_unlock(&con->recovery_lock);
   2006
   2007	return ret;
   2008}
   2009
   2010/*
   2011 * write the error record array to eeprom; the function should be
   2012 * protected by recovery_lock
   2013 */
   2014int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev)
   2015{
   2016	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
   2017	struct ras_err_handler_data *data;
   2018	struct amdgpu_ras_eeprom_control *control;
   2019	int save_count;
   2020
   2021	if (!con || !con->eh_data)
   2022		return 0;
   2023
   2024	mutex_lock(&con->recovery_lock);
   2025	control = &con->eeprom_control;
   2026	data = con->eh_data;
   2027	save_count = data->count - control->ras_num_recs;
   2028	mutex_unlock(&con->recovery_lock);
   2029	/* only new entries are saved */
   2030	if (save_count > 0) {
   2031		if (amdgpu_ras_eeprom_append(control,
   2032					     &data->bps[control->ras_num_recs],
   2033					     save_count)) {
   2034			dev_err(adev->dev, "Failed to save EEPROM table data!");
   2035			return -EIO;
   2036		}
   2037
   2038		dev_info(adev->dev, "Saved %d pages to EEPROM table.\n", save_count);
   2039	}
   2040
   2041	return 0;
   2042}
   2043
   2044/*
   2045 * read error record array in eeprom and reserve enough space for
   2046 * storing new bad pages
   2047 */
   2048static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev)
   2049{
   2050	struct amdgpu_ras_eeprom_control *control =
   2051		&adev->psp.ras_context.ras->eeprom_control;
   2052	struct eeprom_table_record *bps;
   2053	int ret;
   2054
   2055	/* no bad page record, skip eeprom access */
   2056	if (control->ras_num_recs == 0 || amdgpu_bad_page_threshold == 0)
   2057		return 0;
   2058
   2059	bps = kcalloc(control->ras_num_recs, sizeof(*bps), GFP_KERNEL);
   2060	if (!bps)
   2061		return -ENOMEM;
   2062
   2063	ret = amdgpu_ras_eeprom_read(control, bps, control->ras_num_recs);
   2064	if (ret)
   2065		dev_err(adev->dev, "Failed to load EEPROM table records!");
   2066	else
   2067		ret = amdgpu_ras_add_bad_pages(adev, bps, control->ras_num_recs);
   2068
   2069	kfree(bps);
   2070	return ret;
   2071}
   2072
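       /* Check whether addr has already been recorded as a retired page.
        * The caller must hold con->recovery_lock.
        */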
   2073static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
   2074				uint64_t addr)
   2075{
   2076	struct ras_err_handler_data *data = con->eh_data;
   2077	int i;
   2078
   2079	addr >>= AMDGPU_GPU_PAGE_SHIFT;
   2080	for (i = 0; i < data->count; i++)
   2081		if (addr == data->bps[i].retired_page)
   2082			return true;
   2083
   2084	return false;
   2085}
   2086
   2087/*
   2088 * check if an address belongs to a bad page
   2089 *
   2090 * Note: this check is only for the umc block
   2091 */
   2092static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
   2093				uint64_t addr)
   2094{
   2095	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
   2096	bool ret = false;
   2097
   2098	if (!con || !con->eh_data)
   2099		return ret;
   2100
   2101	mutex_lock(&con->recovery_lock);
   2102	ret = amdgpu_ras_check_bad_page_unlock(con, addr);
   2103	mutex_unlock(&con->recovery_lock);
   2104	return ret;
   2105}
   2106
   2107static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev,
   2108					  uint32_t max_count)
   2109{
   2110	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
   2111
   2112	/*
   2113	 * Justification of the value of bad_page_cnt_threshold in the ras structure
   2114	 *
   2115	 * Generally, -1 <= amdgpu_bad_page_threshold <= max record length
   2116	 * in eeprom, which introduces two scenarios:
   2117	 *
   2118	 * Bad page retirement enablement:
   2119	 *    - If amdgpu_bad_page_threshold = -1, bad_page_cnt_threshold is
   2120	 *      computed from vram size / RAS_BAD_PAGE_COVER, capped at max_count.
   2121	 *
   2122	 *    - When the value from the user is 0 < amdgpu_bad_page_threshold <
   2123	 *      max record length in eeprom, use it directly.
   2124	 *
   2125	 * Bad page retirement disablement:
   2126	 *    - If amdgpu_bad_page_threshold = 0, bad page retirement
   2127	 *      functionality is disabled, and bad_page_cnt_threshold
   2128	 *      takes no effect.
   2129	 */
   2130
   2131	if (amdgpu_bad_page_threshold < 0) {
   2132		u64 val = adev->gmc.mc_vram_size;
   2133
   2134		do_div(val, RAS_BAD_PAGE_COVER);
   2135		con->bad_page_cnt_threshold = min(lower_32_bits(val),
   2136						  max_count);
   2137	} else {
   2138		con->bad_page_cnt_threshold = min_t(int, max_count,
   2139						    amdgpu_bad_page_threshold);
   2140	}
   2141}
   2142
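       /* Initialize bad page recovery: allocate the error handler data,
        * validate the bad page count threshold, load previously retired pages
        * from the RAS EEPROM and reserve them in the VRAM manager, and, on
        * aldebaran with the GPU connected to the CPU, register the MCA
        * notifier used for bad page retirement.
        */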
   2143int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
   2144{
   2145	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
   2146	struct ras_err_handler_data **data;
   2147	u32  max_eeprom_records_count = 0;
   2148	bool exc_err_limit = false;
   2149	int ret;
   2150
   2151	if (!con)
   2152		return 0;
   2153
   2154	/* Allow access to the RAS EEPROM via debugfs, when the ASIC
   2155	 * supports RAS and debugfs is enabled, even when
   2156	 * adev->ras_enabled is unset, i.e. when the "ras_enable"
   2157	 * module parameter is set to 0.
   2158	 */
   2159	con->adev = adev;
   2160
   2161	if (!adev->ras_enabled)
   2162		return 0;
   2163
   2164	data = &con->eh_data;
   2165	*data = kmalloc(sizeof(**data), GFP_KERNEL | __GFP_ZERO);
   2166	if (!*data) {
   2167		ret = -ENOMEM;
   2168		goto out;
   2169	}
   2170
   2171	mutex_init(&con->recovery_lock);
   2172	INIT_WORK(&con->recovery_work, amdgpu_ras_do_recovery);
   2173	atomic_set(&con->in_recovery, 0);
   2174	con->eeprom_control.bad_channel_bitmap = 0;
   2175
   2176	max_eeprom_records_count = amdgpu_ras_eeprom_max_record_count();
   2177	amdgpu_ras_validate_threshold(adev, max_eeprom_records_count);
   2178
   2179	/* Todo: during testing the SMU might fail to read the eeprom through I2C
   2180	 * when the GPU is pending an XGMI reset during probe time
   2181	 * (mostly after the second bus reset); skip it for now
   2182	 */
   2183	if (adev->gmc.xgmi.pending_reset)
   2184		return 0;
   2185	ret = amdgpu_ras_eeprom_init(&con->eeprom_control, &exc_err_limit);
   2186	/*
   2187	 * The call above has failed when exc_err_limit is true or
   2188	 * ret != 0.
   2189	 */
   2190	if (exc_err_limit || ret)
   2191		goto free;
   2192
   2193	if (con->eeprom_control.ras_num_recs) {
   2194		ret = amdgpu_ras_load_bad_pages(adev);
   2195		if (ret)
   2196			goto free;
   2197
   2198		amdgpu_dpm_send_hbm_bad_pages_num(adev, con->eeprom_control.ras_num_recs);
   2199
   2200		if (con->update_channel_flag) {
   2201			amdgpu_dpm_send_hbm_bad_channel_flag(adev, con->eeprom_control.bad_channel_bitmap);
   2202			con->update_channel_flag = false;
   2203		}
   2204	}
   2205
   2206#ifdef CONFIG_X86_MCE_AMD
   2207	if ((adev->asic_type == CHIP_ALDEBARAN) &&
   2208	    (adev->gmc.xgmi.connected_to_cpu))
   2209		amdgpu_register_bad_pages_mca_notifier(adev);
   2210#endif
   2211	return 0;
   2212
   2213free:
   2214	kfree((*data)->bps);
   2215	kfree(*data);
   2216	con->eh_data = NULL;
   2217out:
   2218	dev_warn(adev->dev, "Failed to initialize ras recovery! (%d)\n", ret);
   2219
   2220	/*
   2221	 * Except error threshold exceeding case, other failure cases in this
   2222	 * function would not fail amdgpu driver init.
   2223	 */
   2224	if (!exc_err_limit)
   2225		ret = 0;
   2226	else
   2227		ret = -EINVAL;
   2228
   2229	return ret;
   2230}
   2231
   2232static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
   2233{
   2234	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
   2235	struct ras_err_handler_data *data = con->eh_data;
   2236
   2237	/* recovery_init failed to init it, so fini is a no-op */
   2238	if (!data)
   2239		return 0;
   2240
   2241	cancel_work_sync(&con->recovery_work);
   2242
   2243	mutex_lock(&con->recovery_lock);
   2244	con->eh_data = NULL;
   2245	kfree(data->bps);
   2246	kfree(data);
   2247	mutex_unlock(&con->recovery_lock);
   2248
   2249	return 0;
   2250}
   2251/* recovery end */
   2252
   2253static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)
   2254{
   2255	return adev->asic_type == CHIP_VEGA10 ||
   2256		adev->asic_type == CHIP_VEGA20 ||
   2257		adev->asic_type == CHIP_ARCTURUS ||
   2258		adev->asic_type == CHIP_ALDEBARAN ||
   2259		adev->asic_type == CHIP_SIENNA_CICHLID;
   2260}
   2261
   2262/*
   2263 * This is a workaround for the vega20 workstation sku:
   2264 * force enable gfx ras and ignore the vbios gfx ras flag,
   2265 * because GC EDC cannot be written
   2266 */
   2267static void amdgpu_ras_get_quirks(struct amdgpu_device *adev)
   2268{
   2269	struct atom_context *ctx = adev->mode_info.atom_context;
   2270
   2271	if (!ctx)
   2272		return;
   2273
   2274	if (strnstr(ctx->vbios_version, "D16406",
   2275		    sizeof(ctx->vbios_version)) ||
   2276		strnstr(ctx->vbios_version, "D36002",
   2277			sizeof(ctx->vbios_version)))
   2278		adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX);
   2279}
   2280
   2281/*
   2282 * Check the hardware's ras ability, which is saved in hw_supported.
   2283 * If the hardware does not support ras, we can skip some ras
   2284 * initialization and forbid some ras operations from the IPs.
   2285 * If software itself (say, a boot parameter) limits the ras ability, we
   2286 * still need to allow the IPs some limited operations, like disable. In
   2287 * that case we have to initialize ras as normal, but check in each
   2288 * function whether the operation is allowed or not.
   2289 */
   2290static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
   2291{
   2292	adev->ras_hw_enabled = adev->ras_enabled = 0;
   2293
   2294	if (!adev->is_atom_fw ||
   2295	    !amdgpu_ras_asic_supported(adev))
   2296		return;
   2297
   2298	/* If the driver runs on the sriov guest side, only enable ras for aldebaran */
   2299	if (amdgpu_sriov_vf(adev) &&
   2300		adev->ip_versions[MP1_HWIP][0] != IP_VERSION(13, 0, 2))
   2301		return;
   2302
   2303	if (!adev->gmc.xgmi.connected_to_cpu) {
   2304		if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
   2305			dev_info(adev->dev, "MEM ECC is active.\n");
   2306			adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__UMC |
   2307						   1 << AMDGPU_RAS_BLOCK__DF);
   2308		} else {
   2309			dev_info(adev->dev, "MEM ECC is not present.\n");
   2310		}
   2311
   2312		if (amdgpu_atomfirmware_sram_ecc_supported(adev)) {
   2313			dev_info(adev->dev, "SRAM ECC is active.\n");
   2314			if (!amdgpu_sriov_vf(adev)) {
   2315				adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
   2316							    1 << AMDGPU_RAS_BLOCK__DF);
   2317
   2318				if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2, 6, 0))
   2319					adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN |
   2320							1 << AMDGPU_RAS_BLOCK__JPEG);
   2321				else
   2322					adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN |
   2323							1 << AMDGPU_RAS_BLOCK__JPEG);
   2324			} else {
   2325				adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__PCIE_BIF |
   2326								1 << AMDGPU_RAS_BLOCK__SDMA |
   2327								1 << AMDGPU_RAS_BLOCK__GFX);
   2328			}
   2329		} else {
   2330			dev_info(adev->dev, "SRAM ECC is not present.\n");
   2331		}
   2332	} else {
   2333		/* the driver only manages the RAS feature of a few IP blocks
   2334		 * when the GPU is connected to the CPU through XGMI */
   2335		adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX |
   2336					   1 << AMDGPU_RAS_BLOCK__SDMA |
   2337					   1 << AMDGPU_RAS_BLOCK__MMHUB);
   2338	}
   2339
   2340	amdgpu_ras_get_quirks(adev);
   2341
   2342	/* hw_supported needs to be aligned with RAS block mask. */
   2343	adev->ras_hw_enabled &= AMDGPU_RAS_BLOCK_MASK;
   2344
   2345	adev->ras_enabled = amdgpu_ras_enable == 0 ? 0 :
   2346		adev->ras_hw_enabled & amdgpu_ras_mask;
   2347}
   2348
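       /* Delayed work that refreshes the cached correctable/uncorrectable
        * error counts (ras_ce_count/ras_ue_count) by re-querying all enabled
        * RAS blocks, holding a runtime PM reference while doing so.
        */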
   2349static void amdgpu_ras_counte_dw(struct work_struct *work)
   2350{
   2351	struct amdgpu_ras *con = container_of(work, struct amdgpu_ras,
   2352					      ras_counte_delay_work.work);
   2353	struct amdgpu_device *adev = con->adev;
   2354	struct drm_device *dev = adev_to_drm(adev);
   2355	unsigned long ce_count, ue_count;
   2356	int res;
   2357
   2358	res = pm_runtime_get_sync(dev->dev);
   2359	if (res < 0)
   2360		goto Out;
   2361
   2362	/* Cache new values.
   2363	 */
   2364	if (amdgpu_ras_query_error_count(adev, &ce_count, &ue_count) == 0) {
   2365		atomic_set(&con->ras_ce_count, ce_count);
   2366		atomic_set(&con->ras_ue_count, ue_count);
   2367	}
   2368
   2369	pm_runtime_mark_last_busy(dev->dev);
   2370Out:
   2371	pm_runtime_put_autosuspend(dev->dev);
   2372}
   2373
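       /* Early RAS init: allocate the amdgpu_ras context and its per-block
        * ras_manager array, detect hardware RAS capability, set up the NBIO
        * RAS interrupts where applicable, determine poison mode support from
        * DF/UMC, and create the RAS fs nodes.
        */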
   2374int amdgpu_ras_init(struct amdgpu_device *adev)
   2375{
   2376	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
   2377	int r;
   2378	bool df_poison, umc_poison;
   2379
   2380	if (con)
   2381		return 0;
   2382
   2383	con = kmalloc(sizeof(struct amdgpu_ras) +
   2384			sizeof(struct ras_manager) * AMDGPU_RAS_BLOCK_COUNT +
   2385			sizeof(struct ras_manager) * AMDGPU_RAS_MCA_BLOCK_COUNT,
   2386			GFP_KERNEL|__GFP_ZERO);
   2387	if (!con)
   2388		return -ENOMEM;
   2389
   2390	con->adev = adev;
   2391	INIT_DELAYED_WORK(&con->ras_counte_delay_work, amdgpu_ras_counte_dw);
   2392	atomic_set(&con->ras_ce_count, 0);
   2393	atomic_set(&con->ras_ue_count, 0);
   2394
   2395	con->objs = (struct ras_manager *)(con + 1);
   2396
   2397	amdgpu_ras_set_context(adev, con);
   2398
   2399	amdgpu_ras_check_supported(adev);
   2400
   2401	if (!adev->ras_enabled || adev->asic_type == CHIP_VEGA10) {
   2402		/* set the gfx block ras context feature for VEGA20 Gaming,
   2403		 * so that a ras disable cmd is sent to the ras ta during ras late init.
   2404		 */
   2405		if (!adev->ras_enabled && adev->asic_type == CHIP_VEGA20) {
   2406			con->features |= BIT(AMDGPU_RAS_BLOCK__GFX);
   2407
   2408			return 0;
   2409		}
   2410
   2411		r = 0;
   2412		goto release_con;
   2413	}
   2414
   2415	con->update_channel_flag = false;
   2416	con->features = 0;
   2417	INIT_LIST_HEAD(&con->head);
   2418	/* Might need to get this flag from the vbios. */
   2419	con->flags = RAS_DEFAULT_FLAGS;
   2420
   2421	/* initialize the nbio ras function ahead of any other
   2422	 * ras functions so the hardware fatal error interrupt
   2423	 * can be enabled as early as possible */
   2424	switch (adev->asic_type) {
   2425	case CHIP_VEGA20:
   2426	case CHIP_ARCTURUS:
   2427	case CHIP_ALDEBARAN:
   2428		if (!adev->gmc.xgmi.connected_to_cpu) {
   2429			adev->nbio.ras = &nbio_v7_4_ras;
   2430			amdgpu_ras_register_ras_block(adev, &adev->nbio.ras->ras_block);
   2431			adev->nbio.ras_if = &adev->nbio.ras->ras_block.ras_comm;
   2432		}
   2433		break;
   2434	default:
   2435		/* nbio ras is not available */
   2436		break;
   2437	}
   2438
   2439	if (adev->nbio.ras &&
   2440	    adev->nbio.ras->init_ras_controller_interrupt) {
   2441		r = adev->nbio.ras->init_ras_controller_interrupt(adev);
   2442		if (r)
   2443			goto release_con;
   2444	}
   2445
   2446	if (adev->nbio.ras &&
   2447	    adev->nbio.ras->init_ras_err_event_athub_interrupt) {
   2448		r = adev->nbio.ras->init_ras_err_event_athub_interrupt(adev);
   2449		if (r)
   2450			goto release_con;
   2451	}
   2452
   2453	/* Init poison supported flag, the default value is false */
   2454	if (adev->gmc.xgmi.connected_to_cpu) {
   2455		/* enabled by default when GPU is connected to CPU */
   2456		con->poison_supported = true;
   2457	}
   2458	else if (adev->df.funcs &&
   2459	    adev->df.funcs->query_ras_poison_mode &&
   2460	    adev->umc.ras &&
   2461	    adev->umc.ras->query_ras_poison_mode) {
   2462		df_poison =
   2463			adev->df.funcs->query_ras_poison_mode(adev);
   2464		umc_poison =
   2465			adev->umc.ras->query_ras_poison_mode(adev);
   2466		/* Only if poison is set in both DF and UMC can we support it */
   2467		if (df_poison && umc_poison)
   2468			con->poison_supported = true;
   2469		else if (df_poison != umc_poison)
   2470			dev_warn(adev->dev, "Poison setting is inconsistent in DF/UMC(%d:%d)!\n",
   2471					df_poison, umc_poison);
   2472	}
   2473
   2474	if (amdgpu_ras_fs_init(adev)) {
   2475		r = -EINVAL;
   2476		goto release_con;
   2477	}
   2478
   2479	dev_info(adev->dev, "RAS INFO: ras initialized successfully, "
   2480		 "hardware ability[%x] ras_mask[%x]\n",
   2481		 adev->ras_hw_enabled, adev->ras_enabled);
   2482
   2483	return 0;
   2484release_con:
   2485	amdgpu_ras_set_context(adev, NULL);
   2486	kfree(con);
   2487
   2488	return r;
   2489}
   2490
   2491int amdgpu_persistent_edc_harvesting_supported(struct amdgpu_device *adev)
   2492{
   2493	if (adev->gmc.xgmi.connected_to_cpu)
   2494		return 1;
   2495	return 0;
   2496}
   2497
   2498static int amdgpu_persistent_edc_harvesting(struct amdgpu_device *adev,
   2499					struct ras_common_if *ras_block)
   2500{
   2501	struct ras_query_if info = {
   2502		.head = *ras_block,
   2503	};
   2504
   2505	if (!amdgpu_persistent_edc_harvesting_supported(adev))
   2506		return 0;
   2507
   2508	if (amdgpu_ras_query_error_status(adev, &info) != 0)
   2509		DRM_WARN("RAS init harvest failure");
   2510
   2511	if (amdgpu_ras_reset_error_status(adev, ras_block->block) != 0)
   2512		DRM_WARN("RAS init harvest reset failure");
   2513
   2514	return 0;
   2515}
   2516
   2517bool amdgpu_ras_is_poison_mode_supported(struct amdgpu_device *adev)
   2518{
   2519	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
   2520
   2521	if (!con)
   2522		return false;
   2523
   2524	return con->poison_supported;
   2525}
   2526
   2527/* helper function to handle common stuff in ip late init phase */
   2528int amdgpu_ras_block_late_init(struct amdgpu_device *adev,
   2529			 struct ras_common_if *ras_block)
   2530{
   2531	struct amdgpu_ras_block_object *ras_obj = NULL;
   2532	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
   2533	unsigned long ue_count, ce_count;
   2534	int r;
   2535
   2536	/* disable RAS feature per IP block if it is not supported */
   2537	if (!amdgpu_ras_is_supported(adev, ras_block->block)) {
   2538		amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
   2539		return 0;
   2540	}
   2541
   2542	r = amdgpu_ras_feature_enable_on_boot(adev, ras_block, 1);
   2543	if (r) {
   2544		if (adev->in_suspend || amdgpu_in_reset(adev)) {
   2545			/* in the resume phase, if we fail to enable ras,
   2546			 * clean up all ras fs nodes and disable ras */
   2547			goto cleanup;
   2548		} else
   2549			return r;
   2550	}
   2551
   2552	/* check for errors on warm reset, on ASICs supporting persistent edc harvesting */
   2553	amdgpu_persistent_edc_harvesting(adev, ras_block);
   2554
   2555	/* in resume phase, no need to create ras fs node */
   2556	if (adev->in_suspend || amdgpu_in_reset(adev))
   2557		return 0;
   2558
   2559	ras_obj = container_of(ras_block, struct amdgpu_ras_block_object, ras_comm);
   2560	if (ras_obj->ras_cb || (ras_obj->hw_ops &&
   2561	    (ras_obj->hw_ops->query_poison_status ||
   2562	    ras_obj->hw_ops->handle_poison_consumption))) {
   2563		r = amdgpu_ras_interrupt_add_handler(adev, ras_block);
   2564		if (r)
   2565			goto cleanup;
   2566	}
   2567
   2568	r = amdgpu_ras_sysfs_create(adev, ras_block);
   2569	if (r)
   2570		goto interrupt;
   2571
   2572	/* Those are the cached values at init.
   2573	 */
   2574	if (amdgpu_ras_query_error_count(adev, &ce_count, &ue_count) == 0) {
   2575		atomic_set(&con->ras_ce_count, ce_count);
   2576		atomic_set(&con->ras_ue_count, ue_count);
   2577	}
   2578
   2579	return 0;
   2580
   2581interrupt:
   2582	if (ras_obj->ras_cb)
   2583		amdgpu_ras_interrupt_remove_handler(adev, ras_block);
   2584cleanup:
   2585	amdgpu_ras_feature_enable(adev, ras_block, 0);
   2586	return r;
   2587}
   2588
   2589static int amdgpu_ras_block_late_init_default(struct amdgpu_device *adev,
   2590			 struct ras_common_if *ras_block)
   2591{
   2592	return amdgpu_ras_block_late_init(adev, ras_block);
   2593}
   2594
   2595/* helper function to remove ras fs node and interrupt handler */
   2596void amdgpu_ras_block_late_fini(struct amdgpu_device *adev,
   2597			  struct ras_common_if *ras_block)
   2598{
   2599	struct amdgpu_ras_block_object *ras_obj;
   2600	if (!ras_block)
   2601		return;
   2602
   2603	amdgpu_ras_sysfs_remove(adev, ras_block);
   2604
   2605	ras_obj = container_of(ras_block, struct amdgpu_ras_block_object, ras_comm);
   2606	if (ras_obj->ras_cb)
   2607		amdgpu_ras_interrupt_remove_handler(adev, ras_block);
   2608}
   2609
   2610static void amdgpu_ras_block_late_fini_default(struct amdgpu_device *adev,
   2611			  struct ras_common_if *ras_block)
   2612{
   2613	return amdgpu_ras_block_late_fini(adev, ras_block);
   2614}
   2615
   2616/* do some init work after IP late init, as a dependency;
   2617 * it runs in the resume / gpu reset / boot-up cases.
   2618 */
   2619void amdgpu_ras_resume(struct amdgpu_device *adev)
   2620{
   2621	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
   2622	struct ras_manager *obj, *tmp;
   2623
   2624	if (!adev->ras_enabled || !con) {
   2625		/* clean ras context for VEGA20 Gaming after send ras disable cmd */
   2626		amdgpu_release_ras_context(adev);
   2627
   2628		return;
   2629	}
   2630
   2631	if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS) {
   2632		/* Set up all other IPs which are not implemented. There is a
   2633		 * tricky point: an IP's actual ras error type should be
   2634		 * MULTI_UNCORRECTABLE, but since the driver does not handle it,
   2635		 * ERROR_NONE makes sense anyway.
   2636		 */
   2637		amdgpu_ras_enable_all_features(adev, 1);
   2638
   2639		/* We enable ras on all hw_supported blocks, but the boot
   2640		 * parameter might disable some of them and one or more IPs may
   2641		 * not be implemented yet. So we disable those on their behalf.
   2642		 */
   2643		list_for_each_entry_safe(obj, tmp, &con->head, node) {
   2644			if (!amdgpu_ras_is_supported(adev, obj->head.block)) {
   2645				amdgpu_ras_feature_enable(adev, &obj->head, 0);
   2646				/* there should not be any reference. */
   2647				WARN_ON(alive_obj(obj));
   2648			}
   2649		}
   2650	}
   2651}
   2652
   2653void amdgpu_ras_suspend(struct amdgpu_device *adev)
   2654{
   2655	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
   2656
   2657	if (!adev->ras_enabled || !con)
   2658		return;
   2659
   2660	amdgpu_ras_disable_all_features(adev, 0);
   2661	/* Make sure all ras objects are disabled. */
   2662	if (con->features)
   2663		amdgpu_ras_disable_all_features(adev, 1);
   2664}
   2665
   2666int amdgpu_ras_late_init(struct amdgpu_device *adev)
   2667{
   2668	struct amdgpu_ras_block_list *node, *tmp;
   2669	struct amdgpu_ras_block_object *obj;
   2670	int r;
   2671
   2672	/* The guest side doesn't need to init the ras feature */
   2673	if (amdgpu_sriov_vf(adev))
   2674		return 0;
   2675
   2676	list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {
   2677		if (!node->ras_obj) {
   2678			dev_warn(adev->dev, "Warning: abnormal ras list node.\n");
   2679			continue;
   2680		}
   2681
   2682		obj = node->ras_obj;
   2683		if (obj->ras_late_init) {
   2684			r = obj->ras_late_init(adev, &obj->ras_comm);
   2685			if (r) {
   2686				dev_err(adev->dev, "%s failed to execute ras_late_init! ret:%d\n",
   2687					obj->ras_comm.name, r);
   2688				return r;
   2689			}
   2690		} else
   2691			amdgpu_ras_block_late_init_default(adev, &obj->ras_comm);
   2692	}
   2693
   2694	return 0;
   2695}
   2696
   2697/* do some fini work before IP fini, as a dependency */
   2698int amdgpu_ras_pre_fini(struct amdgpu_device *adev)
   2699{
   2700	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
   2701
   2702	if (!adev->ras_enabled || !con)
   2703		return 0;
   2704
   2705
   2706	/* Need to disable ras on all IPs here before ip [hw/sw]fini */
   2707	amdgpu_ras_disable_all_features(adev, 0);
   2708	amdgpu_ras_recovery_fini(adev);
   2709	return 0;
   2710}
   2711
   2712int amdgpu_ras_fini(struct amdgpu_device *adev)
   2713{
   2714	struct amdgpu_ras_block_list *ras_node, *tmp;
   2715	struct amdgpu_ras_block_object *obj = NULL;
   2716	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
   2717
   2718	if (!adev->ras_enabled || !con)
   2719		return 0;
   2720
   2721	list_for_each_entry_safe(ras_node, tmp, &adev->ras_list, node) {
   2722		if (ras_node->ras_obj) {
   2723			obj = ras_node->ras_obj;
   2724			if (amdgpu_ras_is_supported(adev, obj->ras_comm.block) &&
   2725			    obj->ras_fini)
   2726				obj->ras_fini(adev, &obj->ras_comm);
   2727			else
   2728				amdgpu_ras_block_late_fini_default(adev, &obj->ras_comm);
   2729		}
   2730
   2731		/* Clear ras blocks from ras_list and free ras block list node */
   2732		list_del(&ras_node->node);
   2733		kfree(ras_node);
   2734	}
   2735
   2736	amdgpu_ras_fs_fini(adev);
   2737	amdgpu_ras_interrupt_remove_all(adev);
   2738
   2739	WARN(con->features, "Feature mask is not cleared");
   2740
   2741	if (con->features)
   2742		amdgpu_ras_disable_all_features(adev, 1);
   2743
   2744	cancel_delayed_work_sync(&con->ras_counte_delay_work);
   2745
   2746	amdgpu_ras_set_context(adev, NULL);
   2747	kfree(con);
   2748
   2749	return 0;
   2750}
   2751
   2752void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
   2753{
   2754	amdgpu_ras_check_supported(adev);
   2755	if (!adev->ras_hw_enabled)
   2756		return;
   2757
   2758	if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) {
   2759		dev_info(adev->dev, "uncorrectable hardware error "
   2760			"(ERREVENT_ATHUB_INTERRUPT) detected!\n");
   2761
   2762		amdgpu_ras_reset_gpu(adev);
   2763	}
   2764}
   2765
   2766bool amdgpu_ras_need_emergency_restart(struct amdgpu_device *adev)
   2767{
   2768	if (adev->asic_type == CHIP_VEGA20 &&
   2769	    adev->pm.fw_version <= 0x283400) {
   2770		return !(amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) &&
   2771				amdgpu_ras_intr_triggered();
   2772	}
   2773
   2774	return false;
   2775}
   2776
   2777void amdgpu_release_ras_context(struct amdgpu_device *adev)
   2778{
   2779	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
   2780
   2781	if (!con)
   2782		return;
   2783
   2784	if (!adev->ras_enabled && con->features & BIT(AMDGPU_RAS_BLOCK__GFX)) {
   2785		con->features &= ~BIT(AMDGPU_RAS_BLOCK__GFX);
   2786		amdgpu_ras_set_context(adev, NULL);
   2787		kfree(con);
   2788	}
   2789}
   2790
   2791#ifdef CONFIG_X86_MCE_AMD
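       /* Map an MCA node id to the amdgpu device registered in mce_adev_list
        * with the matching XGMI physical node id.
        */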
   2792static struct amdgpu_device *find_adev(uint32_t node_id)
   2793{
   2794	int i;
   2795	struct amdgpu_device *adev = NULL;
   2796
   2797	for (i = 0; i < mce_adev_list.num_gpu; i++) {
   2798		adev = mce_adev_list.devs[i];
   2799
   2800		if (adev && adev->gmc.xgmi.connected_to_cpu &&
   2801		    adev->gmc.xgmi.physical_node_id == node_id)
   2802			break;
   2803		adev = NULL;
   2804	}
   2805
   2806	return adev;
   2807}
   2808
   2809#define GET_MCA_IPID_GPUID(m)	(((m) >> 44) & 0xF)
   2810#define GET_UMC_INST(m)		(((m) >> 21) & 0x7)
   2811#define GET_CHAN_INDEX(m)	((((m) >> 12) & 0x3) | (((m) >> 18) & 0x4))
   2812#define GPU_ID_OFFSET		8
   2813
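       /* MCE decode chain notifier: for uncorrectable DramECC errors reported
        * by a GPU UMC (SMCA bank type UMC_V2), look up the owning amdgpu
        * device, translate the MCA address into a retired page and queue it
        * for bad page retirement and EEPROM persistence.
        */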
   2814static int amdgpu_bad_page_notifier(struct notifier_block *nb,
   2815				    unsigned long val, void *data)
   2816{
   2817	struct mce *m = (struct mce *)data;
   2818	struct amdgpu_device *adev = NULL;
   2819	uint32_t gpu_id = 0;
   2820	uint32_t umc_inst = 0;
   2821	uint32_t ch_inst, channel_index = 0;
   2822	struct ras_err_data err_data = {0, 0, 0, NULL};
   2823	struct eeprom_table_record err_rec;
   2824	uint64_t retired_page;
   2825
   2826	/*
   2827	 * Only process the error if it was generated in UMC_V2, which belongs
   2828	 * to the GPU UMCs, and the error occurred in DramECC (extended error
   2829	 * code = 0); otherwise bail out.
   2830	 */
   2831	if (!m || !((smca_get_bank_type(m->extcpu, m->bank) == SMCA_UMC_V2) &&
   2832		    (XEC(m->status, 0x3f) == 0x0)))
   2833		return NOTIFY_DONE;
   2834
   2835	/*
   2836	 * If it is a correctable error, return.
   2837	 */
   2838	if (mce_is_correctable(m))
   2839		return NOTIFY_OK;
   2840
   2841	/*
   2842	 * GPU Id is offset by GPU_ID_OFFSET in MCA_IPID_UMC register.
   2843	 */
   2844	gpu_id = GET_MCA_IPID_GPUID(m->ipid) - GPU_ID_OFFSET;
   2845
   2846	adev = find_adev(gpu_id);
   2847	if (!adev) {
   2848		DRM_WARN("%s: Unable to find adev for gpu_id: %d\n", __func__,
   2849								gpu_id);
   2850		return NOTIFY_DONE;
   2851	}
   2852
   2853	/*
   2854	 * If it is an uncorrectable error, find out the UMC instance and
   2855	 * channel index.
   2856	 */
   2857	umc_inst = GET_UMC_INST(m->ipid);
   2858	ch_inst = GET_CHAN_INDEX(m->ipid);
   2859
   2860	dev_info(adev->dev, "Uncorrectable error detected in UMC inst: %d, chan_idx: %d",
   2861			     umc_inst, ch_inst);
   2862
   2863	/*
   2864	 * Translate UMC channel address to Physical address
   2865	 */
   2866	channel_index =
   2867		adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num
   2868					  + ch_inst];
   2869
   2870	retired_page = ADDR_OF_8KB_BLOCK(m->addr) |
   2871			ADDR_OF_256B_BLOCK(channel_index) |
   2872			OFFSET_IN_256B_BLOCK(m->addr);
   2873
   2874	memset(&err_rec, 0x0, sizeof(struct eeprom_table_record));
   2875	err_data.err_addr = &err_rec;
   2876	amdgpu_umc_fill_error_record(&err_data, m->addr,
   2877			retired_page, channel_index, umc_inst);
   2878
   2879	if (amdgpu_bad_page_threshold != 0) {
   2880		amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
   2881						err_data.err_addr_cnt);
   2882		amdgpu_ras_save_bad_pages(adev);
   2883	}
   2884
   2885	return NOTIFY_OK;
   2886}
   2887
   2888static struct notifier_block amdgpu_bad_page_nb = {
   2889	.notifier_call  = amdgpu_bad_page_notifier,
   2890	.priority       = MCE_PRIO_UC,
   2891};
   2892
   2893static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev)
   2894{
   2895	/*
   2896	 * Add the adev to the mce_adev_list.
   2897	 * During mode2 reset, amdgpu device is temporarily
   2898	 * removed from the mgpu_info list which can cause
   2899	 * page retirement to fail.
   2900	 * Use this list instead of mgpu_info to find the amdgpu
   2901	 * device on which the UMC error was reported.
   2902	 */
   2903	mce_adev_list.devs[mce_adev_list.num_gpu++] = adev;
   2904
   2905	/*
   2906	 * Register the x86 notifier only once
   2907	 * with MCE subsystem.
   2908	 */
   2909	if (!notifier_registered) {
   2910		mce_register_decode_chain(&amdgpu_bad_page_nb);
   2911		notifier_registered = true;
   2912	}
   2913}
   2914#endif
   2915
   2916struct amdgpu_ras *amdgpu_ras_get_context(struct amdgpu_device *adev)
   2917{
   2918	if (!adev)
   2919		return NULL;
   2920
   2921	return adev->psp.ras_context.ras;
   2922}
   2923
   2924int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_con)
   2925{
   2926	if (!adev)
   2927		return -EINVAL;
   2928
   2929	adev->psp.ras_context.ras = ras_con;
   2930	return 0;
   2931}
   2932
   2933/* check if ras is supported on block, say, sdma, gfx */
   2934int amdgpu_ras_is_supported(struct amdgpu_device *adev,
   2935		unsigned int block)
   2936{
   2937	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
   2938
   2939	if (block >= AMDGPU_RAS_BLOCK_COUNT)
   2940		return 0;
   2941	return ras && (adev->ras_enabled & (1 << block));
   2942}
   2943
   2944int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
   2945{
   2946	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
   2947
   2948	if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
   2949		schedule_work(&ras->recovery_work);
   2950	return 0;
   2951}
   2952
   2953
   2954/* Register each ip ras block into amdgpu ras */
   2955int amdgpu_ras_register_ras_block(struct amdgpu_device *adev,
   2956		struct amdgpu_ras_block_object *ras_block_obj)
   2957{
   2958	struct amdgpu_ras_block_list *ras_node;
   2959	if (!adev || !ras_block_obj)
   2960		return -EINVAL;
   2961
   2962	if (!amdgpu_ras_asic_supported(adev))
   2963		return 0;
   2964
   2965	ras_node = kzalloc(sizeof(*ras_node), GFP_KERNEL);
   2966	if (!ras_node)
   2967		return -ENOMEM;
   2968
   2969	INIT_LIST_HEAD(&ras_node->node);
   2970	ras_node->ras_obj = ras_block_obj;
   2971	list_add_tail(&ras_node->node, &adev->ras_list);
   2972
   2973	return 0;
   2974}