cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

al_mc_edac.c (9595B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
      4 */
      5#include <linux/bitfield.h>
      6#include <linux/bitops.h>
      7#include <linux/edac.h>
      8#include <linux/of_irq.h>
      9#include <linux/platform_device.h>
     10#include <linux/spinlock.h>
     11#include "edac_module.h"
     12
     13/* Registers Offset */
     14#define AL_MC_ECC_CFG		0x70
     15#define AL_MC_ECC_CLEAR		0x7c
     16#define AL_MC_ECC_ERR_COUNT	0x80
     17#define AL_MC_ECC_CE_ADDR0	0x84
     18#define AL_MC_ECC_CE_ADDR1	0x88
     19#define AL_MC_ECC_UE_ADDR0	0xa4
     20#define AL_MC_ECC_UE_ADDR1	0xa8
     21#define AL_MC_ECC_CE_SYND0	0x8c
     22#define AL_MC_ECC_CE_SYND1	0x90
     23#define AL_MC_ECC_CE_SYND2	0x94
     24#define AL_MC_ECC_UE_SYND0	0xac
     25#define AL_MC_ECC_UE_SYND1	0xb0
     26#define AL_MC_ECC_UE_SYND2	0xb4
     27
     28/* Registers Fields */
     29#define AL_MC_ECC_CFG_SCRUB_DISABLED	BIT(4)
     30
     31#define AL_MC_ECC_CLEAR_UE_COUNT	BIT(3)
     32#define AL_MC_ECC_CLEAR_CE_COUNT	BIT(2)
     33#define AL_MC_ECC_CLEAR_UE_ERR		BIT(1)
     34#define AL_MC_ECC_CLEAR_CE_ERR		BIT(0)
     35
     36#define AL_MC_ECC_ERR_COUNT_UE		GENMASK(31, 16)
     37#define AL_MC_ECC_ERR_COUNT_CE		GENMASK(15, 0)
     38
     39#define AL_MC_ECC_CE_ADDR0_RANK		GENMASK(25, 24)
     40#define AL_MC_ECC_CE_ADDR0_ROW		GENMASK(17, 0)
     41
     42#define AL_MC_ECC_CE_ADDR1_BG		GENMASK(25, 24)
     43#define AL_MC_ECC_CE_ADDR1_BANK		GENMASK(18, 16)
     44#define AL_MC_ECC_CE_ADDR1_COLUMN	GENMASK(11, 0)
     45
     46#define AL_MC_ECC_UE_ADDR0_RANK		GENMASK(25, 24)
     47#define AL_MC_ECC_UE_ADDR0_ROW		GENMASK(17, 0)
     48
     49#define AL_MC_ECC_UE_ADDR1_BG		GENMASK(25, 24)
     50#define AL_MC_ECC_UE_ADDR1_BANK		GENMASK(18, 16)
     51#define AL_MC_ECC_UE_ADDR1_COLUMN	GENMASK(11, 0)
     52
     53#define DRV_NAME "al_mc_edac"
     54#define AL_MC_EDAC_MSG_MAX 256
     55
     56struct al_mc_edac {
     57	void __iomem *mmio_base;
     58	spinlock_t lock;
     59	int irq_ce;
     60	int irq_ue;
     61};
     62
     63static void prepare_msg(char *message, size_t buffer_size,
     64			enum hw_event_mc_err_type type,
     65			u8 rank, u32 row, u8 bg, u8 bank, u16 column,
     66			u32 syn0, u32 syn1, u32 syn2)
     67{
     68	snprintf(message, buffer_size,
     69		 "%s rank=0x%x row=0x%x bg=0x%x bank=0x%x col=0x%x syn0: 0x%x syn1: 0x%x syn2: 0x%x",
     70		 type == HW_EVENT_ERR_UNCORRECTED ? "UE" : "CE",
     71		 rank, row, bg, bank, column, syn0, syn1, syn2);
     72}
     73
     74static int handle_ce(struct mem_ctl_info *mci)
     75{
     76	u32 eccerrcnt, ecccaddr0, ecccaddr1, ecccsyn0, ecccsyn1, ecccsyn2, row;
     77	struct al_mc_edac *al_mc = mci->pvt_info;
     78	char msg[AL_MC_EDAC_MSG_MAX];
     79	u16 ce_count, column;
     80	unsigned long flags;
     81	u8 rank, bg, bank;
     82
     83	eccerrcnt = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_ERR_COUNT);
     84	ce_count = FIELD_GET(AL_MC_ECC_ERR_COUNT_CE, eccerrcnt);
     85	if (!ce_count)
     86		return 0;
     87
     88	ecccaddr0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_ADDR0);
     89	ecccaddr1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_ADDR1);
     90	ecccsyn0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_SYND0);
     91	ecccsyn1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_SYND1);
     92	ecccsyn2 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_SYND2);
     93
     94	writel_relaxed(AL_MC_ECC_CLEAR_CE_COUNT | AL_MC_ECC_CLEAR_CE_ERR,
     95		       al_mc->mmio_base + AL_MC_ECC_CLEAR);
     96
     97	dev_dbg(mci->pdev, "eccuaddr0=0x%08x eccuaddr1=0x%08x\n",
     98		ecccaddr0, ecccaddr1);
     99
    100	rank = FIELD_GET(AL_MC_ECC_CE_ADDR0_RANK, ecccaddr0);
    101	row = FIELD_GET(AL_MC_ECC_CE_ADDR0_ROW, ecccaddr0);
    102
    103	bg = FIELD_GET(AL_MC_ECC_CE_ADDR1_BG, ecccaddr1);
    104	bank = FIELD_GET(AL_MC_ECC_CE_ADDR1_BANK, ecccaddr1);
    105	column = FIELD_GET(AL_MC_ECC_CE_ADDR1_COLUMN, ecccaddr1);
    106
    107	prepare_msg(msg, sizeof(msg), HW_EVENT_ERR_CORRECTED,
    108		    rank, row, bg, bank, column,
    109		    ecccsyn0, ecccsyn1, ecccsyn2);
    110
    111	spin_lock_irqsave(&al_mc->lock, flags);
    112	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
    113			     ce_count, 0, 0, 0, 0, 0, -1, mci->ctl_name, msg);
    114	spin_unlock_irqrestore(&al_mc->lock, flags);
    115
    116	return ce_count;
    117}
    118
    119static int handle_ue(struct mem_ctl_info *mci)
    120{
    121	u32 eccerrcnt, eccuaddr0, eccuaddr1, eccusyn0, eccusyn1, eccusyn2, row;
    122	struct al_mc_edac *al_mc = mci->pvt_info;
    123	char msg[AL_MC_EDAC_MSG_MAX];
    124	u16 ue_count, column;
    125	unsigned long flags;
    126	u8 rank, bg, bank;
    127
    128	eccerrcnt = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_ERR_COUNT);
    129	ue_count = FIELD_GET(AL_MC_ECC_ERR_COUNT_UE, eccerrcnt);
    130	if (!ue_count)
    131		return 0;
    132
    133	eccuaddr0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_ADDR0);
    134	eccuaddr1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_ADDR1);
    135	eccusyn0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_SYND0);
    136	eccusyn1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_SYND1);
    137	eccusyn2 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_SYND2);
    138
    139	writel_relaxed(AL_MC_ECC_CLEAR_UE_COUNT | AL_MC_ECC_CLEAR_UE_ERR,
    140		       al_mc->mmio_base + AL_MC_ECC_CLEAR);
    141
    142	dev_dbg(mci->pdev, "eccuaddr0=0x%08x eccuaddr1=0x%08x\n",
    143		eccuaddr0, eccuaddr1);
    144
    145	rank = FIELD_GET(AL_MC_ECC_UE_ADDR0_RANK, eccuaddr0);
    146	row = FIELD_GET(AL_MC_ECC_UE_ADDR0_ROW, eccuaddr0);
    147
    148	bg = FIELD_GET(AL_MC_ECC_UE_ADDR1_BG, eccuaddr1);
    149	bank = FIELD_GET(AL_MC_ECC_UE_ADDR1_BANK, eccuaddr1);
    150	column = FIELD_GET(AL_MC_ECC_UE_ADDR1_COLUMN, eccuaddr1);
    151
    152	prepare_msg(msg, sizeof(msg), HW_EVENT_ERR_UNCORRECTED,
    153		    rank, row, bg, bank, column,
    154		    eccusyn0, eccusyn1, eccusyn2);
    155
    156	spin_lock_irqsave(&al_mc->lock, flags);
    157	edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
    158			     ue_count, 0, 0, 0, 0, 0, -1, mci->ctl_name, msg);
    159	spin_unlock_irqrestore(&al_mc->lock, flags);
    160
    161	return ue_count;
    162}
    163
    164static void al_mc_edac_check(struct mem_ctl_info *mci)
    165{
    166	struct al_mc_edac *al_mc = mci->pvt_info;
    167
    168	if (al_mc->irq_ue <= 0)
    169		handle_ue(mci);
    170
    171	if (al_mc->irq_ce <= 0)
    172		handle_ce(mci);
    173}
    174
    175static irqreturn_t al_mc_edac_irq_handler_ue(int irq, void *info)
    176{
    177	struct platform_device *pdev = info;
    178	struct mem_ctl_info *mci = platform_get_drvdata(pdev);
    179
    180	if (handle_ue(mci))
    181		return IRQ_HANDLED;
    182	return IRQ_NONE;
    183}
    184
    185static irqreturn_t al_mc_edac_irq_handler_ce(int irq, void *info)
    186{
    187	struct platform_device *pdev = info;
    188	struct mem_ctl_info *mci = platform_get_drvdata(pdev);
    189
    190	if (handle_ce(mci))
    191		return IRQ_HANDLED;
    192	return IRQ_NONE;
    193}
    194
    195static enum scrub_type get_scrub_mode(void __iomem *mmio_base)
    196{
    197	u32 ecccfg0;
    198
    199	ecccfg0 = readl(mmio_base + AL_MC_ECC_CFG);
    200
    201	if (FIELD_GET(AL_MC_ECC_CFG_SCRUB_DISABLED, ecccfg0))
    202		return SCRUB_NONE;
    203	else
    204		return SCRUB_HW_SRC;
    205}
    206
    207static void devm_al_mc_edac_free(void *data)
    208{
    209	edac_mc_free(data);
    210}
    211
    212static void devm_al_mc_edac_del(void *data)
    213{
    214	edac_mc_del_mc(data);
    215}
    216
    217static int al_mc_edac_probe(struct platform_device *pdev)
    218{
    219	struct edac_mc_layer layers[1];
    220	struct mem_ctl_info *mci;
    221	struct al_mc_edac *al_mc;
    222	void __iomem *mmio_base;
    223	struct dimm_info *dimm;
    224	int ret;
    225
    226	mmio_base = devm_platform_ioremap_resource(pdev, 0);
    227	if (IS_ERR(mmio_base)) {
    228		dev_err(&pdev->dev, "failed to ioremap memory (%ld)\n",
    229			PTR_ERR(mmio_base));
    230		return PTR_ERR(mmio_base);
    231	}
    232
    233	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
    234	layers[0].size = 1;
    235	layers[0].is_virt_csrow = false;
    236	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
    237			    sizeof(struct al_mc_edac));
    238	if (!mci)
    239		return -ENOMEM;
    240
    241	ret = devm_add_action_or_reset(&pdev->dev, devm_al_mc_edac_free, mci);
    242	if (ret)
    243		return ret;
    244
    245	platform_set_drvdata(pdev, mci);
    246	al_mc = mci->pvt_info;
    247
    248	al_mc->mmio_base = mmio_base;
    249
    250	al_mc->irq_ue = of_irq_get_byname(pdev->dev.of_node, "ue");
    251	if (al_mc->irq_ue <= 0)
    252		dev_dbg(&pdev->dev,
    253			"no IRQ defined for UE - falling back to polling\n");
    254
    255	al_mc->irq_ce = of_irq_get_byname(pdev->dev.of_node, "ce");
    256	if (al_mc->irq_ce <= 0)
    257		dev_dbg(&pdev->dev,
    258			"no IRQ defined for CE - falling back to polling\n");
    259
    260	/*
    261	 * In case both interrupts (ue/ce) are to be found, use interrupt mode.
    262	 * In case none of the interrupt are foud, use polling mode.
    263	 * In case only one interrupt is found, use interrupt mode for it but
    264	 * keep polling mode enable for the other.
    265	 */
    266	if (al_mc->irq_ue <= 0 || al_mc->irq_ce <= 0) {
    267		edac_op_state = EDAC_OPSTATE_POLL;
    268		mci->edac_check = al_mc_edac_check;
    269	} else {
    270		edac_op_state = EDAC_OPSTATE_INT;
    271	}
    272
    273	spin_lock_init(&al_mc->lock);
    274
    275	mci->mtype_cap = MEM_FLAG_DDR3 | MEM_FLAG_DDR4;
    276	mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
    277	mci->edac_cap = EDAC_FLAG_SECDED;
    278	mci->mod_name = DRV_NAME;
    279	mci->ctl_name = "al_mc";
    280	mci->pdev = &pdev->dev;
    281	mci->scrub_mode = get_scrub_mode(mmio_base);
    282
    283	dimm = *mci->dimms;
    284	dimm->grain = 1;
    285
    286	ret = edac_mc_add_mc(mci);
    287	if (ret < 0) {
    288		dev_err(&pdev->dev,
    289			"fail to add memory controller device (%d)\n",
    290			ret);
    291		return ret;
    292	}
    293
    294	ret = devm_add_action_or_reset(&pdev->dev, devm_al_mc_edac_del, &pdev->dev);
    295	if (ret)
    296		return ret;
    297
    298	if (al_mc->irq_ue > 0) {
    299		ret = devm_request_irq(&pdev->dev,
    300				       al_mc->irq_ue,
    301				       al_mc_edac_irq_handler_ue,
    302				       IRQF_SHARED,
    303				       pdev->name,
    304				       pdev);
    305		if (ret != 0) {
    306			dev_err(&pdev->dev,
    307				"failed to request UE IRQ %d (%d)\n",
    308				al_mc->irq_ue, ret);
    309			return ret;
    310		}
    311	}
    312
    313	if (al_mc->irq_ce > 0) {
    314		ret = devm_request_irq(&pdev->dev,
    315				       al_mc->irq_ce,
    316				       al_mc_edac_irq_handler_ce,
    317				       IRQF_SHARED,
    318				       pdev->name,
    319				       pdev);
    320		if (ret != 0) {
    321			dev_err(&pdev->dev,
    322				"failed to request CE IRQ %d (%d)\n",
    323				al_mc->irq_ce, ret);
    324			return ret;
    325		}
    326	}
    327
    328	return 0;
    329}
    330
    331static const struct of_device_id al_mc_edac_of_match[] = {
    332	{ .compatible = "amazon,al-mc-edac", },
    333	{},
    334};
    335
    336MODULE_DEVICE_TABLE(of, al_mc_edac_of_match);
    337
    338static struct platform_driver al_mc_edac_driver = {
    339	.probe = al_mc_edac_probe,
    340	.driver = {
    341		.name = DRV_NAME,
    342		.of_match_table = al_mc_edac_of_match,
    343	},
    344};
    345
    346module_platform_driver(al_mc_edac_driver);
    347
    348MODULE_LICENSE("GPL v2");
    349MODULE_AUTHOR("Talel Shenhar");
    350MODULE_DESCRIPTION("Amazon's Annapurna Lab's Memory Controller EDAC Driver");