al_mc_edac.c (9595B)
1// SPDX-License-Identifier: GPL-2.0 2/* 3 * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 */ 5#include <linux/bitfield.h> 6#include <linux/bitops.h> 7#include <linux/edac.h> 8#include <linux/of_irq.h> 9#include <linux/platform_device.h> 10#include <linux/spinlock.h> 11#include "edac_module.h" 12 13/* Registers Offset */ 14#define AL_MC_ECC_CFG 0x70 15#define AL_MC_ECC_CLEAR 0x7c 16#define AL_MC_ECC_ERR_COUNT 0x80 17#define AL_MC_ECC_CE_ADDR0 0x84 18#define AL_MC_ECC_CE_ADDR1 0x88 19#define AL_MC_ECC_UE_ADDR0 0xa4 20#define AL_MC_ECC_UE_ADDR1 0xa8 21#define AL_MC_ECC_CE_SYND0 0x8c 22#define AL_MC_ECC_CE_SYND1 0x90 23#define AL_MC_ECC_CE_SYND2 0x94 24#define AL_MC_ECC_UE_SYND0 0xac 25#define AL_MC_ECC_UE_SYND1 0xb0 26#define AL_MC_ECC_UE_SYND2 0xb4 27 28/* Registers Fields */ 29#define AL_MC_ECC_CFG_SCRUB_DISABLED BIT(4) 30 31#define AL_MC_ECC_CLEAR_UE_COUNT BIT(3) 32#define AL_MC_ECC_CLEAR_CE_COUNT BIT(2) 33#define AL_MC_ECC_CLEAR_UE_ERR BIT(1) 34#define AL_MC_ECC_CLEAR_CE_ERR BIT(0) 35 36#define AL_MC_ECC_ERR_COUNT_UE GENMASK(31, 16) 37#define AL_MC_ECC_ERR_COUNT_CE GENMASK(15, 0) 38 39#define AL_MC_ECC_CE_ADDR0_RANK GENMASK(25, 24) 40#define AL_MC_ECC_CE_ADDR0_ROW GENMASK(17, 0) 41 42#define AL_MC_ECC_CE_ADDR1_BG GENMASK(25, 24) 43#define AL_MC_ECC_CE_ADDR1_BANK GENMASK(18, 16) 44#define AL_MC_ECC_CE_ADDR1_COLUMN GENMASK(11, 0) 45 46#define AL_MC_ECC_UE_ADDR0_RANK GENMASK(25, 24) 47#define AL_MC_ECC_UE_ADDR0_ROW GENMASK(17, 0) 48 49#define AL_MC_ECC_UE_ADDR1_BG GENMASK(25, 24) 50#define AL_MC_ECC_UE_ADDR1_BANK GENMASK(18, 16) 51#define AL_MC_ECC_UE_ADDR1_COLUMN GENMASK(11, 0) 52 53#define DRV_NAME "al_mc_edac" 54#define AL_MC_EDAC_MSG_MAX 256 55 56struct al_mc_edac { 57 void __iomem *mmio_base; 58 spinlock_t lock; 59 int irq_ce; 60 int irq_ue; 61}; 62 63static void prepare_msg(char *message, size_t buffer_size, 64 enum hw_event_mc_err_type type, 65 u8 rank, u32 row, u8 bg, u8 bank, u16 column, 66 u32 syn0, u32 syn1, u32 syn2) 67{ 68 snprintf(message, buffer_size, 69 "%s rank=0x%x row=0x%x bg=0x%x bank=0x%x col=0x%x syn0: 0x%x syn1: 0x%x syn2: 0x%x", 70 type == HW_EVENT_ERR_UNCORRECTED ? "UE" : "CE", 71 rank, row, bg, bank, column, syn0, syn1, syn2); 72} 73 74static int handle_ce(struct mem_ctl_info *mci) 75{ 76 u32 eccerrcnt, ecccaddr0, ecccaddr1, ecccsyn0, ecccsyn1, ecccsyn2, row; 77 struct al_mc_edac *al_mc = mci->pvt_info; 78 char msg[AL_MC_EDAC_MSG_MAX]; 79 u16 ce_count, column; 80 unsigned long flags; 81 u8 rank, bg, bank; 82 83 eccerrcnt = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_ERR_COUNT); 84 ce_count = FIELD_GET(AL_MC_ECC_ERR_COUNT_CE, eccerrcnt); 85 if (!ce_count) 86 return 0; 87 88 ecccaddr0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_ADDR0); 89 ecccaddr1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_ADDR1); 90 ecccsyn0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_SYND0); 91 ecccsyn1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_SYND1); 92 ecccsyn2 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_SYND2); 93 94 writel_relaxed(AL_MC_ECC_CLEAR_CE_COUNT | AL_MC_ECC_CLEAR_CE_ERR, 95 al_mc->mmio_base + AL_MC_ECC_CLEAR); 96 97 dev_dbg(mci->pdev, "eccuaddr0=0x%08x eccuaddr1=0x%08x\n", 98 ecccaddr0, ecccaddr1); 99 100 rank = FIELD_GET(AL_MC_ECC_CE_ADDR0_RANK, ecccaddr0); 101 row = FIELD_GET(AL_MC_ECC_CE_ADDR0_ROW, ecccaddr0); 102 103 bg = FIELD_GET(AL_MC_ECC_CE_ADDR1_BG, ecccaddr1); 104 bank = FIELD_GET(AL_MC_ECC_CE_ADDR1_BANK, ecccaddr1); 105 column = FIELD_GET(AL_MC_ECC_CE_ADDR1_COLUMN, ecccaddr1); 106 107 prepare_msg(msg, sizeof(msg), HW_EVENT_ERR_CORRECTED, 108 rank, row, bg, bank, column, 109 ecccsyn0, ecccsyn1, ecccsyn2); 110 111 spin_lock_irqsave(&al_mc->lock, flags); 112 edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 113 ce_count, 0, 0, 0, 0, 0, -1, mci->ctl_name, msg); 114 spin_unlock_irqrestore(&al_mc->lock, flags); 115 116 return ce_count; 117} 118 119static int handle_ue(struct mem_ctl_info *mci) 120{ 121 u32 eccerrcnt, eccuaddr0, eccuaddr1, eccusyn0, eccusyn1, eccusyn2, row; 122 struct al_mc_edac *al_mc = mci->pvt_info; 123 char msg[AL_MC_EDAC_MSG_MAX]; 124 u16 ue_count, column; 125 unsigned long flags; 126 u8 rank, bg, bank; 127 128 eccerrcnt = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_ERR_COUNT); 129 ue_count = FIELD_GET(AL_MC_ECC_ERR_COUNT_UE, eccerrcnt); 130 if (!ue_count) 131 return 0; 132 133 eccuaddr0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_ADDR0); 134 eccuaddr1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_ADDR1); 135 eccusyn0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_SYND0); 136 eccusyn1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_SYND1); 137 eccusyn2 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_SYND2); 138 139 writel_relaxed(AL_MC_ECC_CLEAR_UE_COUNT | AL_MC_ECC_CLEAR_UE_ERR, 140 al_mc->mmio_base + AL_MC_ECC_CLEAR); 141 142 dev_dbg(mci->pdev, "eccuaddr0=0x%08x eccuaddr1=0x%08x\n", 143 eccuaddr0, eccuaddr1); 144 145 rank = FIELD_GET(AL_MC_ECC_UE_ADDR0_RANK, eccuaddr0); 146 row = FIELD_GET(AL_MC_ECC_UE_ADDR0_ROW, eccuaddr0); 147 148 bg = FIELD_GET(AL_MC_ECC_UE_ADDR1_BG, eccuaddr1); 149 bank = FIELD_GET(AL_MC_ECC_UE_ADDR1_BANK, eccuaddr1); 150 column = FIELD_GET(AL_MC_ECC_UE_ADDR1_COLUMN, eccuaddr1); 151 152 prepare_msg(msg, sizeof(msg), HW_EVENT_ERR_UNCORRECTED, 153 rank, row, bg, bank, column, 154 eccusyn0, eccusyn1, eccusyn2); 155 156 spin_lock_irqsave(&al_mc->lock, flags); 157 edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 158 ue_count, 0, 0, 0, 0, 0, -1, mci->ctl_name, msg); 159 spin_unlock_irqrestore(&al_mc->lock, flags); 160 161 return ue_count; 162} 163 164static void al_mc_edac_check(struct mem_ctl_info *mci) 165{ 166 struct al_mc_edac *al_mc = mci->pvt_info; 167 168 if (al_mc->irq_ue <= 0) 169 handle_ue(mci); 170 171 if (al_mc->irq_ce <= 0) 172 handle_ce(mci); 173} 174 175static irqreturn_t al_mc_edac_irq_handler_ue(int irq, void *info) 176{ 177 struct platform_device *pdev = info; 178 struct mem_ctl_info *mci = platform_get_drvdata(pdev); 179 180 if (handle_ue(mci)) 181 return IRQ_HANDLED; 182 return IRQ_NONE; 183} 184 185static irqreturn_t al_mc_edac_irq_handler_ce(int irq, void *info) 186{ 187 struct platform_device *pdev = info; 188 struct mem_ctl_info *mci = platform_get_drvdata(pdev); 189 190 if (handle_ce(mci)) 191 return IRQ_HANDLED; 192 return IRQ_NONE; 193} 194 195static enum scrub_type get_scrub_mode(void __iomem *mmio_base) 196{ 197 u32 ecccfg0; 198 199 ecccfg0 = readl(mmio_base + AL_MC_ECC_CFG); 200 201 if (FIELD_GET(AL_MC_ECC_CFG_SCRUB_DISABLED, ecccfg0)) 202 return SCRUB_NONE; 203 else 204 return SCRUB_HW_SRC; 205} 206 207static void devm_al_mc_edac_free(void *data) 208{ 209 edac_mc_free(data); 210} 211 212static void devm_al_mc_edac_del(void *data) 213{ 214 edac_mc_del_mc(data); 215} 216 217static int al_mc_edac_probe(struct platform_device *pdev) 218{ 219 struct edac_mc_layer layers[1]; 220 struct mem_ctl_info *mci; 221 struct al_mc_edac *al_mc; 222 void __iomem *mmio_base; 223 struct dimm_info *dimm; 224 int ret; 225 226 mmio_base = devm_platform_ioremap_resource(pdev, 0); 227 if (IS_ERR(mmio_base)) { 228 dev_err(&pdev->dev, "failed to ioremap memory (%ld)\n", 229 PTR_ERR(mmio_base)); 230 return PTR_ERR(mmio_base); 231 } 232 233 layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; 234 layers[0].size = 1; 235 layers[0].is_virt_csrow = false; 236 mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, 237 sizeof(struct al_mc_edac)); 238 if (!mci) 239 return -ENOMEM; 240 241 ret = devm_add_action_or_reset(&pdev->dev, devm_al_mc_edac_free, mci); 242 if (ret) 243 return ret; 244 245 platform_set_drvdata(pdev, mci); 246 al_mc = mci->pvt_info; 247 248 al_mc->mmio_base = mmio_base; 249 250 al_mc->irq_ue = of_irq_get_byname(pdev->dev.of_node, "ue"); 251 if (al_mc->irq_ue <= 0) 252 dev_dbg(&pdev->dev, 253 "no IRQ defined for UE - falling back to polling\n"); 254 255 al_mc->irq_ce = of_irq_get_byname(pdev->dev.of_node, "ce"); 256 if (al_mc->irq_ce <= 0) 257 dev_dbg(&pdev->dev, 258 "no IRQ defined for CE - falling back to polling\n"); 259 260 /* 261 * In case both interrupts (ue/ce) are to be found, use interrupt mode. 262 * In case none of the interrupt are foud, use polling mode. 263 * In case only one interrupt is found, use interrupt mode for it but 264 * keep polling mode enable for the other. 265 */ 266 if (al_mc->irq_ue <= 0 || al_mc->irq_ce <= 0) { 267 edac_op_state = EDAC_OPSTATE_POLL; 268 mci->edac_check = al_mc_edac_check; 269 } else { 270 edac_op_state = EDAC_OPSTATE_INT; 271 } 272 273 spin_lock_init(&al_mc->lock); 274 275 mci->mtype_cap = MEM_FLAG_DDR3 | MEM_FLAG_DDR4; 276 mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; 277 mci->edac_cap = EDAC_FLAG_SECDED; 278 mci->mod_name = DRV_NAME; 279 mci->ctl_name = "al_mc"; 280 mci->pdev = &pdev->dev; 281 mci->scrub_mode = get_scrub_mode(mmio_base); 282 283 dimm = *mci->dimms; 284 dimm->grain = 1; 285 286 ret = edac_mc_add_mc(mci); 287 if (ret < 0) { 288 dev_err(&pdev->dev, 289 "fail to add memory controller device (%d)\n", 290 ret); 291 return ret; 292 } 293 294 ret = devm_add_action_or_reset(&pdev->dev, devm_al_mc_edac_del, &pdev->dev); 295 if (ret) 296 return ret; 297 298 if (al_mc->irq_ue > 0) { 299 ret = devm_request_irq(&pdev->dev, 300 al_mc->irq_ue, 301 al_mc_edac_irq_handler_ue, 302 IRQF_SHARED, 303 pdev->name, 304 pdev); 305 if (ret != 0) { 306 dev_err(&pdev->dev, 307 "failed to request UE IRQ %d (%d)\n", 308 al_mc->irq_ue, ret); 309 return ret; 310 } 311 } 312 313 if (al_mc->irq_ce > 0) { 314 ret = devm_request_irq(&pdev->dev, 315 al_mc->irq_ce, 316 al_mc_edac_irq_handler_ce, 317 IRQF_SHARED, 318 pdev->name, 319 pdev); 320 if (ret != 0) { 321 dev_err(&pdev->dev, 322 "failed to request CE IRQ %d (%d)\n", 323 al_mc->irq_ce, ret); 324 return ret; 325 } 326 } 327 328 return 0; 329} 330 331static const struct of_device_id al_mc_edac_of_match[] = { 332 { .compatible = "amazon,al-mc-edac", }, 333 {}, 334}; 335 336MODULE_DEVICE_TABLE(of, al_mc_edac_of_match); 337 338static struct platform_driver al_mc_edac_driver = { 339 .probe = al_mc_edac_probe, 340 .driver = { 341 .name = DRV_NAME, 342 .of_match_table = al_mc_edac_of_match, 343 }, 344}; 345 346module_platform_driver(al_mc_edac_driver); 347 348MODULE_LICENSE("GPL v2"); 349MODULE_AUTHOR("Talel Shenhar"); 350MODULE_DESCRIPTION("Amazon's Annapurna Lab's Memory Controller EDAC Driver");