ras.c (7769B)
1// SPDX-License-Identifier: GPL-2.0-or-later 2/* 3 * Copyright 2006-2008, IBM Corporation. 4 */ 5 6#undef DEBUG 7 8#include <linux/types.h> 9#include <linux/kernel.h> 10#include <linux/slab.h> 11#include <linux/smp.h> 12#include <linux/reboot.h> 13#include <linux/kexec.h> 14#include <linux/crash_dump.h> 15#include <linux/of.h> 16 17#include <asm/kexec.h> 18#include <asm/reg.h> 19#include <asm/io.h> 20#include <asm/machdep.h> 21#include <asm/rtas.h> 22#include <asm/cell-regs.h> 23 24#include "ras.h" 25 26 27static void dump_fir(int cpu) 28{ 29 struct cbe_pmd_regs __iomem *pregs = cbe_get_cpu_pmd_regs(cpu); 30 struct cbe_iic_regs __iomem *iregs = cbe_get_cpu_iic_regs(cpu); 31 32 if (pregs == NULL) 33 return; 34 35 /* Todo: do some nicer parsing of bits and based on them go down 36 * to other sub-units FIRs and not only IIC 37 */ 38 printk(KERN_ERR "Global Checkstop FIR : 0x%016llx\n", 39 in_be64(&pregs->checkstop_fir)); 40 printk(KERN_ERR "Global Recoverable FIR : 0x%016llx\n", 41 in_be64(&pregs->checkstop_fir)); 42 printk(KERN_ERR "Global MachineCheck FIR : 0x%016llx\n", 43 in_be64(&pregs->spec_att_mchk_fir)); 44 45 if (iregs == NULL) 46 return; 47 printk(KERN_ERR "IOC FIR : 0x%016llx\n", 48 in_be64(&iregs->ioc_fir)); 49 50} 51 52DEFINE_INTERRUPT_HANDLER(cbe_system_error_exception) 53{ 54 int cpu = smp_processor_id(); 55 56 printk(KERN_ERR "System Error Interrupt on CPU %d !\n", cpu); 57 dump_fir(cpu); 58 dump_stack(); 59} 60 61DEFINE_INTERRUPT_HANDLER(cbe_maintenance_exception) 62{ 63 int cpu = smp_processor_id(); 64 65 /* 66 * Nothing implemented for the maintenance interrupt at this point 67 */ 68 69 printk(KERN_ERR "Unhandled Maintenance interrupt on CPU %d !\n", cpu); 70 dump_stack(); 71} 72 73DEFINE_INTERRUPT_HANDLER(cbe_thermal_exception) 74{ 75 int cpu = smp_processor_id(); 76 77 /* 78 * Nothing implemented for the thermal interrupt at this point 79 */ 80 81 printk(KERN_ERR "Unhandled Thermal interrupt on CPU %d !\n", cpu); 82 dump_stack(); 83} 84 85static int cbe_machine_check_handler(struct pt_regs *regs) 86{ 87 int cpu = smp_processor_id(); 88 89 printk(KERN_ERR "Machine Check Interrupt on CPU %d !\n", cpu); 90 dump_fir(cpu); 91 92 /* No recovery from this code now, lets continue */ 93 return 0; 94} 95 96struct ptcal_area { 97 struct list_head list; 98 int nid; 99 int order; 100 struct page *pages; 101}; 102 103static LIST_HEAD(ptcal_list); 104 105static int ptcal_start_tok, ptcal_stop_tok; 106 107static int __init cbe_ptcal_enable_on_node(int nid, int order) 108{ 109 struct ptcal_area *area; 110 int ret = -ENOMEM; 111 unsigned long addr; 112 113 if (is_kdump_kernel()) 114 rtas_call(ptcal_stop_tok, 1, 1, NULL, nid); 115 116 area = kmalloc(sizeof(*area), GFP_KERNEL); 117 if (!area) 118 goto out_err; 119 120 area->nid = nid; 121 area->order = order; 122 area->pages = __alloc_pages_node(area->nid, 123 GFP_KERNEL|__GFP_THISNODE, 124 area->order); 125 126 if (!area->pages) { 127 printk(KERN_WARNING "%s: no page on node %d\n", 128 __func__, area->nid); 129 goto out_free_area; 130 } 131 132 /* 133 * We move the ptcal area to the middle of the allocated 134 * page, in order to avoid prefetches in memcpy and similar 135 * functions stepping on it. 136 */ 137 addr = __pa(page_address(area->pages)) + (PAGE_SIZE >> 1); 138 printk(KERN_DEBUG "%s: enabling PTCAL on node %d address=0x%016lx\n", 139 __func__, area->nid, addr); 140 141 ret = -EIO; 142 if (rtas_call(ptcal_start_tok, 3, 1, NULL, area->nid, 143 (unsigned int)(addr >> 32), 144 (unsigned int)(addr & 0xffffffff))) { 145 printk(KERN_ERR "%s: error enabling PTCAL on node %d!\n", 146 __func__, nid); 147 goto out_free_pages; 148 } 149 150 list_add(&area->list, &ptcal_list); 151 152 return 0; 153 154out_free_pages: 155 __free_pages(area->pages, area->order); 156out_free_area: 157 kfree(area); 158out_err: 159 return ret; 160} 161 162static int __init cbe_ptcal_enable(void) 163{ 164 const u32 *size; 165 struct device_node *np; 166 int order, found_mic = 0; 167 168 np = of_find_node_by_path("/rtas"); 169 if (!np) 170 return -ENODEV; 171 172 size = of_get_property(np, "ibm,cbe-ptcal-size", NULL); 173 if (!size) { 174 of_node_put(np); 175 return -ENODEV; 176 } 177 178 pr_debug("%s: enabling PTCAL, size = 0x%x\n", __func__, *size); 179 order = get_order(*size); 180 of_node_put(np); 181 182 /* support for malta device trees, with be@/mic@ nodes */ 183 for_each_node_by_type(np, "mic-tm") { 184 cbe_ptcal_enable_on_node(of_node_to_nid(np), order); 185 found_mic = 1; 186 } 187 188 if (found_mic) 189 return 0; 190 191 /* support for older device tree - use cpu nodes */ 192 for_each_node_by_type(np, "cpu") { 193 const u32 *nid = of_get_property(np, "node-id", NULL); 194 if (!nid) { 195 printk(KERN_ERR "%s: node %pOF is missing node-id?\n", 196 __func__, np); 197 continue; 198 } 199 cbe_ptcal_enable_on_node(*nid, order); 200 found_mic = 1; 201 } 202 203 return found_mic ? 0 : -ENODEV; 204} 205 206static int cbe_ptcal_disable(void) 207{ 208 struct ptcal_area *area, *tmp; 209 int ret = 0; 210 211 pr_debug("%s: disabling PTCAL\n", __func__); 212 213 list_for_each_entry_safe(area, tmp, &ptcal_list, list) { 214 /* disable ptcal on this node */ 215 if (rtas_call(ptcal_stop_tok, 1, 1, NULL, area->nid)) { 216 printk(KERN_ERR "%s: error disabling PTCAL " 217 "on node %d!\n", __func__, 218 area->nid); 219 ret = -EIO; 220 continue; 221 } 222 223 /* ensure we can access the PTCAL area */ 224 memset(page_address(area->pages), 0, 225 1 << (area->order + PAGE_SHIFT)); 226 227 /* clean up */ 228 list_del(&area->list); 229 __free_pages(area->pages, area->order); 230 kfree(area); 231 } 232 233 return ret; 234} 235 236static int cbe_ptcal_notify_reboot(struct notifier_block *nb, 237 unsigned long code, void *data) 238{ 239 return cbe_ptcal_disable(); 240} 241 242static void cbe_ptcal_crash_shutdown(void) 243{ 244 cbe_ptcal_disable(); 245} 246 247static struct notifier_block cbe_ptcal_reboot_notifier = { 248 .notifier_call = cbe_ptcal_notify_reboot 249}; 250 251#ifdef CONFIG_PPC_IBM_CELL_RESETBUTTON 252static int sysreset_hack; 253 254static int __init cbe_sysreset_init(void) 255{ 256 struct cbe_pmd_regs __iomem *regs; 257 258 sysreset_hack = of_machine_is_compatible("IBM,CBPLUS-1.0"); 259 if (!sysreset_hack) 260 return 0; 261 262 regs = cbe_get_cpu_pmd_regs(0); 263 if (!regs) 264 return 0; 265 266 /* Enable JTAG system-reset hack */ 267 out_be32(®s->fir_mode_reg, 268 in_be32(®s->fir_mode_reg) | 269 CBE_PMD_FIR_MODE_M8); 270 271 return 0; 272} 273device_initcall(cbe_sysreset_init); 274 275int cbe_sysreset_hack(void) 276{ 277 struct cbe_pmd_regs __iomem *regs; 278 279 /* 280 * The BMC can inject user triggered system reset exceptions, 281 * but cannot set the system reset reason in srr1, 282 * so check an extra register here. 283 */ 284 if (sysreset_hack && (smp_processor_id() == 0)) { 285 regs = cbe_get_cpu_pmd_regs(0); 286 if (!regs) 287 return 0; 288 if (in_be64(®s->ras_esc_0) & 0x0000ffff) { 289 out_be64(®s->ras_esc_0, 0); 290 return 0; 291 } 292 } 293 return 1; 294} 295#endif /* CONFIG_PPC_IBM_CELL_RESETBUTTON */ 296 297static int __init cbe_ptcal_init(void) 298{ 299 int ret; 300 ptcal_start_tok = rtas_token("ibm,cbe-start-ptcal"); 301 ptcal_stop_tok = rtas_token("ibm,cbe-stop-ptcal"); 302 303 if (ptcal_start_tok == RTAS_UNKNOWN_SERVICE 304 || ptcal_stop_tok == RTAS_UNKNOWN_SERVICE) 305 return -ENODEV; 306 307 ret = register_reboot_notifier(&cbe_ptcal_reboot_notifier); 308 if (ret) 309 goto out1; 310 311 ret = crash_shutdown_register(&cbe_ptcal_crash_shutdown); 312 if (ret) 313 goto out2; 314 315 return cbe_ptcal_enable(); 316 317out2: 318 unregister_reboot_notifier(&cbe_ptcal_reboot_notifier); 319out1: 320 printk(KERN_ERR "Can't disable PTCAL, so not enabling\n"); 321 return ret; 322} 323 324arch_initcall(cbe_ptcal_init); 325 326void __init cbe_ras_init(void) 327{ 328 unsigned long hid0; 329 330 /* 331 * Enable System Error & thermal interrupts and wakeup conditions 332 */ 333 334 hid0 = mfspr(SPRN_HID0); 335 hid0 |= HID0_CBE_THERM_INT_EN | HID0_CBE_THERM_WAKEUP | 336 HID0_CBE_SYSERR_INT_EN | HID0_CBE_SYSERR_WAKEUP; 337 mtspr(SPRN_HID0, hid0); 338 mb(); 339 340 /* 341 * Install machine check handler. Leave setting of precise mode to 342 * what the firmware did for now 343 */ 344 ppc_md.machine_check_exception = cbe_machine_check_handler; 345 mb(); 346 347 /* 348 * For now, we assume that IOC_FIR is already set to forward some 349 * error conditions to the System Error handler. If that is not true 350 * then it will have to be fixed up here. 351 */ 352}