opal-memory-errors.c (3388B)
1// SPDX-License-Identifier: GPL-2.0-or-later 2/* 3 * OPAL asynchronus Memory error handling support in PowerNV. 4 * 5 * Copyright 2013 IBM Corporation 6 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> 7 */ 8 9#undef DEBUG 10 11#include <linux/kernel.h> 12#include <linux/init.h> 13#include <linux/of.h> 14#include <linux/mm.h> 15#include <linux/slab.h> 16 17#include <asm/machdep.h> 18#include <asm/opal.h> 19#include <asm/cputable.h> 20 21static int opal_mem_err_nb_init; 22static LIST_HEAD(opal_memory_err_list); 23static DEFINE_SPINLOCK(opal_mem_err_lock); 24 25struct OpalMsgNode { 26 struct list_head list; 27 struct opal_msg msg; 28}; 29 30static void handle_memory_error_event(struct OpalMemoryErrorData *merr_evt) 31{ 32 uint64_t paddr_start, paddr_end; 33 34 pr_debug("%s: Retrieved memory error event, type: 0x%x\n", 35 __func__, merr_evt->type); 36 switch (merr_evt->type) { 37 case OPAL_MEM_ERR_TYPE_RESILIENCE: 38 paddr_start = be64_to_cpu(merr_evt->u.resilience.physical_address_start); 39 paddr_end = be64_to_cpu(merr_evt->u.resilience.physical_address_end); 40 break; 41 case OPAL_MEM_ERR_TYPE_DYN_DALLOC: 42 paddr_start = be64_to_cpu(merr_evt->u.dyn_dealloc.physical_address_start); 43 paddr_end = be64_to_cpu(merr_evt->u.dyn_dealloc.physical_address_end); 44 break; 45 default: 46 return; 47 } 48 49 for (; paddr_start < paddr_end; paddr_start += PAGE_SIZE) { 50 memory_failure(paddr_start >> PAGE_SHIFT, 0); 51 } 52} 53 54static void handle_memory_error(void) 55{ 56 unsigned long flags; 57 struct OpalMemoryErrorData *merr_evt; 58 struct OpalMsgNode *msg_node; 59 60 spin_lock_irqsave(&opal_mem_err_lock, flags); 61 while (!list_empty(&opal_memory_err_list)) { 62 msg_node = list_entry(opal_memory_err_list.next, 63 struct OpalMsgNode, list); 64 list_del(&msg_node->list); 65 spin_unlock_irqrestore(&opal_mem_err_lock, flags); 66 67 merr_evt = (struct OpalMemoryErrorData *) 68 &msg_node->msg.params[0]; 69 handle_memory_error_event(merr_evt); 70 kfree(msg_node); 71 spin_lock_irqsave(&opal_mem_err_lock, flags); 72 } 73 spin_unlock_irqrestore(&opal_mem_err_lock, flags); 74} 75 76static void mem_error_handler(struct work_struct *work) 77{ 78 handle_memory_error(); 79} 80 81static DECLARE_WORK(mem_error_work, mem_error_handler); 82 83/* 84 * opal_memory_err_event - notifier handler that queues up the opal message 85 * to be processed later. 86 */ 87static int opal_memory_err_event(struct notifier_block *nb, 88 unsigned long msg_type, void *msg) 89{ 90 unsigned long flags; 91 struct OpalMsgNode *msg_node; 92 93 if (msg_type != OPAL_MSG_MEM_ERR) 94 return 0; 95 96 msg_node = kzalloc(sizeof(*msg_node), GFP_ATOMIC); 97 if (!msg_node) { 98 pr_err("MEMORY_ERROR: out of memory, Opal message event not" 99 "handled\n"); 100 return -ENOMEM; 101 } 102 memcpy(&msg_node->msg, msg, sizeof(msg_node->msg)); 103 104 spin_lock_irqsave(&opal_mem_err_lock, flags); 105 list_add(&msg_node->list, &opal_memory_err_list); 106 spin_unlock_irqrestore(&opal_mem_err_lock, flags); 107 108 schedule_work(&mem_error_work); 109 return 0; 110} 111 112static struct notifier_block opal_mem_err_nb = { 113 .notifier_call = opal_memory_err_event, 114 .next = NULL, 115 .priority = 0, 116}; 117 118static int __init opal_mem_err_init(void) 119{ 120 int ret; 121 122 if (!opal_mem_err_nb_init) { 123 ret = opal_message_notifier_register( 124 OPAL_MSG_MEM_ERR, &opal_mem_err_nb); 125 if (ret) { 126 pr_err("%s: Can't register OPAL event notifier (%d)\n", 127 __func__, ret); 128 return ret; 129 } 130 opal_mem_err_nb_init = 1; 131 } 132 return 0; 133} 134machine_device_initcall(powernv, opal_mem_err_init);