/* mcelog.c (10468B) */
1/****************************************************************************** 2 * mcelog.c 3 * Driver for receiving and transferring machine check error infomation 4 * 5 * Copyright (c) 2012 Intel Corporation 6 * Author: Liu, Jinsong <jinsong.liu@intel.com> 7 * Author: Jiang, Yunhong <yunhong.jiang@intel.com> 8 * Author: Ke, Liping <liping.ke@intel.com> 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public License version 2 12 * as published by the Free Software Foundation; or, when distributed 13 * separately from the Linux kernel or incorporated into other 14 * software packages, subject to the following license: 15 * 16 * Permission is hereby granted, free of charge, to any person obtaining a copy 17 * of this source file (the "Software"), to deal in the Software without 18 * restriction, including without limitation the rights to use, copy, modify, 19 * merge, publish, distribute, sublicense, and/or sell copies of the Software, 20 * and to permit persons to whom the Software is furnished to do so, subject to 21 * the following conditions: 22 * 23 * The above copyright notice and this permission notice shall be included in 24 * all copies or substantial portions of the Software. 25 * 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 27 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 28 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 29 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 30 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 31 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 32 * IN THE SOFTWARE. 
33 */ 34 35#define pr_fmt(fmt) "xen_mcelog: " fmt 36 37#include <linux/init.h> 38#include <linux/types.h> 39#include <linux/kernel.h> 40#include <linux/slab.h> 41#include <linux/fs.h> 42#include <linux/device.h> 43#include <linux/miscdevice.h> 44#include <linux/uaccess.h> 45#include <linux/capability.h> 46#include <linux/poll.h> 47#include <linux/sched.h> 48 49#include <xen/interface/xen.h> 50#include <xen/events.h> 51#include <xen/interface/vcpu.h> 52#include <xen/xen.h> 53#include <asm/xen/hypercall.h> 54#include <asm/xen/hypervisor.h> 55 56static struct mc_info g_mi; 57static struct mcinfo_logical_cpu *g_physinfo; 58static uint32_t ncpus; 59 60static DEFINE_MUTEX(mcelog_lock); 61 62static struct xen_mce_log xen_mcelog = { 63 .signature = XEN_MCE_LOG_SIGNATURE, 64 .len = XEN_MCE_LOG_LEN, 65 .recordlen = sizeof(struct xen_mce), 66}; 67 68static DEFINE_SPINLOCK(xen_mce_chrdev_state_lock); 69static int xen_mce_chrdev_open_count; /* #times opened */ 70static int xen_mce_chrdev_open_exclu; /* already open exclusive? 
*/ 71 72static DECLARE_WAIT_QUEUE_HEAD(xen_mce_chrdev_wait); 73 74static int xen_mce_chrdev_open(struct inode *inode, struct file *file) 75{ 76 spin_lock(&xen_mce_chrdev_state_lock); 77 78 if (xen_mce_chrdev_open_exclu || 79 (xen_mce_chrdev_open_count && (file->f_flags & O_EXCL))) { 80 spin_unlock(&xen_mce_chrdev_state_lock); 81 82 return -EBUSY; 83 } 84 85 if (file->f_flags & O_EXCL) 86 xen_mce_chrdev_open_exclu = 1; 87 xen_mce_chrdev_open_count++; 88 89 spin_unlock(&xen_mce_chrdev_state_lock); 90 91 return nonseekable_open(inode, file); 92} 93 94static int xen_mce_chrdev_release(struct inode *inode, struct file *file) 95{ 96 spin_lock(&xen_mce_chrdev_state_lock); 97 98 xen_mce_chrdev_open_count--; 99 xen_mce_chrdev_open_exclu = 0; 100 101 spin_unlock(&xen_mce_chrdev_state_lock); 102 103 return 0; 104} 105 106static ssize_t xen_mce_chrdev_read(struct file *filp, char __user *ubuf, 107 size_t usize, loff_t *off) 108{ 109 char __user *buf = ubuf; 110 unsigned num; 111 int i, err; 112 113 mutex_lock(&mcelog_lock); 114 115 num = xen_mcelog.next; 116 117 /* Only supports full reads right now */ 118 err = -EINVAL; 119 if (*off != 0 || usize < XEN_MCE_LOG_LEN*sizeof(struct xen_mce)) 120 goto out; 121 122 err = 0; 123 for (i = 0; i < num; i++) { 124 struct xen_mce *m = &xen_mcelog.entry[i]; 125 126 err |= copy_to_user(buf, m, sizeof(*m)); 127 buf += sizeof(*m); 128 } 129 130 memset(xen_mcelog.entry, 0, num * sizeof(struct xen_mce)); 131 xen_mcelog.next = 0; 132 133 if (err) 134 err = -EFAULT; 135 136out: 137 mutex_unlock(&mcelog_lock); 138 139 return err ? 
err : buf - ubuf; 140} 141 142static __poll_t xen_mce_chrdev_poll(struct file *file, poll_table *wait) 143{ 144 poll_wait(file, &xen_mce_chrdev_wait, wait); 145 146 if (xen_mcelog.next) 147 return EPOLLIN | EPOLLRDNORM; 148 149 return 0; 150} 151 152static long xen_mce_chrdev_ioctl(struct file *f, unsigned int cmd, 153 unsigned long arg) 154{ 155 int __user *p = (int __user *)arg; 156 157 if (!capable(CAP_SYS_ADMIN)) 158 return -EPERM; 159 160 switch (cmd) { 161 case MCE_GET_RECORD_LEN: 162 return put_user(sizeof(struct xen_mce), p); 163 case MCE_GET_LOG_LEN: 164 return put_user(XEN_MCE_LOG_LEN, p); 165 case MCE_GETCLEAR_FLAGS: { 166 unsigned flags; 167 168 do { 169 flags = xen_mcelog.flags; 170 } while (cmpxchg(&xen_mcelog.flags, flags, 0) != flags); 171 172 return put_user(flags, p); 173 } 174 default: 175 return -ENOTTY; 176 } 177} 178 179static const struct file_operations xen_mce_chrdev_ops = { 180 .open = xen_mce_chrdev_open, 181 .release = xen_mce_chrdev_release, 182 .read = xen_mce_chrdev_read, 183 .poll = xen_mce_chrdev_poll, 184 .unlocked_ioctl = xen_mce_chrdev_ioctl, 185 .llseek = no_llseek, 186}; 187 188static struct miscdevice xen_mce_chrdev_device = { 189 MISC_MCELOG_MINOR, 190 "mcelog", 191 &xen_mce_chrdev_ops, 192}; 193 194/* 195 * Caller should hold the mcelog_lock 196 */ 197static void xen_mce_log(struct xen_mce *mce) 198{ 199 unsigned entry; 200 201 entry = xen_mcelog.next; 202 203 /* 204 * When the buffer fills up discard new entries. 
205 * Assume that the earlier errors are the more 206 * interesting ones: 207 */ 208 if (entry >= XEN_MCE_LOG_LEN) { 209 set_bit(XEN_MCE_OVERFLOW, 210 (unsigned long *)&xen_mcelog.flags); 211 return; 212 } 213 214 memcpy(xen_mcelog.entry + entry, mce, sizeof(struct xen_mce)); 215 216 xen_mcelog.next++; 217} 218 219static int convert_log(struct mc_info *mi) 220{ 221 struct mcinfo_common *mic; 222 struct mcinfo_global *mc_global; 223 struct mcinfo_bank *mc_bank; 224 struct xen_mce m; 225 unsigned int i, j; 226 227 mic = NULL; 228 x86_mcinfo_lookup(&mic, mi, MC_TYPE_GLOBAL); 229 if (unlikely(!mic)) { 230 pr_warn("Failed to find global error info\n"); 231 return -ENODEV; 232 } 233 234 memset(&m, 0, sizeof(struct xen_mce)); 235 236 mc_global = (struct mcinfo_global *)mic; 237 m.mcgstatus = mc_global->mc_gstatus; 238 m.apicid = mc_global->mc_apicid; 239 240 for (i = 0; i < ncpus; i++) 241 if (g_physinfo[i].mc_apicid == m.apicid) 242 break; 243 if (unlikely(i == ncpus)) { 244 pr_warn("Failed to match cpu with apicid %d\n", m.apicid); 245 return -ENODEV; 246 } 247 248 m.socketid = g_physinfo[i].mc_chipid; 249 m.cpu = m.extcpu = g_physinfo[i].mc_cpunr; 250 m.cpuvendor = (__u8)g_physinfo[i].mc_vendor; 251 for (j = 0; j < g_physinfo[i].mc_nmsrvals; ++j) 252 switch (g_physinfo[i].mc_msrvalues[j].reg) { 253 case MSR_IA32_MCG_CAP: 254 m.mcgcap = g_physinfo[i].mc_msrvalues[j].value; 255 break; 256 257 case MSR_PPIN: 258 case MSR_AMD_PPIN: 259 m.ppin = g_physinfo[i].mc_msrvalues[j].value; 260 break; 261 } 262 263 mic = NULL; 264 x86_mcinfo_lookup(&mic, mi, MC_TYPE_BANK); 265 if (unlikely(!mic)) { 266 pr_warn("Fail to find bank error info\n"); 267 return -ENODEV; 268 } 269 270 do { 271 if ((!mic) || (mic->size == 0) || 272 (mic->type != MC_TYPE_GLOBAL && 273 mic->type != MC_TYPE_BANK && 274 mic->type != MC_TYPE_EXTENDED && 275 mic->type != MC_TYPE_RECOVERY)) 276 break; 277 278 if (mic->type == MC_TYPE_BANK) { 279 mc_bank = (struct mcinfo_bank *)mic; 280 m.misc = mc_bank->mc_misc; 
281 m.status = mc_bank->mc_status; 282 m.addr = mc_bank->mc_addr; 283 m.tsc = mc_bank->mc_tsc; 284 m.bank = mc_bank->mc_bank; 285 m.finished = 1; 286 /*log this record*/ 287 xen_mce_log(&m); 288 } 289 mic = x86_mcinfo_next(mic); 290 } while (1); 291 292 return 0; 293} 294 295static int mc_queue_handle(uint32_t flags) 296{ 297 struct xen_mc mc_op; 298 int ret = 0; 299 300 mc_op.cmd = XEN_MC_fetch; 301 set_xen_guest_handle(mc_op.u.mc_fetch.data, &g_mi); 302 do { 303 mc_op.u.mc_fetch.flags = flags; 304 ret = HYPERVISOR_mca(&mc_op); 305 if (ret) { 306 pr_err("Failed to fetch %surgent error log\n", 307 flags == XEN_MC_URGENT ? "" : "non"); 308 break; 309 } 310 311 if (mc_op.u.mc_fetch.flags & XEN_MC_NODATA || 312 mc_op.u.mc_fetch.flags & XEN_MC_FETCHFAILED) 313 break; 314 else { 315 ret = convert_log(&g_mi); 316 if (ret) 317 pr_warn("Failed to convert this error log, continue acking it anyway\n"); 318 319 mc_op.u.mc_fetch.flags = flags | XEN_MC_ACK; 320 ret = HYPERVISOR_mca(&mc_op); 321 if (ret) { 322 pr_err("Failed to ack previous error log\n"); 323 break; 324 } 325 } 326 } while (1); 327 328 return ret; 329} 330 331/* virq handler for machine check error info*/ 332static void xen_mce_work_fn(struct work_struct *work) 333{ 334 int err; 335 336 mutex_lock(&mcelog_lock); 337 338 /* urgent mc_info */ 339 err = mc_queue_handle(XEN_MC_URGENT); 340 if (err) 341 pr_err("Failed to handle urgent mc_info queue, continue handling nonurgent mc_info queue anyway\n"); 342 343 /* nonurgent mc_info */ 344 err = mc_queue_handle(XEN_MC_NONURGENT); 345 if (err) 346 pr_err("Failed to handle nonurgent mc_info queue\n"); 347 348 /* wake processes polling /dev/mcelog */ 349 wake_up_interruptible(&xen_mce_chrdev_wait); 350 351 mutex_unlock(&mcelog_lock); 352} 353static DECLARE_WORK(xen_mce_work, xen_mce_work_fn); 354 355static irqreturn_t xen_mce_interrupt(int irq, void *dev_id) 356{ 357 schedule_work(&xen_mce_work); 358 return IRQ_HANDLED; 359} 360 361static int bind_virq_for_mce(void) 362{ 
363 int ret; 364 struct xen_mc mc_op; 365 366 memset(&mc_op, 0, sizeof(struct xen_mc)); 367 368 /* Fetch physical CPU Numbers */ 369 mc_op.cmd = XEN_MC_physcpuinfo; 370 set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo); 371 ret = HYPERVISOR_mca(&mc_op); 372 if (ret) { 373 pr_err("Failed to get CPU numbers\n"); 374 return ret; 375 } 376 377 /* Fetch each CPU Physical Info for later reference*/ 378 ncpus = mc_op.u.mc_physcpuinfo.ncpus; 379 g_physinfo = kcalloc(ncpus, sizeof(struct mcinfo_logical_cpu), 380 GFP_KERNEL); 381 if (!g_physinfo) 382 return -ENOMEM; 383 set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo); 384 ret = HYPERVISOR_mca(&mc_op); 385 if (ret) { 386 pr_err("Failed to get CPU info\n"); 387 kfree(g_physinfo); 388 return ret; 389 } 390 391 ret = bind_virq_to_irqhandler(VIRQ_MCA, 0, 392 xen_mce_interrupt, 0, "mce", NULL); 393 if (ret < 0) { 394 pr_err("Failed to bind virq\n"); 395 kfree(g_physinfo); 396 return ret; 397 } 398 399 return 0; 400} 401 402static int __init xen_late_init_mcelog(void) 403{ 404 int ret; 405 406 /* Only DOM0 is responsible for MCE logging */ 407 if (!xen_initial_domain()) 408 return -ENODEV; 409 410 /* register character device /dev/mcelog for xen mcelog */ 411 ret = misc_register(&xen_mce_chrdev_device); 412 if (ret) 413 return ret; 414 415 ret = bind_virq_for_mce(); 416 if (ret) 417 goto deregister; 418 419 pr_info("/dev/mcelog registered by Xen\n"); 420 421 return 0; 422 423deregister: 424 misc_deregister(&xen_mce_chrdev_device); 425 return ret; 426} 427device_initcall(xen_late_init_mcelog);