evtchn.c (16473B)
1/****************************************************************************** 2 * evtchn.c 3 * 4 * Driver for receiving and demuxing event-channel signals. 5 * 6 * Copyright (c) 2004-2005, K A Fraser 7 * Multi-process extensions Copyright (c) 2004, Steven Smith 8 * 9 * This program is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU General Public License version 2 11 * as published by the Free Software Foundation; or, when distributed 12 * separately from the Linux kernel or incorporated into other 13 * software packages, subject to the following license: 14 * 15 * Permission is hereby granted, free of charge, to any person obtaining a copy 16 * of this source file (the "Software"), to deal in the Software without 17 * restriction, including without limitation the rights to use, copy, modify, 18 * merge, publish, distribute, sublicense, and/or sell copies of the Software, 19 * and to permit persons to whom the Software is furnished to do so, subject to 20 * the following conditions: 21 * 22 * The above copyright notice and this permission notice shall be included in 23 * all copies or substantial portions of the Software. 24 * 25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 26 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 27 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 28 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 29 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 30 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 31 * IN THE SOFTWARE. 32 */ 33 34#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt 35 36#include <linux/module.h> 37#include <linux/kernel.h> 38#include <linux/sched.h> 39#include <linux/slab.h> 40#include <linux/string.h> 41#include <linux/errno.h> 42#include <linux/fs.h> 43#include <linux/miscdevice.h> 44#include <linux/major.h> 45#include <linux/proc_fs.h> 46#include <linux/stat.h> 47#include <linux/poll.h> 48#include <linux/irq.h> 49#include <linux/init.h> 50#include <linux/mutex.h> 51#include <linux/cpu.h> 52#include <linux/mm.h> 53#include <linux/vmalloc.h> 54 55#include <xen/xen.h> 56#include <xen/events.h> 57#include <xen/evtchn.h> 58#include <xen/xen-ops.h> 59#include <asm/xen/hypervisor.h> 60 61struct per_user_data { 62 struct mutex bind_mutex; /* serialize bind/unbind operations */ 63 struct rb_root evtchns; 64 unsigned int nr_evtchns; 65 66 /* Notification ring, accessed via /dev/xen/evtchn. */ 67 unsigned int ring_size; 68 evtchn_port_t *ring; 69 unsigned int ring_cons, ring_prod, ring_overflow; 70 struct mutex ring_cons_mutex; /* protect against concurrent readers */ 71 spinlock_t ring_prod_lock; /* product against concurrent interrupts */ 72 73 /* Processes wait on this queue when ring is empty. */ 74 wait_queue_head_t evtchn_wait; 75 struct fasync_struct *evtchn_async_queue; 76 const char *name; 77 78 domid_t restrict_domid; 79}; 80 81#define UNRESTRICTED_DOMID ((domid_t)-1) 82 83struct user_evtchn { 84 struct rb_node node; 85 struct per_user_data *user; 86 evtchn_port_t port; 87 bool enabled; 88}; 89 90static void evtchn_free_ring(evtchn_port_t *ring) 91{ 92 kvfree(ring); 93} 94 95static unsigned int evtchn_ring_offset(struct per_user_data *u, 96 unsigned int idx) 97{ 98 return idx & (u->ring_size - 1); 99} 100 101static evtchn_port_t *evtchn_ring_entry(struct per_user_data *u, 102 unsigned int idx) 103{ 104 return u->ring + evtchn_ring_offset(u, idx); 105} 106 107static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn) 108{ 109 struct rb_node **new = &(u->evtchns.rb_node), *parent = NULL; 110 111 u->nr_evtchns++; 112 113 while (*new) { 114 struct user_evtchn *this; 115 116 this = rb_entry(*new, struct user_evtchn, node); 117 118 parent = *new; 119 if (this->port < evtchn->port) 120 new = &((*new)->rb_left); 121 else if (this->port > evtchn->port) 122 new = &((*new)->rb_right); 123 else 124 return -EEXIST; 125 } 126 127 /* Add new node and rebalance tree. */ 128 rb_link_node(&evtchn->node, parent, new); 129 rb_insert_color(&evtchn->node, &u->evtchns); 130 131 return 0; 132} 133 134static void del_evtchn(struct per_user_data *u, struct user_evtchn *evtchn) 135{ 136 u->nr_evtchns--; 137 rb_erase(&evtchn->node, &u->evtchns); 138 kfree(evtchn); 139} 140 141static struct user_evtchn *find_evtchn(struct per_user_data *u, 142 evtchn_port_t port) 143{ 144 struct rb_node *node = u->evtchns.rb_node; 145 146 while (node) { 147 struct user_evtchn *evtchn; 148 149 evtchn = rb_entry(node, struct user_evtchn, node); 150 151 if (evtchn->port < port) 152 node = node->rb_left; 153 else if (evtchn->port > port) 154 node = node->rb_right; 155 else 156 return evtchn; 157 } 158 return NULL; 159} 160 161static irqreturn_t evtchn_interrupt(int irq, void *data) 162{ 163 struct user_evtchn *evtchn = data; 164 struct per_user_data *u = evtchn->user; 165 unsigned int prod, cons; 166 167 WARN(!evtchn->enabled, 168 "Interrupt for port %u, but apparently not enabled; per-user %p\n", 169 evtchn->port, u); 170 171 evtchn->enabled = false; 172 173 spin_lock(&u->ring_prod_lock); 174 175 prod = READ_ONCE(u->ring_prod); 176 cons = READ_ONCE(u->ring_cons); 177 178 if ((prod - cons) < u->ring_size) { 179 *evtchn_ring_entry(u, prod) = evtchn->port; 180 smp_wmb(); /* Ensure ring contents visible */ 181 WRITE_ONCE(u->ring_prod, prod + 1); 182 if (cons == prod) { 183 wake_up_interruptible(&u->evtchn_wait); 184 kill_fasync(&u->evtchn_async_queue, 185 SIGIO, POLL_IN); 186 } 187 } else 188 u->ring_overflow = 1; 189 190 spin_unlock(&u->ring_prod_lock); 191 192 return IRQ_HANDLED; 193} 194 195static ssize_t evtchn_read(struct file *file, char __user *buf, 196 size_t count, loff_t *ppos) 197{ 198 int rc; 199 unsigned int c, p, bytes1 = 0, bytes2 = 0; 200 struct per_user_data *u = file->private_data; 201 202 /* Whole number of ports. */ 203 count &= ~(sizeof(evtchn_port_t)-1); 204 205 if (count == 0) 206 return 0; 207 208 if (count > PAGE_SIZE) 209 count = PAGE_SIZE; 210 211 for (;;) { 212 mutex_lock(&u->ring_cons_mutex); 213 214 rc = -EFBIG; 215 if (u->ring_overflow) 216 goto unlock_out; 217 218 c = READ_ONCE(u->ring_cons); 219 p = READ_ONCE(u->ring_prod); 220 if (c != p) 221 break; 222 223 mutex_unlock(&u->ring_cons_mutex); 224 225 if (file->f_flags & O_NONBLOCK) 226 return -EAGAIN; 227 228 rc = wait_event_interruptible(u->evtchn_wait, 229 READ_ONCE(u->ring_cons) != READ_ONCE(u->ring_prod)); 230 if (rc) 231 return rc; 232 } 233 234 /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */ 235 if (((c ^ p) & u->ring_size) != 0) { 236 bytes1 = (u->ring_size - evtchn_ring_offset(u, c)) * 237 sizeof(evtchn_port_t); 238 bytes2 = evtchn_ring_offset(u, p) * sizeof(evtchn_port_t); 239 } else { 240 bytes1 = (p - c) * sizeof(evtchn_port_t); 241 bytes2 = 0; 242 } 243 244 /* Truncate chunks according to caller's maximum byte count. */ 245 if (bytes1 > count) { 246 bytes1 = count; 247 bytes2 = 0; 248 } else if ((bytes1 + bytes2) > count) { 249 bytes2 = count - bytes1; 250 } 251 252 rc = -EFAULT; 253 smp_rmb(); /* Ensure that we see the port before we copy it. */ 254 if (copy_to_user(buf, evtchn_ring_entry(u, c), bytes1) || 255 ((bytes2 != 0) && 256 copy_to_user(&buf[bytes1], &u->ring[0], bytes2))) 257 goto unlock_out; 258 259 WRITE_ONCE(u->ring_cons, c + (bytes1 + bytes2) / sizeof(evtchn_port_t)); 260 rc = bytes1 + bytes2; 261 262 unlock_out: 263 mutex_unlock(&u->ring_cons_mutex); 264 return rc; 265} 266 267static ssize_t evtchn_write(struct file *file, const char __user *buf, 268 size_t count, loff_t *ppos) 269{ 270 int rc, i; 271 evtchn_port_t *kbuf = (evtchn_port_t *)__get_free_page(GFP_KERNEL); 272 struct per_user_data *u = file->private_data; 273 274 if (kbuf == NULL) 275 return -ENOMEM; 276 277 /* Whole number of ports. */ 278 count &= ~(sizeof(evtchn_port_t)-1); 279 280 rc = 0; 281 if (count == 0) 282 goto out; 283 284 if (count > PAGE_SIZE) 285 count = PAGE_SIZE; 286 287 rc = -EFAULT; 288 if (copy_from_user(kbuf, buf, count) != 0) 289 goto out; 290 291 mutex_lock(&u->bind_mutex); 292 293 for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) { 294 evtchn_port_t port = kbuf[i]; 295 struct user_evtchn *evtchn; 296 297 evtchn = find_evtchn(u, port); 298 if (evtchn && !evtchn->enabled) { 299 evtchn->enabled = true; 300 xen_irq_lateeoi(irq_from_evtchn(port), 0); 301 } 302 } 303 304 mutex_unlock(&u->bind_mutex); 305 306 rc = count; 307 308 out: 309 free_page((unsigned long)kbuf); 310 return rc; 311} 312 313static int evtchn_resize_ring(struct per_user_data *u) 314{ 315 unsigned int new_size; 316 evtchn_port_t *new_ring, *old_ring; 317 318 /* 319 * Ensure the ring is large enough to capture all possible 320 * events. i.e., one free slot for each bound event. 321 */ 322 if (u->nr_evtchns <= u->ring_size) 323 return 0; 324 325 if (u->ring_size == 0) 326 new_size = 64; 327 else 328 new_size = 2 * u->ring_size; 329 330 new_ring = kvmalloc_array(new_size, sizeof(*new_ring), GFP_KERNEL); 331 if (!new_ring) 332 return -ENOMEM; 333 334 old_ring = u->ring; 335 336 /* 337 * Access to the ring contents is serialized by either the 338 * prod /or/ cons lock so take both when resizing. 339 */ 340 mutex_lock(&u->ring_cons_mutex); 341 spin_lock_irq(&u->ring_prod_lock); 342 343 /* 344 * Copy the old ring contents to the new ring. 345 * 346 * To take care of wrapping, a full ring, and the new index 347 * pointing into the second half, simply copy the old contents 348 * twice. 349 * 350 * +---------+ +------------------+ 351 * |34567 12| -> |34567 1234567 12| 352 * +-----p-c-+ +-------c------p---+ 353 */ 354 memcpy(new_ring, old_ring, u->ring_size * sizeof(*u->ring)); 355 memcpy(new_ring + u->ring_size, old_ring, 356 u->ring_size * sizeof(*u->ring)); 357 358 u->ring = new_ring; 359 u->ring_size = new_size; 360 361 spin_unlock_irq(&u->ring_prod_lock); 362 mutex_unlock(&u->ring_cons_mutex); 363 364 evtchn_free_ring(old_ring); 365 366 return 0; 367} 368 369static int evtchn_bind_to_user(struct per_user_data *u, evtchn_port_t port) 370{ 371 struct user_evtchn *evtchn; 372 struct evtchn_close close; 373 int rc = 0; 374 375 /* 376 * Ports are never reused, so every caller should pass in a 377 * unique port. 378 * 379 * (Locking not necessary because we haven't registered the 380 * interrupt handler yet, and our caller has already 381 * serialized bind operations.) 382 */ 383 384 evtchn = kzalloc(sizeof(*evtchn), GFP_KERNEL); 385 if (!evtchn) 386 return -ENOMEM; 387 388 evtchn->user = u; 389 evtchn->port = port; 390 evtchn->enabled = true; /* start enabled */ 391 392 rc = add_evtchn(u, evtchn); 393 if (rc < 0) 394 goto err; 395 396 rc = evtchn_resize_ring(u); 397 if (rc < 0) 398 goto err; 399 400 rc = bind_evtchn_to_irqhandler_lateeoi(port, evtchn_interrupt, 0, 401 u->name, evtchn); 402 if (rc < 0) 403 goto err; 404 405 rc = evtchn_make_refcounted(port); 406 return rc; 407 408err: 409 /* bind failed, should close the port now */ 410 close.port = port; 411 if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) 412 BUG(); 413 del_evtchn(u, evtchn); 414 return rc; 415} 416 417static void evtchn_unbind_from_user(struct per_user_data *u, 418 struct user_evtchn *evtchn) 419{ 420 int irq = irq_from_evtchn(evtchn->port); 421 422 BUG_ON(irq < 0); 423 424 unbind_from_irqhandler(irq, evtchn); 425 426 del_evtchn(u, evtchn); 427} 428 429static long evtchn_ioctl(struct file *file, 430 unsigned int cmd, unsigned long arg) 431{ 432 int rc; 433 struct per_user_data *u = file->private_data; 434 void __user *uarg = (void __user *) arg; 435 436 /* Prevent bind from racing with unbind */ 437 mutex_lock(&u->bind_mutex); 438 439 switch (cmd) { 440 case IOCTL_EVTCHN_BIND_VIRQ: { 441 struct ioctl_evtchn_bind_virq bind; 442 struct evtchn_bind_virq bind_virq; 443 444 rc = -EACCES; 445 if (u->restrict_domid != UNRESTRICTED_DOMID) 446 break; 447 448 rc = -EFAULT; 449 if (copy_from_user(&bind, uarg, sizeof(bind))) 450 break; 451 452 bind_virq.virq = bind.virq; 453 bind_virq.vcpu = xen_vcpu_nr(0); 454 rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, 455 &bind_virq); 456 if (rc != 0) 457 break; 458 459 rc = evtchn_bind_to_user(u, bind_virq.port); 460 if (rc == 0) 461 rc = bind_virq.port; 462 break; 463 } 464 465 case IOCTL_EVTCHN_BIND_INTERDOMAIN: { 466 struct ioctl_evtchn_bind_interdomain bind; 467 struct evtchn_bind_interdomain bind_interdomain; 468 469 rc = -EFAULT; 470 if (copy_from_user(&bind, uarg, sizeof(bind))) 471 break; 472 473 rc = -EACCES; 474 if (u->restrict_domid != UNRESTRICTED_DOMID && 475 u->restrict_domid != bind.remote_domain) 476 break; 477 478 bind_interdomain.remote_dom = bind.remote_domain; 479 bind_interdomain.remote_port = bind.remote_port; 480 rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, 481 &bind_interdomain); 482 if (rc != 0) 483 break; 484 485 rc = evtchn_bind_to_user(u, bind_interdomain.local_port); 486 if (rc == 0) 487 rc = bind_interdomain.local_port; 488 break; 489 } 490 491 case IOCTL_EVTCHN_BIND_UNBOUND_PORT: { 492 struct ioctl_evtchn_bind_unbound_port bind; 493 struct evtchn_alloc_unbound alloc_unbound; 494 495 rc = -EACCES; 496 if (u->restrict_domid != UNRESTRICTED_DOMID) 497 break; 498 499 rc = -EFAULT; 500 if (copy_from_user(&bind, uarg, sizeof(bind))) 501 break; 502 503 alloc_unbound.dom = DOMID_SELF; 504 alloc_unbound.remote_dom = bind.remote_domain; 505 rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, 506 &alloc_unbound); 507 if (rc != 0) 508 break; 509 510 rc = evtchn_bind_to_user(u, alloc_unbound.port); 511 if (rc == 0) 512 rc = alloc_unbound.port; 513 break; 514 } 515 516 case IOCTL_EVTCHN_UNBIND: { 517 struct ioctl_evtchn_unbind unbind; 518 struct user_evtchn *evtchn; 519 520 rc = -EFAULT; 521 if (copy_from_user(&unbind, uarg, sizeof(unbind))) 522 break; 523 524 rc = -EINVAL; 525 if (unbind.port >= xen_evtchn_nr_channels()) 526 break; 527 528 rc = -ENOTCONN; 529 evtchn = find_evtchn(u, unbind.port); 530 if (!evtchn) 531 break; 532 533 disable_irq(irq_from_evtchn(unbind.port)); 534 evtchn_unbind_from_user(u, evtchn); 535 rc = 0; 536 break; 537 } 538 539 case IOCTL_EVTCHN_NOTIFY: { 540 struct ioctl_evtchn_notify notify; 541 struct user_evtchn *evtchn; 542 543 rc = -EFAULT; 544 if (copy_from_user(¬ify, uarg, sizeof(notify))) 545 break; 546 547 rc = -ENOTCONN; 548 evtchn = find_evtchn(u, notify.port); 549 if (evtchn) { 550 notify_remote_via_evtchn(notify.port); 551 rc = 0; 552 } 553 break; 554 } 555 556 case IOCTL_EVTCHN_RESET: { 557 /* Initialise the ring to empty. Clear errors. */ 558 mutex_lock(&u->ring_cons_mutex); 559 spin_lock_irq(&u->ring_prod_lock); 560 WRITE_ONCE(u->ring_cons, 0); 561 WRITE_ONCE(u->ring_prod, 0); 562 u->ring_overflow = 0; 563 spin_unlock_irq(&u->ring_prod_lock); 564 mutex_unlock(&u->ring_cons_mutex); 565 rc = 0; 566 break; 567 } 568 569 case IOCTL_EVTCHN_RESTRICT_DOMID: { 570 struct ioctl_evtchn_restrict_domid ierd; 571 572 rc = -EACCES; 573 if (u->restrict_domid != UNRESTRICTED_DOMID) 574 break; 575 576 rc = -EFAULT; 577 if (copy_from_user(&ierd, uarg, sizeof(ierd))) 578 break; 579 580 rc = -EINVAL; 581 if (ierd.domid == 0 || ierd.domid >= DOMID_FIRST_RESERVED) 582 break; 583 584 u->restrict_domid = ierd.domid; 585 rc = 0; 586 587 break; 588 } 589 590 default: 591 rc = -ENOSYS; 592 break; 593 } 594 mutex_unlock(&u->bind_mutex); 595 596 return rc; 597} 598 599static __poll_t evtchn_poll(struct file *file, poll_table *wait) 600{ 601 __poll_t mask = EPOLLOUT | EPOLLWRNORM; 602 struct per_user_data *u = file->private_data; 603 604 poll_wait(file, &u->evtchn_wait, wait); 605 if (READ_ONCE(u->ring_cons) != READ_ONCE(u->ring_prod)) 606 mask |= EPOLLIN | EPOLLRDNORM; 607 if (u->ring_overflow) 608 mask = EPOLLERR; 609 return mask; 610} 611 612static int evtchn_fasync(int fd, struct file *filp, int on) 613{ 614 struct per_user_data *u = filp->private_data; 615 return fasync_helper(fd, filp, on, &u->evtchn_async_queue); 616} 617 618static int evtchn_open(struct inode *inode, struct file *filp) 619{ 620 struct per_user_data *u; 621 622 u = kzalloc(sizeof(*u), GFP_KERNEL); 623 if (u == NULL) 624 return -ENOMEM; 625 626 u->name = kasprintf(GFP_KERNEL, "evtchn:%s", current->comm); 627 if (u->name == NULL) { 628 kfree(u); 629 return -ENOMEM; 630 } 631 632 init_waitqueue_head(&u->evtchn_wait); 633 634 mutex_init(&u->bind_mutex); 635 mutex_init(&u->ring_cons_mutex); 636 spin_lock_init(&u->ring_prod_lock); 637 638 u->restrict_domid = UNRESTRICTED_DOMID; 639 640 filp->private_data = u; 641 642 return stream_open(inode, filp); 643} 644 645static int evtchn_release(struct inode *inode, struct file *filp) 646{ 647 struct per_user_data *u = filp->private_data; 648 struct rb_node *node; 649 650 while ((node = u->evtchns.rb_node)) { 651 struct user_evtchn *evtchn; 652 653 evtchn = rb_entry(node, struct user_evtchn, node); 654 disable_irq(irq_from_evtchn(evtchn->port)); 655 evtchn_unbind_from_user(u, evtchn); 656 } 657 658 evtchn_free_ring(u->ring); 659 kfree(u->name); 660 kfree(u); 661 662 return 0; 663} 664 665static const struct file_operations evtchn_fops = { 666 .owner = THIS_MODULE, 667 .read = evtchn_read, 668 .write = evtchn_write, 669 .unlocked_ioctl = evtchn_ioctl, 670 .poll = evtchn_poll, 671 .fasync = evtchn_fasync, 672 .open = evtchn_open, 673 .release = evtchn_release, 674 .llseek = no_llseek, 675}; 676 677static struct miscdevice evtchn_miscdev = { 678 .minor = MISC_DYNAMIC_MINOR, 679 .name = "xen/evtchn", 680 .fops = &evtchn_fops, 681}; 682static int __init evtchn_init(void) 683{ 684 int err; 685 686 if (!xen_domain()) 687 return -ENODEV; 688 689 /* Create '/dev/xen/evtchn'. */ 690 err = misc_register(&evtchn_miscdev); 691 if (err != 0) { 692 pr_err("Could not register /dev/xen/evtchn\n"); 693 return err; 694 } 695 696 pr_info("Event-channel device installed\n"); 697 698 return 0; 699} 700 701static void __exit evtchn_cleanup(void) 702{ 703 misc_deregister(&evtchn_miscdev); 704} 705 706module_init(evtchn_init); 707module_exit(evtchn_cleanup); 708 709MODULE_LICENSE("GPL");