vfio.c (57129B)
1// SPDX-License-Identifier: GPL-2.0-only 2/* 3 * VFIO core 4 * 5 * Copyright (C) 2012 Red Hat, Inc. All rights reserved. 6 * Author: Alex Williamson <alex.williamson@redhat.com> 7 * 8 * Derived from original vfio: 9 * Copyright 2010 Cisco Systems, Inc. All rights reserved. 10 * Author: Tom Lyon, pugs@cisco.com 11 */ 12 13#include <linux/cdev.h> 14#include <linux/compat.h> 15#include <linux/device.h> 16#include <linux/file.h> 17#include <linux/anon_inodes.h> 18#include <linux/fs.h> 19#include <linux/idr.h> 20#include <linux/iommu.h> 21#include <linux/list.h> 22#include <linux/miscdevice.h> 23#include <linux/module.h> 24#include <linux/mutex.h> 25#include <linux/pci.h> 26#include <linux/rwsem.h> 27#include <linux/sched.h> 28#include <linux/slab.h> 29#include <linux/stat.h> 30#include <linux/string.h> 31#include <linux/uaccess.h> 32#include <linux/vfio.h> 33#include <linux/wait.h> 34#include <linux/sched/signal.h> 35#include "vfio.h" 36 37#define DRIVER_VERSION "0.3" 38#define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>" 39#define DRIVER_DESC "VFIO - User Level meta-driver" 40 41static struct vfio { 42 struct class *class; 43 struct list_head iommu_drivers_list; 44 struct mutex iommu_drivers_lock; 45 struct list_head group_list; 46 struct mutex group_lock; /* locks group_list */ 47 struct ida group_ida; 48 dev_t group_devt; 49} vfio; 50 51struct vfio_iommu_driver { 52 const struct vfio_iommu_driver_ops *ops; 53 struct list_head vfio_next; 54}; 55 56struct vfio_container { 57 struct kref kref; 58 struct list_head group_list; 59 struct rw_semaphore group_lock; 60 struct vfio_iommu_driver *iommu_driver; 61 void *iommu_data; 62 bool noiommu; 63}; 64 65struct vfio_group { 66 struct device dev; 67 struct cdev cdev; 68 refcount_t users; 69 unsigned int container_users; 70 struct iommu_group *iommu_group; 71 struct vfio_container *container; 72 struct list_head device_list; 73 struct mutex device_lock; 74 struct list_head vfio_next; 75 struct list_head container_next; 76 enum vfio_group_type type; 77 unsigned int dev_counter; 78 struct rw_semaphore group_rwsem; 79 struct kvm *kvm; 80 struct file *opened_file; 81 struct blocking_notifier_head notifier; 82}; 83 84#ifdef CONFIG_VFIO_NOIOMMU 85static bool noiommu __read_mostly; 86module_param_named(enable_unsafe_noiommu_mode, 87 noiommu, bool, S_IRUGO | S_IWUSR); 88MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. 
(default: false)"); 89#endif 90 91static DEFINE_XARRAY(vfio_device_set_xa); 92static const struct file_operations vfio_group_fops; 93 94int vfio_assign_device_set(struct vfio_device *device, void *set_id) 95{ 96 unsigned long idx = (unsigned long)set_id; 97 struct vfio_device_set *new_dev_set; 98 struct vfio_device_set *dev_set; 99 100 if (WARN_ON(!set_id)) 101 return -EINVAL; 102 103 /* 104 * Atomically acquire a singleton object in the xarray for this set_id 105 */ 106 xa_lock(&vfio_device_set_xa); 107 dev_set = xa_load(&vfio_device_set_xa, idx); 108 if (dev_set) 109 goto found_get_ref; 110 xa_unlock(&vfio_device_set_xa); 111 112 new_dev_set = kzalloc(sizeof(*new_dev_set), GFP_KERNEL); 113 if (!new_dev_set) 114 return -ENOMEM; 115 mutex_init(&new_dev_set->lock); 116 INIT_LIST_HEAD(&new_dev_set->device_list); 117 new_dev_set->set_id = set_id; 118 119 xa_lock(&vfio_device_set_xa); 120 dev_set = __xa_cmpxchg(&vfio_device_set_xa, idx, NULL, new_dev_set, 121 GFP_KERNEL); 122 if (!dev_set) { 123 dev_set = new_dev_set; 124 goto found_get_ref; 125 } 126 127 kfree(new_dev_set); 128 if (xa_is_err(dev_set)) { 129 xa_unlock(&vfio_device_set_xa); 130 return xa_err(dev_set); 131 } 132 133found_get_ref: 134 dev_set->device_count++; 135 xa_unlock(&vfio_device_set_xa); 136 mutex_lock(&dev_set->lock); 137 device->dev_set = dev_set; 138 list_add_tail(&device->dev_set_list, &dev_set->device_list); 139 mutex_unlock(&dev_set->lock); 140 return 0; 141} 142EXPORT_SYMBOL_GPL(vfio_assign_device_set); 143 144static void vfio_release_device_set(struct vfio_device *device) 145{ 146 struct vfio_device_set *dev_set = device->dev_set; 147 148 if (!dev_set) 149 return; 150 151 mutex_lock(&dev_set->lock); 152 list_del(&device->dev_set_list); 153 mutex_unlock(&dev_set->lock); 154 155 xa_lock(&vfio_device_set_xa); 156 if (!--dev_set->device_count) { 157 __xa_erase(&vfio_device_set_xa, 158 (unsigned long)dev_set->set_id); 159 mutex_destroy(&dev_set->lock); 160 kfree(dev_set); 161 } 162 xa_unlock(&vfio_device_set_xa); 163} 164 165#ifdef CONFIG_VFIO_NOIOMMU 166static void *vfio_noiommu_open(unsigned long arg) 167{ 168 if (arg != VFIO_NOIOMMU_IOMMU) 169 return ERR_PTR(-EINVAL); 170 if (!capable(CAP_SYS_RAWIO)) 171 return ERR_PTR(-EPERM); 172 173 return NULL; 174} 175 176static void vfio_noiommu_release(void *iommu_data) 177{ 178} 179 180static long vfio_noiommu_ioctl(void *iommu_data, 181 unsigned int cmd, unsigned long arg) 182{ 183 if (cmd == VFIO_CHECK_EXTENSION) 184 return noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0; 185 186 return -ENOTTY; 187} 188 189static int vfio_noiommu_attach_group(void *iommu_data, 190 struct iommu_group *iommu_group, enum vfio_group_type type) 191{ 192 return 0; 193} 194 195static void vfio_noiommu_detach_group(void *iommu_data, 196 struct iommu_group *iommu_group) 197{ 198} 199 200static const struct vfio_iommu_driver_ops vfio_noiommu_ops = { 201 .name = "vfio-noiommu", 202 .owner = THIS_MODULE, 203 .open = vfio_noiommu_open, 204 .release = vfio_noiommu_release, 205 .ioctl = vfio_noiommu_ioctl, 206 .attach_group = vfio_noiommu_attach_group, 207 .detach_group = vfio_noiommu_detach_group, 208}; 209 210/* 211 * Only noiommu containers can use vfio-noiommu and noiommu containers can only 212 * use vfio-noiommu. 
213 */ 214static inline bool vfio_iommu_driver_allowed(struct vfio_container *container, 215 const struct vfio_iommu_driver *driver) 216{ 217 return container->noiommu == (driver->ops == &vfio_noiommu_ops); 218} 219#else 220static inline bool vfio_iommu_driver_allowed(struct vfio_container *container, 221 const struct vfio_iommu_driver *driver) 222{ 223 return true; 224} 225#endif /* CONFIG_VFIO_NOIOMMU */ 226 227/* 228 * IOMMU driver registration 229 */ 230int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops) 231{ 232 struct vfio_iommu_driver *driver, *tmp; 233 234 driver = kzalloc(sizeof(*driver), GFP_KERNEL); 235 if (!driver) 236 return -ENOMEM; 237 238 driver->ops = ops; 239 240 mutex_lock(&vfio.iommu_drivers_lock); 241 242 /* Check for duplicates */ 243 list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) { 244 if (tmp->ops == ops) { 245 mutex_unlock(&vfio.iommu_drivers_lock); 246 kfree(driver); 247 return -EINVAL; 248 } 249 } 250 251 list_add(&driver->vfio_next, &vfio.iommu_drivers_list); 252 253 mutex_unlock(&vfio.iommu_drivers_lock); 254 255 return 0; 256} 257EXPORT_SYMBOL_GPL(vfio_register_iommu_driver); 258 259void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops) 260{ 261 struct vfio_iommu_driver *driver; 262 263 mutex_lock(&vfio.iommu_drivers_lock); 264 list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) { 265 if (driver->ops == ops) { 266 list_del(&driver->vfio_next); 267 mutex_unlock(&vfio.iommu_drivers_lock); 268 kfree(driver); 269 return; 270 } 271 } 272 mutex_unlock(&vfio.iommu_drivers_lock); 273} 274EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver); 275 276static void vfio_group_get(struct vfio_group *group); 277 278/* 279 * Container objects - containers are created when /dev/vfio/vfio is 280 * opened, but their lifecycle extends until the last user is done, so 281 * it's freed via kref. Must support container/group/device being 282 * closed in any order. 
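 *
 * For illustration, a minimal userspace sketch of that lifecycle as seen
 * through the fops further below (error handling omitted, the type1
 * extension is just an example):
 *
 *   int container = open("/dev/vfio/vfio", O_RDWR);
 *
 *   if (ioctl(container, VFIO_GET_API_VERSION) != VFIO_API_VERSION)
 *           return -1;
 *   if (!ioctl(container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU))
 *           return -1;
 *   ...
 *   close(container);
 *
 * The close() drops the reference taken in vfio_fops_open(); groups added
 * to the container hold their own references, so the structure is only
 * freed once the last user is gone, in whatever order container, group and
 * device are closed.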
283 */ 284static void vfio_container_get(struct vfio_container *container) 285{ 286 kref_get(&container->kref); 287} 288 289static void vfio_container_release(struct kref *kref) 290{ 291 struct vfio_container *container; 292 container = container_of(kref, struct vfio_container, kref); 293 294 kfree(container); 295} 296 297static void vfio_container_put(struct vfio_container *container) 298{ 299 kref_put(&container->kref, vfio_container_release); 300} 301 302/* 303 * Group objects - create, release, get, put, search 304 */ 305static struct vfio_group * 306__vfio_group_get_from_iommu(struct iommu_group *iommu_group) 307{ 308 struct vfio_group *group; 309 310 list_for_each_entry(group, &vfio.group_list, vfio_next) { 311 if (group->iommu_group == iommu_group) { 312 vfio_group_get(group); 313 return group; 314 } 315 } 316 return NULL; 317} 318 319static struct vfio_group * 320vfio_group_get_from_iommu(struct iommu_group *iommu_group) 321{ 322 struct vfio_group *group; 323 324 mutex_lock(&vfio.group_lock); 325 group = __vfio_group_get_from_iommu(iommu_group); 326 mutex_unlock(&vfio.group_lock); 327 return group; 328} 329 330static void vfio_group_release(struct device *dev) 331{ 332 struct vfio_group *group = container_of(dev, struct vfio_group, dev); 333 334 mutex_destroy(&group->device_lock); 335 iommu_group_put(group->iommu_group); 336 ida_free(&vfio.group_ida, MINOR(group->dev.devt)); 337 kfree(group); 338} 339 340static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group, 341 enum vfio_group_type type) 342{ 343 struct vfio_group *group; 344 int minor; 345 346 group = kzalloc(sizeof(*group), GFP_KERNEL); 347 if (!group) 348 return ERR_PTR(-ENOMEM); 349 350 minor = ida_alloc_max(&vfio.group_ida, MINORMASK, GFP_KERNEL); 351 if (minor < 0) { 352 kfree(group); 353 return ERR_PTR(minor); 354 } 355 356 device_initialize(&group->dev); 357 group->dev.devt = MKDEV(MAJOR(vfio.group_devt), minor); 358 group->dev.class = vfio.class; 359 group->dev.release = vfio_group_release; 360 cdev_init(&group->cdev, &vfio_group_fops); 361 group->cdev.owner = THIS_MODULE; 362 363 refcount_set(&group->users, 1); 364 init_rwsem(&group->group_rwsem); 365 INIT_LIST_HEAD(&group->device_list); 366 mutex_init(&group->device_lock); 367 group->iommu_group = iommu_group; 368 /* put in vfio_group_release() */ 369 iommu_group_ref_get(iommu_group); 370 group->type = type; 371 BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier); 372 373 return group; 374} 375 376static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group, 377 enum vfio_group_type type) 378{ 379 struct vfio_group *group; 380 struct vfio_group *ret; 381 int err; 382 383 group = vfio_group_alloc(iommu_group, type); 384 if (IS_ERR(group)) 385 return group; 386 387 err = dev_set_name(&group->dev, "%s%d", 388 group->type == VFIO_NO_IOMMU ? "noiommu-" : "", 389 iommu_group_id(iommu_group)); 390 if (err) { 391 ret = ERR_PTR(err); 392 goto err_put; 393 } 394 395 mutex_lock(&vfio.group_lock); 396 397 /* Did we race creating this group? 
*/ 398 ret = __vfio_group_get_from_iommu(iommu_group); 399 if (ret) 400 goto err_unlock; 401 402 err = cdev_device_add(&group->cdev, &group->dev); 403 if (err) { 404 ret = ERR_PTR(err); 405 goto err_unlock; 406 } 407 408 list_add(&group->vfio_next, &vfio.group_list); 409 410 mutex_unlock(&vfio.group_lock); 411 return group; 412 413err_unlock: 414 mutex_unlock(&vfio.group_lock); 415err_put: 416 put_device(&group->dev); 417 return ret; 418} 419 420static void vfio_group_put(struct vfio_group *group) 421{ 422 if (!refcount_dec_and_mutex_lock(&group->users, &vfio.group_lock)) 423 return; 424 425 /* 426 * These data structures all have paired operations that can only be 427 * undone when the caller holds a live reference on the group. Since all 428 * pairs must be undone these WARN_ON's indicate some caller did not 429 * properly hold the group reference. 430 */ 431 WARN_ON(!list_empty(&group->device_list)); 432 WARN_ON(group->container || group->container_users); 433 WARN_ON(group->notifier.head); 434 435 list_del(&group->vfio_next); 436 cdev_device_del(&group->cdev, &group->dev); 437 mutex_unlock(&vfio.group_lock); 438 439 put_device(&group->dev); 440} 441 442static void vfio_group_get(struct vfio_group *group) 443{ 444 refcount_inc(&group->users); 445} 446 447/* 448 * Device objects - create, release, get, put, search 449 */ 450/* Device reference always implies a group reference */ 451static void vfio_device_put(struct vfio_device *device) 452{ 453 if (refcount_dec_and_test(&device->refcount)) 454 complete(&device->comp); 455} 456 457static bool vfio_device_try_get(struct vfio_device *device) 458{ 459 return refcount_inc_not_zero(&device->refcount); 460} 461 462static struct vfio_device *vfio_group_get_device(struct vfio_group *group, 463 struct device *dev) 464{ 465 struct vfio_device *device; 466 467 mutex_lock(&group->device_lock); 468 list_for_each_entry(device, &group->device_list, group_next) { 469 if (device->dev == dev && vfio_device_try_get(device)) { 470 mutex_unlock(&group->device_lock); 471 return device; 472 } 473 } 474 mutex_unlock(&group->device_lock); 475 return NULL; 476} 477 478/* 479 * VFIO driver API 480 */ 481void vfio_init_group_dev(struct vfio_device *device, struct device *dev, 482 const struct vfio_device_ops *ops) 483{ 484 init_completion(&device->comp); 485 device->dev = dev; 486 device->ops = ops; 487} 488EXPORT_SYMBOL_GPL(vfio_init_group_dev); 489 490void vfio_uninit_group_dev(struct vfio_device *device) 491{ 492 vfio_release_device_set(device); 493} 494EXPORT_SYMBOL_GPL(vfio_uninit_group_dev); 495 496static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev, 497 enum vfio_group_type type) 498{ 499 struct iommu_group *iommu_group; 500 struct vfio_group *group; 501 int ret; 502 503 iommu_group = iommu_group_alloc(); 504 if (IS_ERR(iommu_group)) 505 return ERR_CAST(iommu_group); 506 507 iommu_group_set_name(iommu_group, "vfio-noiommu"); 508 ret = iommu_group_add_device(iommu_group, dev); 509 if (ret) 510 goto out_put_group; 511 512 group = vfio_create_group(iommu_group, type); 513 if (IS_ERR(group)) { 514 ret = PTR_ERR(group); 515 goto out_remove_device; 516 } 517 iommu_group_put(iommu_group); 518 return group; 519 520out_remove_device: 521 iommu_group_remove_device(dev); 522out_put_group: 523 iommu_group_put(iommu_group); 524 return ERR_PTR(ret); 525} 526 527static struct vfio_group *vfio_group_find_or_alloc(struct device *dev) 528{ 529 struct iommu_group *iommu_group; 530 struct vfio_group *group; 531 532 iommu_group = iommu_group_get(dev); 533#ifdef 
CONFIG_VFIO_NOIOMMU 534 if (!iommu_group && noiommu) { 535 /* 536 * With noiommu enabled, create an IOMMU group for devices that 537 * don't already have one, implying no IOMMU hardware/driver 538 * exists. Taint the kernel because we're about to give a DMA 539 * capable device to a user without IOMMU protection. 540 */ 541 group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU); 542 if (!IS_ERR(group)) { 543 add_taint(TAINT_USER, LOCKDEP_STILL_OK); 544 dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n"); 545 } 546 return group; 547 } 548#endif 549 if (!iommu_group) 550 return ERR_PTR(-EINVAL); 551 552 group = vfio_group_get_from_iommu(iommu_group); 553 if (!group) 554 group = vfio_create_group(iommu_group, VFIO_IOMMU); 555 556 /* The vfio_group holds a reference to the iommu_group */ 557 iommu_group_put(iommu_group); 558 return group; 559} 560 561static int __vfio_register_dev(struct vfio_device *device, 562 struct vfio_group *group) 563{ 564 struct vfio_device *existing_device; 565 566 if (IS_ERR(group)) 567 return PTR_ERR(group); 568 569 /* 570 * If the driver doesn't specify a set then the device is added to a 571 * singleton set just for itself. 572 */ 573 if (!device->dev_set) 574 vfio_assign_device_set(device, device); 575 576 existing_device = vfio_group_get_device(group, device->dev); 577 if (existing_device) { 578 dev_WARN(device->dev, "Device already exists on group %d\n", 579 iommu_group_id(group->iommu_group)); 580 vfio_device_put(existing_device); 581 if (group->type == VFIO_NO_IOMMU || 582 group->type == VFIO_EMULATED_IOMMU) 583 iommu_group_remove_device(device->dev); 584 vfio_group_put(group); 585 return -EBUSY; 586 } 587 588 /* Our reference on group is moved to the device */ 589 device->group = group; 590 591 /* Refcounting can't start until the driver calls register */ 592 refcount_set(&device->refcount, 1); 593 594 mutex_lock(&group->device_lock); 595 list_add(&device->group_next, &group->device_list); 596 group->dev_counter++; 597 mutex_unlock(&group->device_lock); 598 599 return 0; 600} 601 602int vfio_register_group_dev(struct vfio_device *device) 603{ 604 /* 605 * VFIO always sets IOMMU_CACHE because we offer no way for userspace to 606 * restore cache coherency. 607 */ 608 if (!iommu_capable(device->dev->bus, IOMMU_CAP_CACHE_COHERENCY)) 609 return -EINVAL; 610 611 return __vfio_register_dev(device, 612 vfio_group_find_or_alloc(device->dev)); 613} 614EXPORT_SYMBOL_GPL(vfio_register_group_dev); 615 616/* 617 * Register a virtual device without IOMMU backing. The user of this 618 * device must not be able to directly trigger unmediated DMA. 
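 *
 * A typical caller is an mdev-style parent driver; its probe path looks
 * roughly like the sketch below (struct my_vdev and my_dev_ops are
 * hypothetical, error handling trimmed):
 *
 *   static int my_probe(struct device *dev)
 *   {
 *           struct my_vdev *vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
 *
 *           vfio_init_group_dev(&vdev->vfio_dev, dev, &my_dev_ops);
 *           return vfio_register_emulated_iommu_dev(&vdev->vfio_dev);
 *   }
 *
 * Teardown mirrors this with vfio_unregister_group_dev() followed by
 * vfio_uninit_group_dev() once the device is no longer in use.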
619 */ 620int vfio_register_emulated_iommu_dev(struct vfio_device *device) 621{ 622 return __vfio_register_dev(device, 623 vfio_noiommu_group_alloc(device->dev, VFIO_EMULATED_IOMMU)); 624} 625EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev); 626 627static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group, 628 char *buf) 629{ 630 struct vfio_device *it, *device = ERR_PTR(-ENODEV); 631 632 mutex_lock(&group->device_lock); 633 list_for_each_entry(it, &group->device_list, group_next) { 634 int ret; 635 636 if (it->ops->match) { 637 ret = it->ops->match(it, buf); 638 if (ret < 0) { 639 device = ERR_PTR(ret); 640 break; 641 } 642 } else { 643 ret = !strcmp(dev_name(it->dev), buf); 644 } 645 646 if (ret && vfio_device_try_get(it)) { 647 device = it; 648 break; 649 } 650 } 651 mutex_unlock(&group->device_lock); 652 653 return device; 654} 655 656/* 657 * Decrement the device reference count and wait for the device to be 658 * removed. Open file descriptors for the device... */ 659void vfio_unregister_group_dev(struct vfio_device *device) 660{ 661 struct vfio_group *group = device->group; 662 unsigned int i = 0; 663 bool interrupted = false; 664 long rc; 665 666 vfio_device_put(device); 667 rc = try_wait_for_completion(&device->comp); 668 while (rc <= 0) { 669 if (device->ops->request) 670 device->ops->request(device, i++); 671 672 if (interrupted) { 673 rc = wait_for_completion_timeout(&device->comp, 674 HZ * 10); 675 } else { 676 rc = wait_for_completion_interruptible_timeout( 677 &device->comp, HZ * 10); 678 if (rc < 0) { 679 interrupted = true; 680 dev_warn(device->dev, 681 "Device is currently in use, task" 682 " \"%s\" (%d) " 683 "blocked until device is released", 684 current->comm, task_pid_nr(current)); 685 } 686 } 687 } 688 689 mutex_lock(&group->device_lock); 690 list_del(&device->group_next); 691 group->dev_counter--; 692 mutex_unlock(&group->device_lock); 693 694 if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU) 695 iommu_group_remove_device(device->dev); 696 697 /* Matches the get in vfio_register_group_dev() */ 698 vfio_group_put(group); 699} 700EXPORT_SYMBOL_GPL(vfio_unregister_group_dev); 701 702/* 703 * VFIO base fd, /dev/vfio/vfio 704 */ 705static long vfio_ioctl_check_extension(struct vfio_container *container, 706 unsigned long arg) 707{ 708 struct vfio_iommu_driver *driver; 709 long ret = 0; 710 711 down_read(&container->group_lock); 712 713 driver = container->iommu_driver; 714 715 switch (arg) { 716 /* No base extensions yet */ 717 default: 718 /* 719 * If no driver is set, poll all registered drivers for 720 * extensions and return the first positive result. If 721 * a driver is already set, further queries will be passed 722 * only to that driver. 
723 */ 724 if (!driver) { 725 mutex_lock(&vfio.iommu_drivers_lock); 726 list_for_each_entry(driver, &vfio.iommu_drivers_list, 727 vfio_next) { 728 729 if (!list_empty(&container->group_list) && 730 !vfio_iommu_driver_allowed(container, 731 driver)) 732 continue; 733 if (!try_module_get(driver->ops->owner)) 734 continue; 735 736 ret = driver->ops->ioctl(NULL, 737 VFIO_CHECK_EXTENSION, 738 arg); 739 module_put(driver->ops->owner); 740 if (ret > 0) 741 break; 742 } 743 mutex_unlock(&vfio.iommu_drivers_lock); 744 } else 745 ret = driver->ops->ioctl(container->iommu_data, 746 VFIO_CHECK_EXTENSION, arg); 747 } 748 749 up_read(&container->group_lock); 750 751 return ret; 752} 753 754/* hold write lock on container->group_lock */ 755static int __vfio_container_attach_groups(struct vfio_container *container, 756 struct vfio_iommu_driver *driver, 757 void *data) 758{ 759 struct vfio_group *group; 760 int ret = -ENODEV; 761 762 list_for_each_entry(group, &container->group_list, container_next) { 763 ret = driver->ops->attach_group(data, group->iommu_group, 764 group->type); 765 if (ret) 766 goto unwind; 767 } 768 769 return ret; 770 771unwind: 772 list_for_each_entry_continue_reverse(group, &container->group_list, 773 container_next) { 774 driver->ops->detach_group(data, group->iommu_group); 775 } 776 777 return ret; 778} 779 780static long vfio_ioctl_set_iommu(struct vfio_container *container, 781 unsigned long arg) 782{ 783 struct vfio_iommu_driver *driver; 784 long ret = -ENODEV; 785 786 down_write(&container->group_lock); 787 788 /* 789 * The container is designed to be an unprivileged interface while 790 * the group can be assigned to specific users. Therefore, only by 791 * adding a group to a container does the user get the privilege of 792 * enabling the iommu, which may allocate finite resources. There 793 * is no unset_iommu, but by removing all the groups from a container, 794 * the container is deprivileged and returns to an unset state. 795 */ 796 if (list_empty(&container->group_list) || container->iommu_driver) { 797 up_write(&container->group_lock); 798 return -EINVAL; 799 } 800 801 mutex_lock(&vfio.iommu_drivers_lock); 802 list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) { 803 void *data; 804 805 if (!vfio_iommu_driver_allowed(container, driver)) 806 continue; 807 if (!try_module_get(driver->ops->owner)) 808 continue; 809 810 /* 811 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION, 812 * so test which iommu driver reported support for this 813 * extension and call open on them. We also pass them the 814 * magic, allowing a single driver to support multiple 815 * interfaces if they'd like. 
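 *
 * From userspace this pairing looks like the following (the extension
 * value is only an example, and a group must already have been added to
 * the container):
 *
 *   if (ioctl(container, VFIO_CHECK_EXTENSION, VFIO_TYPE1v2_IOMMU))
 *           ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1v2_IOMMU);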
816 */ 817 if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) { 818 module_put(driver->ops->owner); 819 continue; 820 } 821 822 data = driver->ops->open(arg); 823 if (IS_ERR(data)) { 824 ret = PTR_ERR(data); 825 module_put(driver->ops->owner); 826 continue; 827 } 828 829 ret = __vfio_container_attach_groups(container, driver, data); 830 if (ret) { 831 driver->ops->release(data); 832 module_put(driver->ops->owner); 833 continue; 834 } 835 836 container->iommu_driver = driver; 837 container->iommu_data = data; 838 break; 839 } 840 841 mutex_unlock(&vfio.iommu_drivers_lock); 842 up_write(&container->group_lock); 843 844 return ret; 845} 846 847static long vfio_fops_unl_ioctl(struct file *filep, 848 unsigned int cmd, unsigned long arg) 849{ 850 struct vfio_container *container = filep->private_data; 851 struct vfio_iommu_driver *driver; 852 void *data; 853 long ret = -EINVAL; 854 855 if (!container) 856 return ret; 857 858 switch (cmd) { 859 case VFIO_GET_API_VERSION: 860 ret = VFIO_API_VERSION; 861 break; 862 case VFIO_CHECK_EXTENSION: 863 ret = vfio_ioctl_check_extension(container, arg); 864 break; 865 case VFIO_SET_IOMMU: 866 ret = vfio_ioctl_set_iommu(container, arg); 867 break; 868 default: 869 driver = container->iommu_driver; 870 data = container->iommu_data; 871 872 if (driver) /* passthrough all unrecognized ioctls */ 873 ret = driver->ops->ioctl(data, cmd, arg); 874 } 875 876 return ret; 877} 878 879static int vfio_fops_open(struct inode *inode, struct file *filep) 880{ 881 struct vfio_container *container; 882 883 container = kzalloc(sizeof(*container), GFP_KERNEL); 884 if (!container) 885 return -ENOMEM; 886 887 INIT_LIST_HEAD(&container->group_list); 888 init_rwsem(&container->group_lock); 889 kref_init(&container->kref); 890 891 filep->private_data = container; 892 893 return 0; 894} 895 896static int vfio_fops_release(struct inode *inode, struct file *filep) 897{ 898 struct vfio_container *container = filep->private_data; 899 struct vfio_iommu_driver *driver = container->iommu_driver; 900 901 if (driver && driver->ops->notify) 902 driver->ops->notify(container->iommu_data, 903 VFIO_IOMMU_CONTAINER_CLOSE); 904 905 filep->private_data = NULL; 906 907 vfio_container_put(container); 908 909 return 0; 910} 911 912static const struct file_operations vfio_fops = { 913 .owner = THIS_MODULE, 914 .open = vfio_fops_open, 915 .release = vfio_fops_release, 916 .unlocked_ioctl = vfio_fops_unl_ioctl, 917 .compat_ioctl = compat_ptr_ioctl, 918}; 919 920/* 921 * VFIO Group fd, /dev/vfio/$GROUP 922 */ 923static void __vfio_group_unset_container(struct vfio_group *group) 924{ 925 struct vfio_container *container = group->container; 926 struct vfio_iommu_driver *driver; 927 928 lockdep_assert_held_write(&group->group_rwsem); 929 930 down_write(&container->group_lock); 931 932 driver = container->iommu_driver; 933 if (driver) 934 driver->ops->detach_group(container->iommu_data, 935 group->iommu_group); 936 937 if (group->type == VFIO_IOMMU) 938 iommu_group_release_dma_owner(group->iommu_group); 939 940 group->container = NULL; 941 group->container_users = 0; 942 list_del(&group->container_next); 943 944 /* Detaching the last group deprivileges a container, remove iommu */ 945 if (driver && list_empty(&container->group_list)) { 946 driver->ops->release(container->iommu_data); 947 module_put(driver->ops->owner); 948 container->iommu_driver = NULL; 949 container->iommu_data = NULL; 950 } 951 952 up_write(&container->group_lock); 953 954 vfio_container_put(container); 955} 956 957/* 958 * 
VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or 959 * if there was no container to unset. Since the ioctl is called on 960 * the group, we know that still exists, therefore the only valid 961 * transition here is 1->0. 962 */ 963static int vfio_group_unset_container(struct vfio_group *group) 964{ 965 lockdep_assert_held_write(&group->group_rwsem); 966 967 if (!group->container) 968 return -EINVAL; 969 if (group->container_users != 1) 970 return -EBUSY; 971 __vfio_group_unset_container(group); 972 return 0; 973} 974 975static int vfio_group_set_container(struct vfio_group *group, int container_fd) 976{ 977 struct fd f; 978 struct vfio_container *container; 979 struct vfio_iommu_driver *driver; 980 int ret = 0; 981 982 lockdep_assert_held_write(&group->group_rwsem); 983 984 if (group->container || WARN_ON(group->container_users)) 985 return -EINVAL; 986 987 if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) 988 return -EPERM; 989 990 f = fdget(container_fd); 991 if (!f.file) 992 return -EBADF; 993 994 /* Sanity check, is this really our fd? */ 995 if (f.file->f_op != &vfio_fops) { 996 fdput(f); 997 return -EINVAL; 998 } 999 1000 container = f.file->private_data; 1001 WARN_ON(!container); /* fget ensures we don't race vfio_release */ 1002 1003 down_write(&container->group_lock); 1004 1005 /* Real groups and fake groups cannot mix */ 1006 if (!list_empty(&container->group_list) && 1007 container->noiommu != (group->type == VFIO_NO_IOMMU)) { 1008 ret = -EPERM; 1009 goto unlock_out; 1010 } 1011 1012 if (group->type == VFIO_IOMMU) { 1013 ret = iommu_group_claim_dma_owner(group->iommu_group, f.file); 1014 if (ret) 1015 goto unlock_out; 1016 } 1017 1018 driver = container->iommu_driver; 1019 if (driver) { 1020 ret = driver->ops->attach_group(container->iommu_data, 1021 group->iommu_group, 1022 group->type); 1023 if (ret) { 1024 if (group->type == VFIO_IOMMU) 1025 iommu_group_release_dma_owner( 1026 group->iommu_group); 1027 goto unlock_out; 1028 } 1029 } 1030 1031 group->container = container; 1032 group->container_users = 1; 1033 container->noiommu = (group->type == VFIO_NO_IOMMU); 1034 list_add(&group->container_next, &container->group_list); 1035 1036 /* Get a reference on the container and mark a user within the group */ 1037 vfio_container_get(container); 1038 1039unlock_out: 1040 up_write(&container->group_lock); 1041 fdput(f); 1042 return ret; 1043} 1044 1045static const struct file_operations vfio_device_fops; 1046 1047/* true if the vfio_device has open_device() called but not close_device() */ 1048static bool vfio_assert_device_open(struct vfio_device *device) 1049{ 1050 return !WARN_ON_ONCE(!READ_ONCE(device->open_count)); 1051} 1052 1053static int vfio_device_assign_container(struct vfio_device *device) 1054{ 1055 struct vfio_group *group = device->group; 1056 1057 lockdep_assert_held_write(&group->group_rwsem); 1058 1059 if (!group->container || !group->container->iommu_driver || 1060 WARN_ON(!group->container_users)) 1061 return -EINVAL; 1062 1063 if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) 1064 return -EPERM; 1065 1066 get_file(group->opened_file); 1067 group->container_users++; 1068 return 0; 1069} 1070 1071static void vfio_device_unassign_container(struct vfio_device *device) 1072{ 1073 down_write(&device->group->group_rwsem); 1074 WARN_ON(device->group->container_users <= 1); 1075 device->group->container_users--; 1076 fput(device->group->opened_file); 1077 up_write(&device->group->group_rwsem); 1078} 1079 1080static struct file 
*vfio_device_open(struct vfio_device *device) 1081{ 1082 struct file *filep; 1083 int ret; 1084 1085 down_write(&device->group->group_rwsem); 1086 ret = vfio_device_assign_container(device); 1087 up_write(&device->group->group_rwsem); 1088 if (ret) 1089 return ERR_PTR(ret); 1090 1091 if (!try_module_get(device->dev->driver->owner)) { 1092 ret = -ENODEV; 1093 goto err_unassign_container; 1094 } 1095 1096 mutex_lock(&device->dev_set->lock); 1097 device->open_count++; 1098 if (device->open_count == 1) { 1099 /* 1100 * Here we pass the KVM pointer with the group under the read 1101 * lock. If the device driver will use it, it must obtain a 1102 * reference and release it during close_device. 1103 */ 1104 down_read(&device->group->group_rwsem); 1105 device->kvm = device->group->kvm; 1106 1107 if (device->ops->open_device) { 1108 ret = device->ops->open_device(device); 1109 if (ret) 1110 goto err_undo_count; 1111 } 1112 up_read(&device->group->group_rwsem); 1113 } 1114 mutex_unlock(&device->dev_set->lock); 1115 1116 /* 1117 * We can't use anon_inode_getfd() because we need to modify 1118 * the f_mode flags directly to allow more than just ioctls 1119 */ 1120 filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops, 1121 device, O_RDWR); 1122 if (IS_ERR(filep)) { 1123 ret = PTR_ERR(filep); 1124 goto err_close_device; 1125 } 1126 1127 /* 1128 * TODO: add an anon_inode interface to do this. 1129 * Appears to be missing by lack of need rather than 1130 * explicitly prevented. Now there's need. 1131 */ 1132 filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE); 1133 1134 if (device->group->type == VFIO_NO_IOMMU) 1135 dev_warn(device->dev, "vfio-noiommu device opened by user " 1136 "(%s:%d)\n", current->comm, task_pid_nr(current)); 1137 /* 1138 * On success the ref of device is moved to the file and 1139 * put in vfio_device_fops_release() 1140 */ 1141 return filep; 1142 1143err_close_device: 1144 mutex_lock(&device->dev_set->lock); 1145 down_read(&device->group->group_rwsem); 1146 if (device->open_count == 1 && device->ops->close_device) 1147 device->ops->close_device(device); 1148err_undo_count: 1149 device->open_count--; 1150 if (device->open_count == 0 && device->kvm) 1151 device->kvm = NULL; 1152 up_read(&device->group->group_rwsem); 1153 mutex_unlock(&device->dev_set->lock); 1154 module_put(device->dev->driver->owner); 1155err_unassign_container: 1156 vfio_device_unassign_container(device); 1157 return ERR_PTR(ret); 1158} 1159 1160static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) 1161{ 1162 struct vfio_device *device; 1163 struct file *filep; 1164 int fdno; 1165 int ret; 1166 1167 device = vfio_device_get_from_name(group, buf); 1168 if (IS_ERR(device)) 1169 return PTR_ERR(device); 1170 1171 fdno = get_unused_fd_flags(O_CLOEXEC); 1172 if (fdno < 0) { 1173 ret = fdno; 1174 goto err_put_device; 1175 } 1176 1177 filep = vfio_device_open(device); 1178 if (IS_ERR(filep)) { 1179 ret = PTR_ERR(filep); 1180 goto err_put_fdno; 1181 } 1182 1183 fd_install(fdno, filep); 1184 return fdno; 1185 1186err_put_fdno: 1187 put_unused_fd(fdno); 1188err_put_device: 1189 vfio_device_put(device); 1190 return ret; 1191} 1192 1193static long vfio_group_fops_unl_ioctl(struct file *filep, 1194 unsigned int cmd, unsigned long arg) 1195{ 1196 struct vfio_group *group = filep->private_data; 1197 long ret = -ENOTTY; 1198 1199 switch (cmd) { 1200 case VFIO_GROUP_GET_STATUS: 1201 { 1202 struct vfio_group_status status; 1203 unsigned long minsz; 1204 1205 minsz = offsetofend(struct 
vfio_group_status, flags); 1206 1207 if (copy_from_user(&status, (void __user *)arg, minsz)) 1208 return -EFAULT; 1209 1210 if (status.argsz < minsz) 1211 return -EINVAL; 1212 1213 status.flags = 0; 1214 1215 down_read(&group->group_rwsem); 1216 if (group->container) 1217 status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET | 1218 VFIO_GROUP_FLAGS_VIABLE; 1219 else if (!iommu_group_dma_owner_claimed(group->iommu_group)) 1220 status.flags |= VFIO_GROUP_FLAGS_VIABLE; 1221 up_read(&group->group_rwsem); 1222 1223 if (copy_to_user((void __user *)arg, &status, minsz)) 1224 return -EFAULT; 1225 1226 ret = 0; 1227 break; 1228 } 1229 case VFIO_GROUP_SET_CONTAINER: 1230 { 1231 int fd; 1232 1233 if (get_user(fd, (int __user *)arg)) 1234 return -EFAULT; 1235 1236 if (fd < 0) 1237 return -EINVAL; 1238 1239 down_write(&group->group_rwsem); 1240 ret = vfio_group_set_container(group, fd); 1241 up_write(&group->group_rwsem); 1242 break; 1243 } 1244 case VFIO_GROUP_UNSET_CONTAINER: 1245 down_write(&group->group_rwsem); 1246 ret = vfio_group_unset_container(group); 1247 up_write(&group->group_rwsem); 1248 break; 1249 case VFIO_GROUP_GET_DEVICE_FD: 1250 { 1251 char *buf; 1252 1253 buf = strndup_user((const char __user *)arg, PAGE_SIZE); 1254 if (IS_ERR(buf)) 1255 return PTR_ERR(buf); 1256 1257 ret = vfio_group_get_device_fd(group, buf); 1258 kfree(buf); 1259 break; 1260 } 1261 } 1262 1263 return ret; 1264} 1265 1266static int vfio_group_fops_open(struct inode *inode, struct file *filep) 1267{ 1268 struct vfio_group *group = 1269 container_of(inode->i_cdev, struct vfio_group, cdev); 1270 int ret; 1271 1272 down_write(&group->group_rwsem); 1273 1274 /* users can be zero if this races with vfio_group_put() */ 1275 if (!refcount_inc_not_zero(&group->users)) { 1276 ret = -ENODEV; 1277 goto err_unlock; 1278 } 1279 1280 if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) { 1281 ret = -EPERM; 1282 goto err_put; 1283 } 1284 1285 /* 1286 * Do we need multiple instances of the group open? Seems not. 1287 */ 1288 if (group->opened_file) { 1289 ret = -EBUSY; 1290 goto err_put; 1291 } 1292 group->opened_file = filep; 1293 filep->private_data = group; 1294 1295 up_write(&group->group_rwsem); 1296 return 0; 1297err_put: 1298 vfio_group_put(group); 1299err_unlock: 1300 up_write(&group->group_rwsem); 1301 return ret; 1302} 1303 1304static int vfio_group_fops_release(struct inode *inode, struct file *filep) 1305{ 1306 struct vfio_group *group = filep->private_data; 1307 1308 filep->private_data = NULL; 1309 1310 down_write(&group->group_rwsem); 1311 /* 1312 * Device FDs hold a group file reference, therefore the group release 1313 * is only called when there are no open devices. 
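 *
 * For reference, the usual userspace flow against /dev/vfio/$GROUP is
 * (group number and device name are examples, container is an open
 * /dev/vfio/vfio fd, error handling omitted):
 *
 *   struct vfio_group_status status = { .argsz = sizeof(status) };
 *   int group = open("/dev/vfio/26", O_RDWR);
 *
 *   ioctl(group, VFIO_GROUP_GET_STATUS, &status);
 *   if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE))
 *           return -1;
 *   ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
 *   ...
 *   int device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0");
 *
 * Since vfio_device_open() takes a reference on group->opened_file, the
 * group fd may be closed before the device fd; this handler then runs only
 * after the device fd is released.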
1314 */ 1315 WARN_ON(group->notifier.head); 1316 if (group->container) { 1317 WARN_ON(group->container_users != 1); 1318 __vfio_group_unset_container(group); 1319 } 1320 group->opened_file = NULL; 1321 up_write(&group->group_rwsem); 1322 1323 vfio_group_put(group); 1324 1325 return 0; 1326} 1327 1328static const struct file_operations vfio_group_fops = { 1329 .owner = THIS_MODULE, 1330 .unlocked_ioctl = vfio_group_fops_unl_ioctl, 1331 .compat_ioctl = compat_ptr_ioctl, 1332 .open = vfio_group_fops_open, 1333 .release = vfio_group_fops_release, 1334}; 1335 1336/* 1337 * VFIO Device fd 1338 */ 1339static int vfio_device_fops_release(struct inode *inode, struct file *filep) 1340{ 1341 struct vfio_device *device = filep->private_data; 1342 1343 mutex_lock(&device->dev_set->lock); 1344 vfio_assert_device_open(device); 1345 down_read(&device->group->group_rwsem); 1346 if (device->open_count == 1 && device->ops->close_device) 1347 device->ops->close_device(device); 1348 up_read(&device->group->group_rwsem); 1349 device->open_count--; 1350 if (device->open_count == 0) 1351 device->kvm = NULL; 1352 mutex_unlock(&device->dev_set->lock); 1353 1354 module_put(device->dev->driver->owner); 1355 1356 vfio_device_unassign_container(device); 1357 1358 vfio_device_put(device); 1359 1360 return 0; 1361} 1362 1363/* 1364 * vfio_mig_get_next_state - Compute the next step in the FSM 1365 * @cur_fsm - The current state the device is in 1366 * @new_fsm - The target state to reach 1367 * @next_fsm - Pointer to the next step to get to new_fsm 1368 * 1369 * Return 0 upon success, otherwise -errno 1370 * Upon success the next step in the state progression between cur_fsm and 1371 * new_fsm will be set in next_fsm. 1372 * 1373 * This breaks down requests for combination transitions into smaller steps and 1374 * returns the next step to get to new_fsm. The function may need to be called 1375 * multiple times before reaching new_fsm. 
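 *
 * A driver's migration_set_state() implementation would typically loop on
 * this helper until the target state is reached, along the lines of the
 * sketch below (my_hw_step() is a hypothetical per-arc handler):
 *
 *   enum vfio_device_mig_state next;
 *   int ret;
 *
 *   while (cur_fsm != new_fsm) {
 *           ret = vfio_mig_get_next_state(device, cur_fsm, new_fsm, &next);
 *           if (ret)
 *                   break;
 *           ret = my_hw_step(device, next);
 *           if (ret)
 *                   break;
 *           cur_fsm = next;
 *   }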
1376 * 1377 */ 1378int vfio_mig_get_next_state(struct vfio_device *device, 1379 enum vfio_device_mig_state cur_fsm, 1380 enum vfio_device_mig_state new_fsm, 1381 enum vfio_device_mig_state *next_fsm) 1382{ 1383 enum { VFIO_DEVICE_NUM_STATES = VFIO_DEVICE_STATE_RUNNING_P2P + 1 }; 1384 /* 1385 * The coding in this table requires the driver to implement the 1386 * following FSM arcs: 1387 * RESUMING -> STOP 1388 * STOP -> RESUMING 1389 * STOP -> STOP_COPY 1390 * STOP_COPY -> STOP 1391 * 1392 * If P2P is supported then the driver must also implement these FSM 1393 * arcs: 1394 * RUNNING -> RUNNING_P2P 1395 * RUNNING_P2P -> RUNNING 1396 * RUNNING_P2P -> STOP 1397 * STOP -> RUNNING_P2P 1398 * Without P2P the driver must implement: 1399 * RUNNING -> STOP 1400 * STOP -> RUNNING 1401 * 1402 * The coding will step through multiple states for some combination 1403 * transitions; if all optional features are supported, this means the 1404 * following ones: 1405 * RESUMING -> STOP -> RUNNING_P2P 1406 * RESUMING -> STOP -> RUNNING_P2P -> RUNNING 1407 * RESUMING -> STOP -> STOP_COPY 1408 * RUNNING -> RUNNING_P2P -> STOP 1409 * RUNNING -> RUNNING_P2P -> STOP -> RESUMING 1410 * RUNNING -> RUNNING_P2P -> STOP -> STOP_COPY 1411 * RUNNING_P2P -> STOP -> RESUMING 1412 * RUNNING_P2P -> STOP -> STOP_COPY 1413 * STOP -> RUNNING_P2P -> RUNNING 1414 * STOP_COPY -> STOP -> RESUMING 1415 * STOP_COPY -> STOP -> RUNNING_P2P 1416 * STOP_COPY -> STOP -> RUNNING_P2P -> RUNNING 1417 */ 1418 static const u8 vfio_from_fsm_table[VFIO_DEVICE_NUM_STATES][VFIO_DEVICE_NUM_STATES] = { 1419 [VFIO_DEVICE_STATE_STOP] = { 1420 [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, 1421 [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P, 1422 [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY, 1423 [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING, 1424 [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, 1425 [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 1426 }, 1427 [VFIO_DEVICE_STATE_RUNNING] = { 1428 [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P, 1429 [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING, 1430 [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P, 1431 [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P, 1432 [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, 1433 [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 1434 }, 1435 [VFIO_DEVICE_STATE_STOP_COPY] = { 1436 [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, 1437 [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP, 1438 [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY, 1439 [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP, 1440 [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP, 1441 [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 1442 }, 1443 [VFIO_DEVICE_STATE_RESUMING] = { 1444 [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, 1445 [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP, 1446 [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP, 1447 [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING, 1448 [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP, 1449 [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 1450 }, 1451 [VFIO_DEVICE_STATE_RUNNING_P2P] = { 1452 [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, 1453 [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING, 1454 [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP, 1455 [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP, 1456 
[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, 1457 [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 1458 }, 1459 [VFIO_DEVICE_STATE_ERROR] = { 1460 [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_ERROR, 1461 [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_ERROR, 1462 [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_ERROR, 1463 [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_ERROR, 1464 [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_ERROR, 1465 [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, 1466 }, 1467 }; 1468 1469 static const unsigned int state_flags_table[VFIO_DEVICE_NUM_STATES] = { 1470 [VFIO_DEVICE_STATE_STOP] = VFIO_MIGRATION_STOP_COPY, 1471 [VFIO_DEVICE_STATE_RUNNING] = VFIO_MIGRATION_STOP_COPY, 1472 [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_MIGRATION_STOP_COPY, 1473 [VFIO_DEVICE_STATE_RESUMING] = VFIO_MIGRATION_STOP_COPY, 1474 [VFIO_DEVICE_STATE_RUNNING_P2P] = 1475 VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P, 1476 [VFIO_DEVICE_STATE_ERROR] = ~0U, 1477 }; 1478 1479 if (WARN_ON(cur_fsm >= ARRAY_SIZE(vfio_from_fsm_table) || 1480 (state_flags_table[cur_fsm] & device->migration_flags) != 1481 state_flags_table[cur_fsm])) 1482 return -EINVAL; 1483 1484 if (new_fsm >= ARRAY_SIZE(vfio_from_fsm_table) || 1485 (state_flags_table[new_fsm] & device->migration_flags) != 1486 state_flags_table[new_fsm]) 1487 return -EINVAL; 1488 1489 /* 1490 * Arcs touching optional and unsupported states are skipped over. The 1491 * driver will instead see an arc from the original state to the next 1492 * logical state, as per the above comment. 1493 */ 1494 *next_fsm = vfio_from_fsm_table[cur_fsm][new_fsm]; 1495 while ((state_flags_table[*next_fsm] & device->migration_flags) != 1496 state_flags_table[*next_fsm]) 1497 *next_fsm = vfio_from_fsm_table[*next_fsm][new_fsm]; 1498 1499 return (*next_fsm != VFIO_DEVICE_STATE_ERROR) ? 
0 : -EINVAL; 1500} 1501EXPORT_SYMBOL_GPL(vfio_mig_get_next_state); 1502 1503/* 1504 * Convert the drivers's struct file into a FD number and return it to userspace 1505 */ 1506static int vfio_ioct_mig_return_fd(struct file *filp, void __user *arg, 1507 struct vfio_device_feature_mig_state *mig) 1508{ 1509 int ret; 1510 int fd; 1511 1512 fd = get_unused_fd_flags(O_CLOEXEC); 1513 if (fd < 0) { 1514 ret = fd; 1515 goto out_fput; 1516 } 1517 1518 mig->data_fd = fd; 1519 if (copy_to_user(arg, mig, sizeof(*mig))) { 1520 ret = -EFAULT; 1521 goto out_put_unused; 1522 } 1523 fd_install(fd, filp); 1524 return 0; 1525 1526out_put_unused: 1527 put_unused_fd(fd); 1528out_fput: 1529 fput(filp); 1530 return ret; 1531} 1532 1533static int 1534vfio_ioctl_device_feature_mig_device_state(struct vfio_device *device, 1535 u32 flags, void __user *arg, 1536 size_t argsz) 1537{ 1538 size_t minsz = 1539 offsetofend(struct vfio_device_feature_mig_state, data_fd); 1540 struct vfio_device_feature_mig_state mig; 1541 struct file *filp = NULL; 1542 int ret; 1543 1544 if (!device->ops->migration_set_state || 1545 !device->ops->migration_get_state) 1546 return -ENOTTY; 1547 1548 ret = vfio_check_feature(flags, argsz, 1549 VFIO_DEVICE_FEATURE_SET | 1550 VFIO_DEVICE_FEATURE_GET, 1551 sizeof(mig)); 1552 if (ret != 1) 1553 return ret; 1554 1555 if (copy_from_user(&mig, arg, minsz)) 1556 return -EFAULT; 1557 1558 if (flags & VFIO_DEVICE_FEATURE_GET) { 1559 enum vfio_device_mig_state curr_state; 1560 1561 ret = device->ops->migration_get_state(device, &curr_state); 1562 if (ret) 1563 return ret; 1564 mig.device_state = curr_state; 1565 goto out_copy; 1566 } 1567 1568 /* Handle the VFIO_DEVICE_FEATURE_SET */ 1569 filp = device->ops->migration_set_state(device, mig.device_state); 1570 if (IS_ERR(filp) || !filp) 1571 goto out_copy; 1572 1573 return vfio_ioct_mig_return_fd(filp, arg, &mig); 1574out_copy: 1575 mig.data_fd = -1; 1576 if (copy_to_user(arg, &mig, sizeof(mig))) 1577 return -EFAULT; 1578 if (IS_ERR(filp)) 1579 return PTR_ERR(filp); 1580 return 0; 1581} 1582 1583static int vfio_ioctl_device_feature_migration(struct vfio_device *device, 1584 u32 flags, void __user *arg, 1585 size_t argsz) 1586{ 1587 struct vfio_device_feature_migration mig = { 1588 .flags = device->migration_flags, 1589 }; 1590 int ret; 1591 1592 if (!device->ops->migration_set_state || 1593 !device->ops->migration_get_state) 1594 return -ENOTTY; 1595 1596 ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET, 1597 sizeof(mig)); 1598 if (ret != 1) 1599 return ret; 1600 if (copy_to_user(arg, &mig, sizeof(mig))) 1601 return -EFAULT; 1602 return 0; 1603} 1604 1605static int vfio_ioctl_device_feature(struct vfio_device *device, 1606 struct vfio_device_feature __user *arg) 1607{ 1608 size_t minsz = offsetofend(struct vfio_device_feature, flags); 1609 struct vfio_device_feature feature; 1610 1611 if (copy_from_user(&feature, arg, minsz)) 1612 return -EFAULT; 1613 1614 if (feature.argsz < minsz) 1615 return -EINVAL; 1616 1617 /* Check unknown flags */ 1618 if (feature.flags & 1619 ~(VFIO_DEVICE_FEATURE_MASK | VFIO_DEVICE_FEATURE_SET | 1620 VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_PROBE)) 1621 return -EINVAL; 1622 1623 /* GET & SET are mutually exclusive except with PROBE */ 1624 if (!(feature.flags & VFIO_DEVICE_FEATURE_PROBE) && 1625 (feature.flags & VFIO_DEVICE_FEATURE_SET) && 1626 (feature.flags & VFIO_DEVICE_FEATURE_GET)) 1627 return -EINVAL; 1628 1629 switch (feature.flags & VFIO_DEVICE_FEATURE_MASK) { 1630 case VFIO_DEVICE_FEATURE_MIGRATION: 
1631 return vfio_ioctl_device_feature_migration( 1632 device, feature.flags, arg->data, 1633 feature.argsz - minsz); 1634 case VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE: 1635 return vfio_ioctl_device_feature_mig_device_state( 1636 device, feature.flags, arg->data, 1637 feature.argsz - minsz); 1638 default: 1639 if (unlikely(!device->ops->device_feature)) 1640 return -EINVAL; 1641 return device->ops->device_feature(device, feature.flags, 1642 arg->data, 1643 feature.argsz - minsz); 1644 } 1645} 1646 1647static long vfio_device_fops_unl_ioctl(struct file *filep, 1648 unsigned int cmd, unsigned long arg) 1649{ 1650 struct vfio_device *device = filep->private_data; 1651 1652 switch (cmd) { 1653 case VFIO_DEVICE_FEATURE: 1654 return vfio_ioctl_device_feature(device, (void __user *)arg); 1655 default: 1656 if (unlikely(!device->ops->ioctl)) 1657 return -EINVAL; 1658 return device->ops->ioctl(device, cmd, arg); 1659 } 1660} 1661 1662static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf, 1663 size_t count, loff_t *ppos) 1664{ 1665 struct vfio_device *device = filep->private_data; 1666 1667 if (unlikely(!device->ops->read)) 1668 return -EINVAL; 1669 1670 return device->ops->read(device, buf, count, ppos); 1671} 1672 1673static ssize_t vfio_device_fops_write(struct file *filep, 1674 const char __user *buf, 1675 size_t count, loff_t *ppos) 1676{ 1677 struct vfio_device *device = filep->private_data; 1678 1679 if (unlikely(!device->ops->write)) 1680 return -EINVAL; 1681 1682 return device->ops->write(device, buf, count, ppos); 1683} 1684 1685static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma) 1686{ 1687 struct vfio_device *device = filep->private_data; 1688 1689 if (unlikely(!device->ops->mmap)) 1690 return -EINVAL; 1691 1692 return device->ops->mmap(device, vma); 1693} 1694 1695static const struct file_operations vfio_device_fops = { 1696 .owner = THIS_MODULE, 1697 .release = vfio_device_fops_release, 1698 .read = vfio_device_fops_read, 1699 .write = vfio_device_fops_write, 1700 .unlocked_ioctl = vfio_device_fops_unl_ioctl, 1701 .compat_ioctl = compat_ptr_ioctl, 1702 .mmap = vfio_device_fops_mmap, 1703}; 1704 1705/** 1706 * vfio_file_iommu_group - Return the struct iommu_group for the vfio group file 1707 * @file: VFIO group file 1708 * 1709 * The returned iommu_group is valid as long as a ref is held on the file. 1710 */ 1711struct iommu_group *vfio_file_iommu_group(struct file *file) 1712{ 1713 struct vfio_group *group = file->private_data; 1714 1715 if (file->f_op != &vfio_group_fops) 1716 return NULL; 1717 return group->iommu_group; 1718} 1719EXPORT_SYMBOL_GPL(vfio_file_iommu_group); 1720 1721/** 1722 * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file 1723 * is always CPU cache coherent 1724 * @file: VFIO group file 1725 * 1726 * Enforced coherency means that the IOMMU ignores things like the PCIe no-snoop 1727 * bit in DMA transactions. A return of false indicates that the user has 1728 * rights to access additional instructions such as wbinvd on x86. 
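 *
 * A caller such as KVM can use the result to decide whether the guest may
 * rely on non-coherent DMA semantics, e.g. (allow_wbinvd is a hypothetical
 * flag on the caller's side):
 *
 *   if (!vfio_file_enforced_coherent(file))
 *           allow_wbinvd = true;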
1729 */ 1730bool vfio_file_enforced_coherent(struct file *file) 1731{ 1732 struct vfio_group *group = file->private_data; 1733 bool ret; 1734 1735 if (file->f_op != &vfio_group_fops) 1736 return true; 1737 1738 down_read(&group->group_rwsem); 1739 if (group->container) { 1740 ret = vfio_ioctl_check_extension(group->container, 1741 VFIO_DMA_CC_IOMMU); 1742 } else { 1743 /* 1744 * Since the coherency state is determined only once a container 1745 * is attached the user must do so before they can prove they 1746 * have permission. 1747 */ 1748 ret = true; 1749 } 1750 up_read(&group->group_rwsem); 1751 return ret; 1752} 1753EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent); 1754 1755/** 1756 * vfio_file_set_kvm - Link a kvm with VFIO drivers 1757 * @file: VFIO group file 1758 * @kvm: KVM to link 1759 * 1760 * When a VFIO device is first opened the KVM will be available in 1761 * device->kvm if one was associated with the group. 1762 */ 1763void vfio_file_set_kvm(struct file *file, struct kvm *kvm) 1764{ 1765 struct vfio_group *group = file->private_data; 1766 1767 if (file->f_op != &vfio_group_fops) 1768 return; 1769 1770 down_write(&group->group_rwsem); 1771 group->kvm = kvm; 1772 up_write(&group->group_rwsem); 1773} 1774EXPORT_SYMBOL_GPL(vfio_file_set_kvm); 1775 1776/** 1777 * vfio_file_has_dev - True if the VFIO file is a handle for device 1778 * @file: VFIO file to check 1779 * @device: Device that must be part of the file 1780 * 1781 * Returns true if given file has permission to manipulate the given device. 1782 */ 1783bool vfio_file_has_dev(struct file *file, struct vfio_device *device) 1784{ 1785 struct vfio_group *group = file->private_data; 1786 1787 if (file->f_op != &vfio_group_fops) 1788 return false; 1789 1790 return group == device->group; 1791} 1792EXPORT_SYMBOL_GPL(vfio_file_has_dev); 1793 1794/* 1795 * Sub-module support 1796 */ 1797/* 1798 * Helper for managing a buffer of info chain capabilities, allocate or 1799 * reallocate a buffer with additional @size, filling in @id and @version 1800 * of the capability. A pointer to the new capability is returned. 1801 * 1802 * NB. The chain is based at the head of the buffer, so new entries are 1803 * added to the tail, vfio_info_cap_shift() should be called to fixup the 1804 * next offsets prior to copying to the user buffer. 
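 *
 * A typical user builds the chain while handling an *_INFO ioctl and then
 * relocates the offsets before copying out, roughly as follows (struct
 * my_cap and MY_CAP_ID are hypothetical; info stands for the fixed-size
 * *_INFO structure already copied to the start of the user buffer, error
 * handling trimmed):
 *
 *   struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
 *   struct vfio_info_cap_header *header;
 *
 *   header = vfio_info_cap_add(&caps, sizeof(struct my_cap), MY_CAP_ID, 1);
 *   if (IS_ERR(header))
 *           return PTR_ERR(header);
 *
 *   vfio_info_cap_shift(&caps, sizeof(info));
 *   copy_to_user((void __user *)arg + sizeof(info), caps.buf, caps.size);
 *   kfree(caps.buf);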
1805 */ 1806struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps, 1807 size_t size, u16 id, u16 version) 1808{ 1809 void *buf; 1810 struct vfio_info_cap_header *header, *tmp; 1811 1812 buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL); 1813 if (!buf) { 1814 kfree(caps->buf); 1815 caps->size = 0; 1816 return ERR_PTR(-ENOMEM); 1817 } 1818 1819 caps->buf = buf; 1820 header = buf + caps->size; 1821 1822 /* Eventually copied to user buffer, zero */ 1823 memset(header, 0, size); 1824 1825 header->id = id; 1826 header->version = version; 1827 1828 /* Add to the end of the capability chain */ 1829 for (tmp = buf; tmp->next; tmp = buf + tmp->next) 1830 ; /* nothing */ 1831 1832 tmp->next = caps->size; 1833 caps->size += size; 1834 1835 return header; 1836} 1837EXPORT_SYMBOL_GPL(vfio_info_cap_add); 1838 1839void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset) 1840{ 1841 struct vfio_info_cap_header *tmp; 1842 void *buf = (void *)caps->buf; 1843 1844 for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset) 1845 tmp->next += offset; 1846} 1847EXPORT_SYMBOL(vfio_info_cap_shift); 1848 1849int vfio_info_add_capability(struct vfio_info_cap *caps, 1850 struct vfio_info_cap_header *cap, size_t size) 1851{ 1852 struct vfio_info_cap_header *header; 1853 1854 header = vfio_info_cap_add(caps, size, cap->id, cap->version); 1855 if (IS_ERR(header)) 1856 return PTR_ERR(header); 1857 1858 memcpy(header + 1, cap + 1, size - sizeof(*header)); 1859 1860 return 0; 1861} 1862EXPORT_SYMBOL(vfio_info_add_capability); 1863 1864int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs, 1865 int max_irq_type, size_t *data_size) 1866{ 1867 unsigned long minsz; 1868 size_t size; 1869 1870 minsz = offsetofend(struct vfio_irq_set, count); 1871 1872 if ((hdr->argsz < minsz) || (hdr->index >= max_irq_type) || 1873 (hdr->count >= (U32_MAX - hdr->start)) || 1874 (hdr->flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK | 1875 VFIO_IRQ_SET_ACTION_TYPE_MASK))) 1876 return -EINVAL; 1877 1878 if (data_size) 1879 *data_size = 0; 1880 1881 if (hdr->start >= num_irqs || hdr->start + hdr->count > num_irqs) 1882 return -EINVAL; 1883 1884 switch (hdr->flags & VFIO_IRQ_SET_DATA_TYPE_MASK) { 1885 case VFIO_IRQ_SET_DATA_NONE: 1886 size = 0; 1887 break; 1888 case VFIO_IRQ_SET_DATA_BOOL: 1889 size = sizeof(uint8_t); 1890 break; 1891 case VFIO_IRQ_SET_DATA_EVENTFD: 1892 size = sizeof(int32_t); 1893 break; 1894 default: 1895 return -EINVAL; 1896 } 1897 1898 if (size) { 1899 if (hdr->argsz - minsz < hdr->count * size) 1900 return -EINVAL; 1901 1902 if (!data_size) 1903 return -EINVAL; 1904 1905 *data_size = hdr->count * size; 1906 } 1907 1908 return 0; 1909} 1910EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare); 1911 1912/* 1913 * Pin a set of guest PFNs and return their associated host PFNs for local 1914 * domain only. 1915 * @device [in] : device 1916 * @user_pfn [in]: array of user/guest PFNs to be pinned. 1917 * @npage [in] : count of elements in user_pfn array. This count should not 1918 * be greater VFIO_PIN_PAGES_MAX_ENTRIES. 1919 * @prot [in] : protection flags 1920 * @phys_pfn[out]: array of host PFNs 1921 * Return error or number of pages pinned. 
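 *
 * For example, a mediated driver translating a single guest page for DMA
 * (gpa is the guest physical address, vdev the vfio_device) might use:
 *
 *   unsigned long gfn = gpa >> PAGE_SHIFT, hpfn;
 *   int ret;
 *
 *   ret = vfio_pin_pages(vdev, &gfn, 1, IOMMU_READ | IOMMU_WRITE, &hpfn);
 *   if (ret != 1)
 *           return ret < 0 ? ret : -EFAULT;
 *
 * and release the pin later with vfio_unpin_pages(vdev, &gfn, 1).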
1922 */ 1923int vfio_pin_pages(struct vfio_device *device, unsigned long *user_pfn, 1924 int npage, int prot, unsigned long *phys_pfn) 1925{ 1926 struct vfio_container *container; 1927 struct vfio_group *group = device->group; 1928 struct vfio_iommu_driver *driver; 1929 int ret; 1930 1931 if (!user_pfn || !phys_pfn || !npage || 1932 !vfio_assert_device_open(device)) 1933 return -EINVAL; 1934 1935 if (npage > VFIO_PIN_PAGES_MAX_ENTRIES) 1936 return -E2BIG; 1937 1938 if (group->dev_counter > 1) 1939 return -EINVAL; 1940 1941 /* group->container cannot change while a vfio device is open */ 1942 container = group->container; 1943 driver = container->iommu_driver; 1944 if (likely(driver && driver->ops->pin_pages)) 1945 ret = driver->ops->pin_pages(container->iommu_data, 1946 group->iommu_group, user_pfn, 1947 npage, prot, phys_pfn); 1948 else 1949 ret = -ENOTTY; 1950 1951 return ret; 1952} 1953EXPORT_SYMBOL(vfio_pin_pages); 1954 1955/* 1956 * Unpin set of host PFNs for local domain only. 1957 * @device [in] : device 1958 * @user_pfn [in]: array of user/guest PFNs to be unpinned. Number of user/guest 1959 * PFNs should not be greater than VFIO_PIN_PAGES_MAX_ENTRIES. 1960 * @npage [in] : count of elements in user_pfn array. This count should not 1961 * be greater than VFIO_PIN_PAGES_MAX_ENTRIES. 1962 * Return error or number of pages unpinned. 1963 */ 1964int vfio_unpin_pages(struct vfio_device *device, unsigned long *user_pfn, 1965 int npage) 1966{ 1967 struct vfio_container *container; 1968 struct vfio_iommu_driver *driver; 1969 int ret; 1970 1971 if (!user_pfn || !npage || !vfio_assert_device_open(device)) 1972 return -EINVAL; 1973 1974 if (npage > VFIO_PIN_PAGES_MAX_ENTRIES) 1975 return -E2BIG; 1976 1977 /* group->container cannot change while a vfio device is open */ 1978 container = device->group->container; 1979 driver = container->iommu_driver; 1980 if (likely(driver && driver->ops->unpin_pages)) 1981 ret = driver->ops->unpin_pages(container->iommu_data, user_pfn, 1982 npage); 1983 else 1984 ret = -ENOTTY; 1985 1986 return ret; 1987} 1988EXPORT_SYMBOL(vfio_unpin_pages); 1989 1990/* 1991 * This interface allows the CPUs to perform some sort of virtual DMA on 1992 * behalf of the device. 1993 * 1994 * CPUs read/write from/into a range of IOVAs pointing to user space memory 1995 * into/from a kernel buffer. 1996 * 1997 * As the read/write of user space memory is conducted via the CPUs and is 1998 * not a real device DMA, it is not necessary to pin the user space memory. 1999 * 2000 * @device [in] : VFIO device 2001 * @user_iova [in] : base IOVA of a user space buffer 2002 * @data [in] : pointer to kernel buffer 2003 * @len [in] : kernel buffer length 2004 * @write : indicate read or write 2005 * Return error code on failure or 0 on success. 
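 *
 * For example, reading a descriptor that the guest placed at an IOVA
 * (struct my_desc and iova are illustrative):
 *
 *   struct my_desc desc;
 *   int ret;
 *
 *   ret = vfio_dma_rw(vdev, iova, &desc, sizeof(desc), false);
 *
 * Passing write=true copies the kernel buffer out to the IOVA range
 * instead.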
2006 */ 2007int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova, void *data, 2008 size_t len, bool write) 2009{ 2010 struct vfio_container *container; 2011 struct vfio_iommu_driver *driver; 2012 int ret = 0; 2013 2014 if (!data || len <= 0 || !vfio_assert_device_open(device)) 2015 return -EINVAL; 2016 2017 /* group->container cannot change while a vfio device is open */ 2018 container = device->group->container; 2019 driver = container->iommu_driver; 2020 2021 if (likely(driver && driver->ops->dma_rw)) 2022 ret = driver->ops->dma_rw(container->iommu_data, 2023 user_iova, data, len, write); 2024 else 2025 ret = -ENOTTY; 2026 return ret; 2027} 2028EXPORT_SYMBOL(vfio_dma_rw); 2029 2030static int vfio_register_iommu_notifier(struct vfio_group *group, 2031 unsigned long *events, 2032 struct notifier_block *nb) 2033{ 2034 struct vfio_container *container; 2035 struct vfio_iommu_driver *driver; 2036 int ret; 2037 2038 lockdep_assert_held_read(&group->group_rwsem); 2039 2040 container = group->container; 2041 driver = container->iommu_driver; 2042 if (likely(driver && driver->ops->register_notifier)) 2043 ret = driver->ops->register_notifier(container->iommu_data, 2044 events, nb); 2045 else 2046 ret = -ENOTTY; 2047 2048 return ret; 2049} 2050 2051static int vfio_unregister_iommu_notifier(struct vfio_group *group, 2052 struct notifier_block *nb) 2053{ 2054 struct vfio_container *container; 2055 struct vfio_iommu_driver *driver; 2056 int ret; 2057 2058 lockdep_assert_held_read(&group->group_rwsem); 2059 2060 container = group->container; 2061 driver = container->iommu_driver; 2062 if (likely(driver && driver->ops->unregister_notifier)) 2063 ret = driver->ops->unregister_notifier(container->iommu_data, 2064 nb); 2065 else 2066 ret = -ENOTTY; 2067 2068 return ret; 2069} 2070 2071int vfio_register_notifier(struct vfio_device *device, 2072 enum vfio_notify_type type, unsigned long *events, 2073 struct notifier_block *nb) 2074{ 2075 struct vfio_group *group = device->group; 2076 int ret; 2077 2078 if (!nb || !events || (*events == 0) || 2079 !vfio_assert_device_open(device)) 2080 return -EINVAL; 2081 2082 switch (type) { 2083 case VFIO_IOMMU_NOTIFY: 2084 ret = vfio_register_iommu_notifier(group, events, nb); 2085 break; 2086 default: 2087 ret = -EINVAL; 2088 } 2089 return ret; 2090} 2091EXPORT_SYMBOL(vfio_register_notifier); 2092 2093int vfio_unregister_notifier(struct vfio_device *device, 2094 enum vfio_notify_type type, 2095 struct notifier_block *nb) 2096{ 2097 struct vfio_group *group = device->group; 2098 int ret; 2099 2100 if (!nb || !vfio_assert_device_open(device)) 2101 return -EINVAL; 2102 2103 switch (type) { 2104 case VFIO_IOMMU_NOTIFY: 2105 ret = vfio_unregister_iommu_notifier(group, nb); 2106 break; 2107 default: 2108 ret = -EINVAL; 2109 } 2110 return ret; 2111} 2112EXPORT_SYMBOL(vfio_unregister_notifier); 2113 2114/* 2115 * Module/class support 2116 */ 2117static char *vfio_devnode(struct device *dev, umode_t *mode) 2118{ 2119 return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev)); 2120} 2121 2122static struct miscdevice vfio_dev = { 2123 .minor = VFIO_MINOR, 2124 .name = "vfio", 2125 .fops = &vfio_fops, 2126 .nodename = "vfio/vfio", 2127 .mode = S_IRUGO | S_IWUGO, 2128}; 2129 2130static int __init vfio_init(void) 2131{ 2132 int ret; 2133 2134 ida_init(&vfio.group_ida); 2135 mutex_init(&vfio.group_lock); 2136 mutex_init(&vfio.iommu_drivers_lock); 2137 INIT_LIST_HEAD(&vfio.group_list); 2138 INIT_LIST_HEAD(&vfio.iommu_drivers_list); 2139 2140 ret = misc_register(&vfio_dev); 2141 
if (ret) { 2142 pr_err("vfio: misc device register failed\n"); 2143 return ret; 2144 } 2145 2146 /* /dev/vfio/$GROUP */ 2147 vfio.class = class_create(THIS_MODULE, "vfio"); 2148 if (IS_ERR(vfio.class)) { 2149 ret = PTR_ERR(vfio.class); 2150 goto err_class; 2151 } 2152 2153 vfio.class->devnode = vfio_devnode; 2154 2155 ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio"); 2156 if (ret) 2157 goto err_alloc_chrdev; 2158 2159 pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); 2160 2161#ifdef CONFIG_VFIO_NOIOMMU 2162 vfio_register_iommu_driver(&vfio_noiommu_ops); 2163#endif 2164 return 0; 2165 2166err_alloc_chrdev: 2167 class_destroy(vfio.class); 2168 vfio.class = NULL; 2169err_class: 2170 misc_deregister(&vfio_dev); 2171 return ret; 2172} 2173 2174static void __exit vfio_cleanup(void) 2175{ 2176 WARN_ON(!list_empty(&vfio.group_list)); 2177 2178#ifdef CONFIG_VFIO_NOIOMMU 2179 vfio_unregister_iommu_driver(&vfio_noiommu_ops); 2180#endif 2181 ida_destroy(&vfio.group_ida); 2182 unregister_chrdev_region(vfio.group_devt, MINORMASK + 1); 2183 class_destroy(vfio.class); 2184 vfio.class = NULL; 2185 misc_deregister(&vfio_dev); 2186 xa_destroy(&vfio_device_set_xa); 2187} 2188 2189module_init(vfio_init); 2190module_exit(vfio_cleanup); 2191 2192MODULE_VERSION(DRIVER_VERSION); 2193MODULE_LICENSE("GPL v2"); 2194MODULE_AUTHOR(DRIVER_AUTHOR); 2195MODULE_DESCRIPTION(DRIVER_DESC); 2196MODULE_ALIAS_MISCDEV(VFIO_MINOR); 2197MODULE_ALIAS("devname:vfio/vfio"); 2198MODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce");