cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

vduse_dev.c (37635B)


// SPDX-License-Identifier: GPL-2.0-only
/*
 * VDUSE: vDPA Device in Userspace
 *
 * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
 *
 * Author: Xie Yongji <xieyongji@bytedance.com>
 *
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/eventfd.h>
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/dma-map-ops.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/uio.h>
#include <linux/vdpa.h>
#include <linux/nospec.h>
#include <uapi/linux/vduse.h>
#include <uapi/linux/vdpa.h>
#include <uapi/linux/virtio_config.h>
#include <uapi/linux/virtio_ids.h>
#include <uapi/linux/virtio_blk.h>
#include <linux/mod_devicetable.h>

#include "iova_domain.h"

#define DRV_AUTHOR   "Yongji Xie <xieyongji@bytedance.com>"
#define DRV_DESC     "vDPA Device in Userspace"
#define DRV_LICENSE  "GPL v2"

#define VDUSE_DEV_MAX (1U << MINORBITS)
#define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
#define VDUSE_IOVA_SIZE (128 * 1024 * 1024)
#define VDUSE_MSG_DEFAULT_TIMEOUT 30

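/*
 * Per-virtqueue state. The fields are written from two directions:
 * the vDPA config ops (driven by the virtio driver side) and the
 * VDUSE ioctls (driven by the userspace device implementation).
 * kick_lock protects kickfd/kicked; irq_lock protects the callback.
 */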
struct vduse_virtqueue {
	u16 index;
	u16 num_max;
	u32 num;
	u64 desc_addr;
	u64 driver_addr;
	u64 device_addr;
	struct vdpa_vq_state state;
	bool ready;
	bool kicked;
	spinlock_t kick_lock;
	spinlock_t irq_lock;
	struct eventfd_ctx *kickfd;
	struct vdpa_callback cb;
	struct work_struct inject;
	struct work_struct kick;
};

struct vduse_dev;

struct vduse_vdpa {
	struct vdpa_device vdpa;
	struct vduse_dev *dev;
};

struct vduse_dev {
	struct vduse_vdpa *vdev;
	struct device *dev;
	struct vduse_virtqueue *vqs;
	struct vduse_iova_domain *domain;
	char *name;
	struct mutex lock;
	spinlock_t msg_lock;
	u64 msg_unique;
	u32 msg_timeout;
	wait_queue_head_t waitq;
	struct list_head send_list;
	struct list_head recv_list;
	struct vdpa_callback config_cb;
	struct work_struct inject;
	spinlock_t irq_lock;
	struct rw_semaphore rwsem;
	int minor;
	bool broken;
	bool connected;
	u64 api_version;
	u64 device_features;
	u64 driver_features;
	u32 device_id;
	u32 vendor_id;
	u32 generation;
	u32 config_size;
	void *config;
	u8 status;
	u32 vq_num;
	u32 vq_align;
};

struct vduse_dev_msg {
	struct vduse_dev_request req;
	struct vduse_dev_response resp;
	struct list_head list;
	wait_queue_head_t waitq;
	bool completed;
};

struct vduse_control {
	u64 api_version;
};

static DEFINE_MUTEX(vduse_lock);
static DEFINE_IDR(vduse_idr);

static dev_t vduse_major;
static struct class *vduse_class;
static struct cdev vduse_ctrl_cdev;
static struct cdev vduse_cdev;
static struct workqueue_struct *vduse_irq_wq;

static u32 allowed_device_id[] = {
	VIRTIO_ID_BLOCK,
};

static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa)
{
	struct vduse_vdpa *vdev = container_of(vdpa, struct vduse_vdpa, vdpa);

	return vdev->dev;
}

static inline struct vduse_dev *dev_to_vduse(struct device *dev)
{
	struct vdpa_device *vdpa = dev_to_vdpa(dev);

	return vdpa_to_vduse(vdpa);
}

static struct vduse_dev_msg *vduse_find_msg(struct list_head *head,
					    uint32_t request_id)
{
	struct vduse_dev_msg *msg;

	list_for_each_entry(msg, head, list) {
		if (msg->req.request_id == request_id) {
			list_del(&msg->list);
			return msg;
		}
	}

	return NULL;
}

static struct vduse_dev_msg *vduse_dequeue_msg(struct list_head *head)
{
	struct vduse_dev_msg *msg = NULL;

	if (!list_empty(head)) {
		msg = list_first_entry(head, struct vduse_dev_msg, list);
		list_del(&msg->list);
	}

	return msg;
}

static void vduse_enqueue_msg(struct list_head *head,
			      struct vduse_dev_msg *msg)
{
	list_add_tail(&msg->list, head);
}

static void vduse_dev_broken(struct vduse_dev *dev)
{
	struct vduse_dev_msg *msg, *tmp;

	if (unlikely(dev->broken))
		return;

	list_splice_init(&dev->recv_list, &dev->send_list);
	list_for_each_entry_safe(msg, tmp, &dev->send_list, list) {
		list_del(&msg->list);
		msg->completed = 1;
		msg->resp.result = VDUSE_REQ_RESULT_FAILED;
		wake_up(&msg->waitq);
	}
	dev->broken = true;
	wake_up(&dev->waitq);
}

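/*
 * Synchronous request/response round trip with userspace: the message
 * is queued on send_list (picked up by read(2) on the device fd) and
 * the caller sleeps until write(2) delivers a matching response, or
 * until the per-device timeout expires, at which point the device is
 * marked broken.
 */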
static int vduse_dev_msg_sync(struct vduse_dev *dev,
			      struct vduse_dev_msg *msg)
{
	int ret;

	if (unlikely(dev->broken))
		return -EIO;

	init_waitqueue_head(&msg->waitq);
	spin_lock(&dev->msg_lock);
	if (unlikely(dev->broken)) {
		spin_unlock(&dev->msg_lock);
		return -EIO;
	}
	msg->req.request_id = dev->msg_unique++;
	vduse_enqueue_msg(&dev->send_list, msg);
	wake_up(&dev->waitq);
	spin_unlock(&dev->msg_lock);
	if (dev->msg_timeout)
		ret = wait_event_killable_timeout(msg->waitq, msg->completed,
						  (long)dev->msg_timeout * HZ);
	else
		ret = wait_event_killable(msg->waitq, msg->completed);

	spin_lock(&dev->msg_lock);
	if (!msg->completed) {
		list_del(&msg->list);
		msg->resp.result = VDUSE_REQ_RESULT_FAILED;
		/* Mark the device as malfunctioning when there is a timeout */
		if (!ret)
			vduse_dev_broken(dev);
	}
	ret = (msg->resp.result == VDUSE_REQ_RESULT_OK) ? 0 : -EIO;
	spin_unlock(&dev->msg_lock);

	return ret;
}

static int vduse_dev_get_vq_state_packed(struct vduse_dev *dev,
					 struct vduse_virtqueue *vq,
					 struct vdpa_vq_state_packed *packed)
{
	struct vduse_dev_msg msg = { 0 };
	int ret;

	msg.req.type = VDUSE_GET_VQ_STATE;
	msg.req.vq_state.index = vq->index;

	ret = vduse_dev_msg_sync(dev, &msg);
	if (ret)
		return ret;

	packed->last_avail_counter =
			msg.resp.vq_state.packed.last_avail_counter & 0x0001;
	packed->last_avail_idx =
			msg.resp.vq_state.packed.last_avail_idx & 0x7FFF;
	packed->last_used_counter =
			msg.resp.vq_state.packed.last_used_counter & 0x0001;
	packed->last_used_idx =
			msg.resp.vq_state.packed.last_used_idx & 0x7FFF;

	return 0;
}

static int vduse_dev_get_vq_state_split(struct vduse_dev *dev,
					struct vduse_virtqueue *vq,
					struct vdpa_vq_state_split *split)
{
	struct vduse_dev_msg msg = { 0 };
	int ret;

	msg.req.type = VDUSE_GET_VQ_STATE;
	msg.req.vq_state.index = vq->index;

	ret = vduse_dev_msg_sync(dev, &msg);
	if (ret)
		return ret;

	split->avail_index = msg.resp.vq_state.split.avail_index;

	return 0;
}

static int vduse_dev_set_status(struct vduse_dev *dev, u8 status)
{
	struct vduse_dev_msg msg = { 0 };

	msg.req.type = VDUSE_SET_STATUS;
	msg.req.s.status = status;

	return vduse_dev_msg_sync(dev, &msg);
}

static int vduse_dev_update_iotlb(struct vduse_dev *dev,
				  u64 start, u64 last)
{
	struct vduse_dev_msg msg = { 0 };

	if (last < start)
		return -EINVAL;

	msg.req.type = VDUSE_UPDATE_IOTLB;
	msg.req.iova.start = start;
	msg.req.iova.last = last;

	return vduse_dev_msg_sync(dev, &msg);
}

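/*
 * read(2) on the device fd: dequeue the next pending request from
 * send_list (blocking unless O_NONBLOCK), copy it to userspace and
 * park it on recv_list until the matching response arrives.
 */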
static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct vduse_dev *dev = file->private_data;
	struct vduse_dev_msg *msg;
	int size = sizeof(struct vduse_dev_request);
	ssize_t ret;

	if (iov_iter_count(to) < size)
		return -EINVAL;

	spin_lock(&dev->msg_lock);
	while (1) {
		msg = vduse_dequeue_msg(&dev->send_list);
		if (msg)
			break;

		ret = -EAGAIN;
		if (file->f_flags & O_NONBLOCK)
			goto unlock;

		spin_unlock(&dev->msg_lock);
		ret = wait_event_interruptible_exclusive(dev->waitq,
					!list_empty(&dev->send_list));
		if (ret)
			return ret;

		spin_lock(&dev->msg_lock);
	}
	spin_unlock(&dev->msg_lock);
	ret = copy_to_iter(&msg->req, size, to);
	spin_lock(&dev->msg_lock);
	if (ret != size) {
		ret = -EFAULT;
		vduse_enqueue_msg(&dev->send_list, msg);
		goto unlock;
	}
	vduse_enqueue_msg(&dev->recv_list, msg);
unlock:
	spin_unlock(&dev->msg_lock);

	return ret;
}

static bool is_mem_zero(const char *ptr, int size)
{
	int i;

	for (i = 0; i < size; i++) {
		if (ptr[i])
			return false;
	}
	return true;
}

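/*
 * write(2) on the device fd: match the response against an inflight
 * request on recv_list by request_id and wake up the caller waiting
 * in vduse_dev_msg_sync().
 */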
static ssize_t vduse_dev_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct vduse_dev *dev = file->private_data;
	struct vduse_dev_response resp;
	struct vduse_dev_msg *msg;
	size_t ret;

	ret = copy_from_iter(&resp, sizeof(resp), from);
	if (ret != sizeof(resp))
		return -EINVAL;

	if (!is_mem_zero((const char *)resp.reserved, sizeof(resp.reserved)))
		return -EINVAL;

	spin_lock(&dev->msg_lock);
	msg = vduse_find_msg(&dev->recv_list, resp.request_id);
	if (!msg) {
		ret = -ENOENT;
		goto unlock;
	}

	memcpy(&msg->resp, &resp, sizeof(resp));
	msg->completed = 1;
	wake_up(&msg->waitq);
unlock:
	spin_unlock(&dev->msg_lock);

	return ret;
}

static __poll_t vduse_dev_poll(struct file *file, poll_table *wait)
{
	struct vduse_dev *dev = file->private_data;
	__poll_t mask = 0;

	poll_wait(file, &dev->waitq, wait);

	spin_lock(&dev->msg_lock);

	if (unlikely(dev->broken))
		mask |= EPOLLERR;
	if (!list_empty(&dev->send_list))
		mask |= EPOLLIN | EPOLLRDNORM;
	if (!list_empty(&dev->recv_list))
		mask |= EPOLLOUT | EPOLLWRNORM;

	spin_unlock(&dev->msg_lock);

	return mask;
}

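/*
 * Bring the device back to its initial state: clear status, driver
 * features and all per-virtqueue state, drop kickfds and callbacks,
 * and flush any injection work still in flight. Taking dev->rwsem for
 * write excludes vduse_dev_queue_irq_work(), so no new interrupt work
 * can be queued while the state is torn down.
 */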
static void vduse_dev_reset(struct vduse_dev *dev)
{
	int i;
	struct vduse_iova_domain *domain = dev->domain;

	/* The coherent mappings are handled in vduse_dev_free_coherent() */
	if (domain->bounce_map)
		vduse_domain_reset_bounce_map(domain);

	down_write(&dev->rwsem);

	dev->status = 0;
	dev->driver_features = 0;
	dev->generation++;
	spin_lock(&dev->irq_lock);
	dev->config_cb.callback = NULL;
	dev->config_cb.private = NULL;
	spin_unlock(&dev->irq_lock);
	flush_work(&dev->inject);

	for (i = 0; i < dev->vq_num; i++) {
		struct vduse_virtqueue *vq = &dev->vqs[i];

		vq->ready = false;
		vq->desc_addr = 0;
		vq->driver_addr = 0;
		vq->device_addr = 0;
		vq->num = 0;
		memset(&vq->state, 0, sizeof(vq->state));

		spin_lock(&vq->kick_lock);
		vq->kicked = false;
		if (vq->kickfd)
			eventfd_ctx_put(vq->kickfd);
		vq->kickfd = NULL;
		spin_unlock(&vq->kick_lock);

		spin_lock(&vq->irq_lock);
		vq->cb.callback = NULL;
		vq->cb.private = NULL;
		spin_unlock(&vq->irq_lock);
		flush_work(&vq->inject);
		flush_work(&vq->kick);
	}

	up_write(&dev->rwsem);
}

static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
				u64 desc_area, u64 driver_area,
				u64 device_area)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	vq->desc_addr = desc_area;
	vq->driver_addr = driver_area;
	vq->device_addr = device_area;

	return 0;
}

static void vduse_vq_kick(struct vduse_virtqueue *vq)
{
	spin_lock(&vq->kick_lock);
	if (!vq->ready)
		goto unlock;

	if (vq->kickfd)
		eventfd_signal(vq->kickfd, 1);
	else
		vq->kicked = true;
unlock:
	spin_unlock(&vq->kick_lock);
}

static void vduse_vq_kick_work(struct work_struct *work)
{
	struct vduse_virtqueue *vq = container_of(work,
					struct vduse_virtqueue, kick);

	vduse_vq_kick(vq);
}

static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	if (!eventfd_signal_allowed()) {
		schedule_work(&vq->kick);
		return;
	}
	vduse_vq_kick(vq);
}

static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
			      struct vdpa_callback *cb)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	spin_lock(&vq->irq_lock);
	vq->cb.callback = cb->callback;
	vq->cb.private = cb->private;
	spin_unlock(&vq->irq_lock);
}

static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	vq->num = num;
}

static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
					u16 idx, bool ready)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	vq->ready = ready;
}

static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	return vq->ready;
}

static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
				const struct vdpa_vq_state *state)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
		vq->state.packed.last_avail_counter =
				state->packed.last_avail_counter;
		vq->state.packed.last_avail_idx = state->packed.last_avail_idx;
		vq->state.packed.last_used_counter =
				state->packed.last_used_counter;
		vq->state.packed.last_used_idx = state->packed.last_used_idx;
	} else
		vq->state.split.avail_index = state->split.avail_index;

	return 0;
}

static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
				struct vdpa_vq_state *state)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED))
		return vduse_dev_get_vq_state_packed(dev, vq, &state->packed);

	return vduse_dev_get_vq_state_split(dev, vq, &state->split);
}

static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->vq_align;
}

static u64 vduse_vdpa_get_device_features(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->device_features;
}

static int vduse_vdpa_set_driver_features(struct vdpa_device *vdpa, u64 features)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	dev->driver_features = features;
	return 0;
}

static u64 vduse_vdpa_get_driver_features(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->driver_features;
}

static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa,
				  struct vdpa_callback *cb)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	spin_lock(&dev->irq_lock);
	dev->config_cb.callback = cb->callback;
	dev->config_cb.private = cb->private;
	spin_unlock(&dev->irq_lock);
}

static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	u16 num_max = 0;
	int i;

	for (i = 0; i < dev->vq_num; i++)
		if (num_max < dev->vqs[i].num_max)
			num_max = dev->vqs[i].num_max;

	return num_max;
}

static u32 vduse_vdpa_get_device_id(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->device_id;
}

static u32 vduse_vdpa_get_vendor_id(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->vendor_id;
}

static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->status;
}

static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	if (vduse_dev_set_status(dev, status))
		return;

	dev->status = status;
}

static size_t vduse_vdpa_get_config_size(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->config_size;
}

static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset,
				  void *buf, unsigned int len)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	if (offset > dev->config_size ||
	    len > dev->config_size - offset)
		return;

	memcpy(buf, dev->config + offset, len);
}

static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset,
			const void *buf, unsigned int len)
{
	/* Now we only support read-only configuration space */
}

static int vduse_vdpa_reset(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	int ret = vduse_dev_set_status(dev, 0);

	vduse_dev_reset(dev);

	return ret;
}

static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->generation;
}

static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
				unsigned int asid,
				struct vhost_iotlb *iotlb)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	int ret;

	ret = vduse_domain_set_map(dev->domain, iotlb);
	if (ret)
		return ret;

	ret = vduse_dev_update_iotlb(dev, 0ULL, ULLONG_MAX);
	if (ret) {
		vduse_domain_clear_map(dev->domain, iotlb);
		return ret;
	}

	return 0;
}

static void vduse_vdpa_free(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	dev->vdev = NULL;
}

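/*
 * vDPA config ops, called from the vdpa bus/virtio driver side.
 * Operations that need the userspace device implementation (status
 * changes, virtqueue state queries, IOTLB updates) are forwarded
 * through the message channel; everything else is served from the
 * state cached in struct vduse_dev.
 */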
static const struct vdpa_config_ops vduse_vdpa_config_ops = {
	.set_vq_address		= vduse_vdpa_set_vq_address,
	.kick_vq		= vduse_vdpa_kick_vq,
	.set_vq_cb		= vduse_vdpa_set_vq_cb,
	.set_vq_num		= vduse_vdpa_set_vq_num,
	.set_vq_ready		= vduse_vdpa_set_vq_ready,
	.get_vq_ready		= vduse_vdpa_get_vq_ready,
	.set_vq_state		= vduse_vdpa_set_vq_state,
	.get_vq_state		= vduse_vdpa_get_vq_state,
	.get_vq_align		= vduse_vdpa_get_vq_align,
	.get_device_features	= vduse_vdpa_get_device_features,
	.set_driver_features	= vduse_vdpa_set_driver_features,
	.get_driver_features	= vduse_vdpa_get_driver_features,
	.set_config_cb		= vduse_vdpa_set_config_cb,
	.get_vq_num_max		= vduse_vdpa_get_vq_num_max,
	.get_device_id		= vduse_vdpa_get_device_id,
	.get_vendor_id		= vduse_vdpa_get_vendor_id,
	.get_status		= vduse_vdpa_get_status,
	.set_status		= vduse_vdpa_set_status,
	.get_config_size	= vduse_vdpa_get_config_size,
	.get_config		= vduse_vdpa_get_config,
	.set_config		= vduse_vdpa_set_config,
	.get_generation		= vduse_vdpa_get_generation,
	.reset			= vduse_vdpa_reset,
	.set_map		= vduse_vdpa_set_map,
	.free			= vduse_vdpa_free,
};

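/*
 * DMA ops installed on the vdpa device (see vduse_dev_init_vdpa()).
 * Every mapping is routed into the per-device IOVA domain, which
 * bounce-buffers streaming mappings and backs coherent allocations,
 * so device-visible addresses never reference arbitrary kernel memory.
 */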
static dma_addr_t vduse_dev_map_page(struct device *dev, struct page *page,
				     unsigned long offset, size_t size,
				     enum dma_data_direction dir,
				     unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	return vduse_domain_map_page(domain, page, offset, size, dir, attrs);
}

static void vduse_dev_unmap_page(struct device *dev, dma_addr_t dma_addr,
				size_t size, enum dma_data_direction dir,
				unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	return vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs);
}

static void *vduse_dev_alloc_coherent(struct device *dev, size_t size,
					dma_addr_t *dma_addr, gfp_t flag,
					unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;
	unsigned long iova;
	void *addr;

	*dma_addr = DMA_MAPPING_ERROR;
	addr = vduse_domain_alloc_coherent(domain, size,
				(dma_addr_t *)&iova, flag, attrs);
	if (!addr)
		return NULL;

	*dma_addr = (dma_addr_t)iova;

	return addr;
}

static void vduse_dev_free_coherent(struct device *dev, size_t size,
					void *vaddr, dma_addr_t dma_addr,
					unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	vduse_domain_free_coherent(domain, size, vaddr, dma_addr, attrs);
}

static size_t vduse_dev_max_mapping_size(struct device *dev)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	return domain->bounce_size;
}

static const struct dma_map_ops vduse_dev_dma_ops = {
	.map_page = vduse_dev_map_page,
	.unmap_page = vduse_dev_unmap_page,
	.alloc = vduse_dev_alloc_coherent,
	.free = vduse_dev_free_coherent,
	.max_mapping_size = vduse_dev_max_mapping_size,
};

static unsigned int perm_to_file_flags(u8 perm)
{
	unsigned int flags = 0;

	switch (perm) {
	case VDUSE_ACCESS_WO:
		flags |= O_WRONLY;
		break;
	case VDUSE_ACCESS_RO:
		flags |= O_RDONLY;
		break;
	case VDUSE_ACCESS_RW:
		flags |= O_RDWR;
		break;
	default:
		WARN(1, "invalid vhost IOTLB permission\n");
		break;
	}

	return flags;
}

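/*
 * Install (or, with fd == VDUSE_EVENTFD_DEASSIGN, remove) the eventfd
 * used to signal virtqueue kicks to userspace. A kick that arrived
 * while no eventfd was assigned is latched in vq->kicked and replayed
 * here once a new eventfd shows up.
 */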
static int vduse_kickfd_setup(struct vduse_dev *dev,
			struct vduse_vq_eventfd *eventfd)
{
	struct eventfd_ctx *ctx = NULL;
	struct vduse_virtqueue *vq;
	u32 index;

	if (eventfd->index >= dev->vq_num)
		return -EINVAL;

	index = array_index_nospec(eventfd->index, dev->vq_num);
	vq = &dev->vqs[index];
	if (eventfd->fd >= 0) {
		ctx = eventfd_ctx_fdget(eventfd->fd);
		if (IS_ERR(ctx))
			return PTR_ERR(ctx);
	} else if (eventfd->fd != VDUSE_EVENTFD_DEASSIGN)
		return 0;

	spin_lock(&vq->kick_lock);
	if (vq->kickfd)
		eventfd_ctx_put(vq->kickfd);
	vq->kickfd = ctx;
	if (vq->ready && vq->kicked && vq->kickfd) {
		eventfd_signal(vq->kickfd, 1);
		vq->kicked = false;
	}
	spin_unlock(&vq->kick_lock);

	return 0;
}

static bool vduse_dev_is_ready(struct vduse_dev *dev)
{
	int i;

	for (i = 0; i < dev->vq_num; i++)
		if (!dev->vqs[i].num_max)
			return false;

	return true;
}

static void vduse_dev_irq_inject(struct work_struct *work)
{
	struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);

	spin_lock_irq(&dev->irq_lock);
	if (dev->config_cb.callback)
		dev->config_cb.callback(dev->config_cb.private);
	spin_unlock_irq(&dev->irq_lock);
}

static void vduse_vq_irq_inject(struct work_struct *work)
{
	struct vduse_virtqueue *vq = container_of(work,
					struct vduse_virtqueue, inject);

	spin_lock_irq(&vq->irq_lock);
	if (vq->ready && vq->cb.callback)
		vq->cb.callback(vq->cb.private);
	spin_unlock_irq(&vq->irq_lock);
}

static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
				    struct work_struct *irq_work)
{
	int ret = -EINVAL;

	down_read(&dev->rwsem);
	if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
		goto unlock;

	ret = 0;
	queue_work(vduse_irq_wq, irq_work);
unlock:
	up_read(&dev->rwsem);

	return ret;
}

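/*
 * Per-device ioctls, issued by the userspace implementation on
 * /dev/vduse/$NAME: IOTLB fd lookup, config space updates, virtqueue
 * setup/query, kickfd assignment and interrupt injection.
 */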
static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
			    unsigned long arg)
{
	struct vduse_dev *dev = file->private_data;
	void __user *argp = (void __user *)arg;
	int ret;

	if (unlikely(dev->broken))
		return -EPERM;

	switch (cmd) {
	case VDUSE_IOTLB_GET_FD: {
		struct vduse_iotlb_entry entry;
		struct vhost_iotlb_map *map;
		struct vdpa_map_file *map_file;
		struct vduse_iova_domain *domain = dev->domain;
		struct file *f = NULL;

		ret = -EFAULT;
		if (copy_from_user(&entry, argp, sizeof(entry)))
			break;

		ret = -EINVAL;
		if (entry.start > entry.last)
			break;

		spin_lock(&domain->iotlb_lock);
		map = vhost_iotlb_itree_first(domain->iotlb,
					      entry.start, entry.last);
		if (map) {
			map_file = (struct vdpa_map_file *)map->opaque;
			f = get_file(map_file->file);
			entry.offset = map_file->offset;
			entry.start = map->start;
			entry.last = map->last;
			entry.perm = map->perm;
		}
		spin_unlock(&domain->iotlb_lock);
		ret = -EINVAL;
		if (!f)
			break;

		ret = -EFAULT;
		if (copy_to_user(argp, &entry, sizeof(entry))) {
			fput(f);
			break;
		}
		ret = receive_fd(f, perm_to_file_flags(entry.perm));
		fput(f);
		break;
	}
	case VDUSE_DEV_GET_FEATURES:
		/*
		 * Just mirror what driver wrote here.
		 * The driver is expected to check FEATURE_OK later.
		 */
		ret = put_user(dev->driver_features, (u64 __user *)argp);
		break;
	case VDUSE_DEV_SET_CONFIG: {
		struct vduse_config_data config;
		unsigned long size = offsetof(struct vduse_config_data,
					      buffer);

		ret = -EFAULT;
		if (copy_from_user(&config, argp, size))
			break;

		ret = -EINVAL;
		if (config.offset > dev->config_size ||
		    config.length == 0 ||
		    config.length > dev->config_size - config.offset)
			break;

		ret = -EFAULT;
		if (copy_from_user(dev->config + config.offset, argp + size,
				   config.length))
			break;

		ret = 0;
		break;
	}
	case VDUSE_DEV_INJECT_CONFIG_IRQ:
		ret = vduse_dev_queue_irq_work(dev, &dev->inject);
		break;
	case VDUSE_VQ_SETUP: {
		struct vduse_vq_config config;
		u32 index;

		ret = -EFAULT;
		if (copy_from_user(&config, argp, sizeof(config)))
			break;

		ret = -EINVAL;
		if (config.index >= dev->vq_num)
			break;

		if (!is_mem_zero((const char *)config.reserved,
				 sizeof(config.reserved)))
			break;

		index = array_index_nospec(config.index, dev->vq_num);
		dev->vqs[index].num_max = config.max_size;
		ret = 0;
		break;
	}
	case VDUSE_VQ_GET_INFO: {
		struct vduse_vq_info vq_info;
		struct vduse_virtqueue *vq;
		u32 index;

		ret = -EFAULT;
		if (copy_from_user(&vq_info, argp, sizeof(vq_info)))
			break;

		ret = -EINVAL;
		if (vq_info.index >= dev->vq_num)
			break;

		index = array_index_nospec(vq_info.index, dev->vq_num);
		vq = &dev->vqs[index];
		vq_info.desc_addr = vq->desc_addr;
		vq_info.driver_addr = vq->driver_addr;
		vq_info.device_addr = vq->device_addr;
		vq_info.num = vq->num;

		if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
			vq_info.packed.last_avail_counter =
				vq->state.packed.last_avail_counter;
			vq_info.packed.last_avail_idx =
				vq->state.packed.last_avail_idx;
			vq_info.packed.last_used_counter =
				vq->state.packed.last_used_counter;
			vq_info.packed.last_used_idx =
				vq->state.packed.last_used_idx;
		} else
			vq_info.split.avail_index =
				vq->state.split.avail_index;

		vq_info.ready = vq->ready;

		ret = -EFAULT;
		if (copy_to_user(argp, &vq_info, sizeof(vq_info)))
			break;

		ret = 0;
		break;
	}
	case VDUSE_VQ_SETUP_KICKFD: {
		struct vduse_vq_eventfd eventfd;

		ret = -EFAULT;
		if (copy_from_user(&eventfd, argp, sizeof(eventfd)))
			break;

		ret = vduse_kickfd_setup(dev, &eventfd);
		break;
	}
	case VDUSE_VQ_INJECT_IRQ: {
		u32 index;

		ret = -EFAULT;
		if (get_user(index, (u32 __user *)argp))
			break;

		ret = -EINVAL;
		if (index >= dev->vq_num)
			break;

		index = array_index_nospec(index, dev->vq_num);
		ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index].inject);
		break;
	}
	default:
		ret = -ENOIOCTLCMD;
		break;
	}

	return ret;
}

static int vduse_dev_release(struct inode *inode, struct file *file)
{
	struct vduse_dev *dev = file->private_data;

	spin_lock(&dev->msg_lock);
	/* Make sure the inflight messages can be processed after reconnection */
	list_splice_init(&dev->recv_list, &dev->send_list);
	spin_unlock(&dev->msg_lock);
	dev->connected = false;

	return 0;
}

static struct vduse_dev *vduse_dev_get_from_minor(int minor)
{
	struct vduse_dev *dev;

	mutex_lock(&vduse_lock);
	dev = idr_find(&vduse_idr, minor);
	mutex_unlock(&vduse_lock);

	return dev;
}

static int vduse_dev_open(struct inode *inode, struct file *file)
{
	int ret;
	struct vduse_dev *dev = vduse_dev_get_from_minor(iminor(inode));

	if (!dev)
		return -ENODEV;

	ret = -EBUSY;
	mutex_lock(&dev->lock);
	if (dev->connected)
		goto unlock;

	ret = 0;
	dev->connected = true;
	file->private_data = dev;
unlock:
	mutex_unlock(&dev->lock);

	return ret;
}

static const struct file_operations vduse_dev_fops = {
	.owner		= THIS_MODULE,
	.open		= vduse_dev_open,
	.release	= vduse_dev_release,
	.read_iter	= vduse_dev_read_iter,
	.write_iter	= vduse_dev_write_iter,
	.poll		= vduse_dev_poll,
	.unlocked_ioctl	= vduse_dev_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.llseek		= noop_llseek,
};

static struct vduse_dev *vduse_dev_create(void)
{
	struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);

	if (!dev)
		return NULL;

	mutex_init(&dev->lock);
	spin_lock_init(&dev->msg_lock);
	INIT_LIST_HEAD(&dev->send_list);
	INIT_LIST_HEAD(&dev->recv_list);
	spin_lock_init(&dev->irq_lock);
	init_rwsem(&dev->rwsem);

	INIT_WORK(&dev->inject, vduse_dev_irq_inject);
	init_waitqueue_head(&dev->waitq);

	return dev;
}

static void vduse_dev_destroy(struct vduse_dev *dev)
{
	kfree(dev);
}

static struct vduse_dev *vduse_find_dev(const char *name)
{
	struct vduse_dev *dev;
	int id;

	idr_for_each_entry(&vduse_idr, dev, id)
		if (!strcmp(dev->name, name))
			return dev;

	return NULL;
}

static int vduse_destroy_dev(char *name)
{
	struct vduse_dev *dev = vduse_find_dev(name);

	if (!dev)
		return -EINVAL;

	mutex_lock(&dev->lock);
	if (dev->vdev || dev->connected) {
		mutex_unlock(&dev->lock);
		return -EBUSY;
	}
	dev->connected = true;
	mutex_unlock(&dev->lock);

	vduse_dev_reset(dev);
	device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
	idr_remove(&vduse_idr, dev->minor);
	kvfree(dev->config);
	kfree(dev->vqs);
	vduse_domain_destroy(dev->domain);
	kfree(dev->name);
	vduse_dev_destroy(dev);
	module_put(THIS_MODULE);

	return 0;
}

static bool device_is_allowed(u32 device_id)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(allowed_device_id); i++)
		if (allowed_device_id[i] == device_id)
			return true;

	return false;
}

static bool features_is_valid(u64 features)
{
	if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM)))
		return false;

	/* Now we only support read-only configuration space */
	if (features & (1ULL << VIRTIO_BLK_F_CONFIG_WCE))
		return false;

	return true;
}

static bool vduse_validate_config(struct vduse_dev_config *config)
{
	if (!is_mem_zero((const char *)config->reserved,
			 sizeof(config->reserved)))
		return false;

	if (config->vq_align > PAGE_SIZE)
		return false;

	if (config->config_size > PAGE_SIZE)
		return false;

	if (!device_is_allowed(config->device_id))
		return false;

	if (!features_is_valid(config->features))
		return false;

	return true;
}

static ssize_t msg_timeout_show(struct device *device,
				struct device_attribute *attr, char *buf)
{
	struct vduse_dev *dev = dev_get_drvdata(device);

	return sysfs_emit(buf, "%u\n", dev->msg_timeout);
}

static ssize_t msg_timeout_store(struct device *device,
				 struct device_attribute *attr,
				 const char *buf, size_t count)
{
	struct vduse_dev *dev = dev_get_drvdata(device);
	int ret;

	ret = kstrtouint(buf, 10, &dev->msg_timeout);
	if (ret < 0)
		return ret;

	return count;
}

static DEVICE_ATTR_RW(msg_timeout);

static struct attribute *vduse_dev_attrs[] = {
	&dev_attr_msg_timeout.attr,
	NULL
};

ATTRIBUTE_GROUPS(vduse_dev);

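/*
 * Instantiate a VDUSE device from a validated config: allocate the
 * IOVA domain and virtqueue array, reserve a minor in the IDR and
 * create the /dev/vduse/$NAME char device node. The module reference
 * taken here is dropped again in vduse_destroy_dev().
 */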
static int vduse_create_dev(struct vduse_dev_config *config,
			    void *config_buf, u64 api_version)
{
	int i, ret;
	struct vduse_dev *dev;

	ret = -EEXIST;
	if (vduse_find_dev(config->name))
		goto err;

	ret = -ENOMEM;
	dev = vduse_dev_create();
	if (!dev)
		goto err;

	dev->api_version = api_version;
	dev->device_features = config->features;
	dev->device_id = config->device_id;
	dev->vendor_id = config->vendor_id;
	dev->name = kstrdup(config->name, GFP_KERNEL);
	if (!dev->name)
		goto err_str;

	dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
					  VDUSE_BOUNCE_SIZE);
	if (!dev->domain)
		goto err_domain;

	dev->config = config_buf;
	dev->config_size = config->config_size;
	dev->vq_align = config->vq_align;
	dev->vq_num = config->vq_num;
	dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
	if (!dev->vqs)
		goto err_vqs;

	for (i = 0; i < dev->vq_num; i++) {
		dev->vqs[i].index = i;
		INIT_WORK(&dev->vqs[i].inject, vduse_vq_irq_inject);
		INIT_WORK(&dev->vqs[i].kick, vduse_vq_kick_work);
		spin_lock_init(&dev->vqs[i].kick_lock);
		spin_lock_init(&dev->vqs[i].irq_lock);
	}

	ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL);
	if (ret < 0)
		goto err_idr;

	dev->minor = ret;
	dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT;
	dev->dev = device_create_with_groups(vduse_class, NULL,
				MKDEV(MAJOR(vduse_major), dev->minor),
				dev, vduse_dev_groups, "%s", config->name);
	if (IS_ERR(dev->dev)) {
		ret = PTR_ERR(dev->dev);
		goto err_dev;
	}
	__module_get(THIS_MODULE);

	return 0;
err_dev:
	idr_remove(&vduse_idr, dev->minor);
err_idr:
	kfree(dev->vqs);
err_vqs:
	vduse_domain_destroy(dev->domain);
err_domain:
	kfree(dev->name);
err_str:
	vduse_dev_destroy(dev);
err:
	return ret;
}

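/*
 * ioctls on the control node /dev/vduse/control. A typical userspace
 * sequence looks roughly like the following (a sketch, not a complete
 * implementation; "foo" is a placeholder device name):
 *
 *	int ctrl = open("/dev/vduse/control", O_RDWR);
 *	uint64_t version = VDUSE_API_VERSION;
 *	ioctl(ctrl, VDUSE_SET_API_VERSION, &version);
 *	ioctl(ctrl, VDUSE_CREATE_DEV, &dev_config);
 *	int dev = open("/dev/vduse/foo", O_RDWR);
 *	then: VDUSE_VQ_SETUP per queue, followed by the read()/write()
 *	message loop on the device fd.
 */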
static long vduse_ioctl(struct file *file, unsigned int cmd,
			unsigned long arg)
{
	int ret;
	void __user *argp = (void __user *)arg;
	struct vduse_control *control = file->private_data;

	mutex_lock(&vduse_lock);
	switch (cmd) {
	case VDUSE_GET_API_VERSION:
		ret = put_user(control->api_version, (u64 __user *)argp);
		break;
	case VDUSE_SET_API_VERSION: {
		u64 api_version;

		ret = -EFAULT;
		if (get_user(api_version, (u64 __user *)argp))
			break;

		ret = -EINVAL;
		if (api_version > VDUSE_API_VERSION)
			break;

		ret = 0;
		control->api_version = api_version;
		break;
	}
	case VDUSE_CREATE_DEV: {
		struct vduse_dev_config config;
		unsigned long size = offsetof(struct vduse_dev_config, config);
		void *buf;

		ret = -EFAULT;
		if (copy_from_user(&config, argp, size))
			break;

		ret = -EINVAL;
		if (vduse_validate_config(&config) == false)
			break;

		buf = vmemdup_user(argp + size, config.config_size);
		if (IS_ERR(buf)) {
			ret = PTR_ERR(buf);
			break;
		}
		config.name[VDUSE_NAME_MAX - 1] = '\0';
		ret = vduse_create_dev(&config, buf, control->api_version);
		if (ret)
			kvfree(buf);
		break;
	}
	case VDUSE_DESTROY_DEV: {
		char name[VDUSE_NAME_MAX];

		ret = -EFAULT;
		if (copy_from_user(name, argp, VDUSE_NAME_MAX))
			break;

		name[VDUSE_NAME_MAX - 1] = '\0';
		ret = vduse_destroy_dev(name);
		break;
	}
	default:
		ret = -EINVAL;
		break;
	}
	mutex_unlock(&vduse_lock);

	return ret;
}

static int vduse_release(struct inode *inode, struct file *file)
{
	struct vduse_control *control = file->private_data;

	kfree(control);
	return 0;
}

static int vduse_open(struct inode *inode, struct file *file)
{
	struct vduse_control *control;

	control = kmalloc(sizeof(struct vduse_control), GFP_KERNEL);
	if (!control)
		return -ENOMEM;

	control->api_version = VDUSE_API_VERSION;
	file->private_data = control;

	return 0;
}

static const struct file_operations vduse_ctrl_fops = {
	.owner		= THIS_MODULE,
	.open		= vduse_open,
	.release	= vduse_release,
	.unlocked_ioctl	= vduse_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.llseek		= noop_llseek,
};

static char *vduse_devnode(struct device *dev, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));
}

struct vduse_mgmt_dev {
	struct vdpa_mgmt_dev mgmt_dev;
	struct device dev;
};

static struct vduse_mgmt_dev *vduse_mgmt;

static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
{
	struct vduse_vdpa *vdev;
	int ret;

	if (dev->vdev)
		return -EEXIST;

	vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
				 &vduse_vdpa_config_ops, 1, 1, name, true);
	if (IS_ERR(vdev))
		return PTR_ERR(vdev);

	dev->vdev = vdev;
	vdev->dev = dev;
	vdev->vdpa.dev.dma_mask = &vdev->vdpa.dev.coherent_dma_mask;
	ret = dma_set_mask_and_coherent(&vdev->vdpa.dev, DMA_BIT_MASK(64));
	if (ret) {
		put_device(&vdev->vdpa.dev);
		return ret;
	}
	set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops);
	vdev->vdpa.dma_dev = &vdev->vdpa.dev;
	vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev;

	return 0;
}

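/*
 * vdpa management ops: "vdpa dev add" only succeeds once the userspace
 * side has created the VDUSE device and configured num_max for every
 * virtqueue (vduse_dev_is_ready()), at which point the vdpa device is
 * allocated and registered on the bus.
 */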
static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
			const struct vdpa_dev_set_config *config)
{
	struct vduse_dev *dev;
	int ret;

	mutex_lock(&vduse_lock);
	dev = vduse_find_dev(name);
	if (!dev || !vduse_dev_is_ready(dev)) {
		mutex_unlock(&vduse_lock);
		return -EINVAL;
	}
	ret = vduse_dev_init_vdpa(dev, name);
	mutex_unlock(&vduse_lock);
	if (ret)
		return ret;

	ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
	if (ret) {
		put_device(&dev->vdev->vdpa.dev);
		return ret;
	}

	return 0;
}

static void vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
{
	_vdpa_unregister_device(dev);
}

static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = {
	.dev_add = vdpa_dev_add,
	.dev_del = vdpa_dev_del,
};

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static void vduse_mgmtdev_release(struct device *dev)
{
	struct vduse_mgmt_dev *mgmt_dev;

	mgmt_dev = container_of(dev, struct vduse_mgmt_dev, dev);
	kfree(mgmt_dev);
}

static int vduse_mgmtdev_init(void)
{
	int ret;

	vduse_mgmt = kzalloc(sizeof(*vduse_mgmt), GFP_KERNEL);
	if (!vduse_mgmt)
		return -ENOMEM;

	ret = dev_set_name(&vduse_mgmt->dev, "vduse");
	if (ret) {
		kfree(vduse_mgmt);
		return ret;
	}

	vduse_mgmt->dev.release = vduse_mgmtdev_release;

	ret = device_register(&vduse_mgmt->dev);
	if (ret)
		goto dev_reg_err;

	vduse_mgmt->mgmt_dev.id_table = id_table;
	vduse_mgmt->mgmt_dev.ops = &vdpa_dev_mgmtdev_ops;
	vduse_mgmt->mgmt_dev.device = &vduse_mgmt->dev;
	ret = vdpa_mgmtdev_register(&vduse_mgmt->mgmt_dev);
	if (ret)
		device_unregister(&vduse_mgmt->dev);

	return ret;

dev_reg_err:
	put_device(&vduse_mgmt->dev);
	return ret;
}

static void vduse_mgmtdev_exit(void)
{
	vdpa_mgmtdev_unregister(&vduse_mgmt->mgmt_dev);
	device_unregister(&vduse_mgmt->dev);
}

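/*
 * Module init: minor 0 of the dynamically allocated char region backs
 * /dev/vduse/control, minors 1..VDUSE_DEV_MAX-1 back the per-device
 * nodes; the unbound high-priority workqueue serves interrupt
 * injection for all devices.
 */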
static int vduse_init(void)
{
	int ret;
	struct device *dev;

	vduse_class = class_create(THIS_MODULE, "vduse");
	if (IS_ERR(vduse_class))
		return PTR_ERR(vduse_class);

	vduse_class->devnode = vduse_devnode;

	ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse");
	if (ret)
		goto err_chardev_region;

	/* /dev/vduse/control */
	cdev_init(&vduse_ctrl_cdev, &vduse_ctrl_fops);
	vduse_ctrl_cdev.owner = THIS_MODULE;
	ret = cdev_add(&vduse_ctrl_cdev, vduse_major, 1);
	if (ret)
		goto err_ctrl_cdev;

	dev = device_create(vduse_class, NULL, vduse_major, NULL, "control");
	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		goto err_device;
	}

	/* /dev/vduse/$DEVICE */
	cdev_init(&vduse_cdev, &vduse_dev_fops);
	vduse_cdev.owner = THIS_MODULE;
	ret = cdev_add(&vduse_cdev, MKDEV(MAJOR(vduse_major), 1),
		       VDUSE_DEV_MAX - 1);
	if (ret)
		goto err_cdev;

	vduse_irq_wq = alloc_workqueue("vduse-irq",
				WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
	if (!vduse_irq_wq) {
		ret = -ENOMEM;
		goto err_wq;
	}

	ret = vduse_domain_init();
	if (ret)
		goto err_domain;

	ret = vduse_mgmtdev_init();
	if (ret)
		goto err_mgmtdev;

	return 0;
err_mgmtdev:
	vduse_domain_exit();
err_domain:
	destroy_workqueue(vduse_irq_wq);
err_wq:
	cdev_del(&vduse_cdev);
err_cdev:
	device_destroy(vduse_class, vduse_major);
err_device:
	cdev_del(&vduse_ctrl_cdev);
err_ctrl_cdev:
	unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
err_chardev_region:
	class_destroy(vduse_class);
	return ret;
}
module_init(vduse_init);

static void vduse_exit(void)
{
	vduse_mgmtdev_exit();
	vduse_domain_exit();
	destroy_workqueue(vduse_irq_wq);
	cdev_del(&vduse_cdev);
	device_destroy(vduse_class, vduse_major);
	cdev_del(&vduse_ctrl_cdev);
	unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
	class_destroy(vduse_class);
}
module_exit(vduse_exit);

MODULE_LICENSE(DRV_LICENSE);
MODULE_AUTHOR(DRV_AUTHOR);
MODULE_DESCRIPTION(DRV_DESC);