cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

vdpa.c (32459B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * Copyright (C) 2018-2020 Intel Corporation.
      4 * Copyright (C) 2020 Red Hat, Inc.
      5 *
      6 * Author: Tiwei Bie <tiwei.bie@intel.com>
      7 *         Jason Wang <jasowang@redhat.com>
      8 *
      9 * Thanks to Michael S. Tsirkin for the valuable comments and
     10 * suggestions.  And thanks to Cunming Liang and Zhihong Wang for all
     11 * their support.
     12 */
     13
     14#include <linux/kernel.h>
     15#include <linux/module.h>
     16#include <linux/cdev.h>
     17#include <linux/device.h>
     18#include <linux/mm.h>
     19#include <linux/slab.h>
     20#include <linux/iommu.h>
     21#include <linux/uuid.h>
     22#include <linux/vdpa.h>
     23#include <linux/nospec.h>
     24#include <linux/vhost.h>
     25
     26#include "vhost.h"
     27
     28enum {
     29	VHOST_VDPA_BACKEND_FEATURES =
     30	(1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
     31	(1ULL << VHOST_BACKEND_F_IOTLB_BATCH) |
     32	(1ULL << VHOST_BACKEND_F_IOTLB_ASID),
     33};
     34
     35#define VHOST_VDPA_DEV_MAX (1U << MINORBITS)
     36
     37#define VHOST_VDPA_IOTLB_BUCKETS 16
     38
     39struct vhost_vdpa_as {
     40	struct hlist_node hash_link;
     41	struct vhost_iotlb iotlb;
     42	u32 id;
     43};
     44
     45struct vhost_vdpa {
     46	struct vhost_dev vdev;
     47	struct iommu_domain *domain;
     48	struct vhost_virtqueue *vqs;
     49	struct completion completion;
     50	struct vdpa_device *vdpa;
     51	struct hlist_head as[VHOST_VDPA_IOTLB_BUCKETS];
     52	struct device dev;
     53	struct cdev cdev;
     54	atomic_t opened;
     55	u32 nvqs;
     56	int virtio_id;
     57	int minor;
     58	struct eventfd_ctx *config_ctx;
     59	int in_batch;
     60	struct vdpa_iova_range range;
     61	u32 batch_asid;
     62};
     63
     64static DEFINE_IDA(vhost_vdpa_ida);
     65
     66static dev_t vhost_vdpa_major;
     67
     68static inline u32 iotlb_to_asid(struct vhost_iotlb *iotlb)
     69{
     70	struct vhost_vdpa_as *as = container_of(iotlb, struct
     71						vhost_vdpa_as, iotlb);
     72	return as->id;
     73}
     74
     75static struct vhost_vdpa_as *asid_to_as(struct vhost_vdpa *v, u32 asid)
     76{
     77	struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS];
     78	struct vhost_vdpa_as *as;
     79
     80	hlist_for_each_entry(as, head, hash_link)
     81		if (as->id == asid)
     82			return as;
     83
     84	return NULL;
     85}
     86
     87static struct vhost_iotlb *asid_to_iotlb(struct vhost_vdpa *v, u32 asid)
     88{
     89	struct vhost_vdpa_as *as = asid_to_as(v, asid);
     90
     91	if (!as)
     92		return NULL;
     93
     94	return &as->iotlb;
     95}
     96
     97static struct vhost_vdpa_as *vhost_vdpa_alloc_as(struct vhost_vdpa *v, u32 asid)
     98{
     99	struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS];
    100	struct vhost_vdpa_as *as;
    101
    102	if (asid_to_as(v, asid))
    103		return NULL;
    104
    105	if (asid >= v->vdpa->nas)
    106		return NULL;
    107
    108	as = kmalloc(sizeof(*as), GFP_KERNEL);
    109	if (!as)
    110		return NULL;
    111
    112	vhost_iotlb_init(&as->iotlb, 0, 0);
    113	as->id = asid;
    114	hlist_add_head(&as->hash_link, head);
    115
    116	return as;
    117}
    118
    119static struct vhost_vdpa_as *vhost_vdpa_find_alloc_as(struct vhost_vdpa *v,
    120						      u32 asid)
    121{
    122	struct vhost_vdpa_as *as = asid_to_as(v, asid);
    123
    124	if (as)
    125		return as;
    126
    127	return vhost_vdpa_alloc_as(v, asid);
    128}
    129
    130static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid)
    131{
    132	struct vhost_vdpa_as *as = asid_to_as(v, asid);
    133
    134	if (!as)
    135		return -EINVAL;
    136
    137	hlist_del(&as->hash_link);
    138	vhost_iotlb_reset(&as->iotlb);
    139	kfree(as);
    140
    141	return 0;
    142}
    143
    144static void handle_vq_kick(struct vhost_work *work)
    145{
    146	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
    147						  poll.work);
    148	struct vhost_vdpa *v = container_of(vq->dev, struct vhost_vdpa, vdev);
    149	const struct vdpa_config_ops *ops = v->vdpa->config;
    150
    151	ops->kick_vq(v->vdpa, vq - v->vqs);
    152}
    153
    154static irqreturn_t vhost_vdpa_virtqueue_cb(void *private)
    155{
    156	struct vhost_virtqueue *vq = private;
    157	struct eventfd_ctx *call_ctx = vq->call_ctx.ctx;
    158
    159	if (call_ctx)
    160		eventfd_signal(call_ctx, 1);
    161
    162	return IRQ_HANDLED;
    163}
    164
    165static irqreturn_t vhost_vdpa_config_cb(void *private)
    166{
    167	struct vhost_vdpa *v = private;
    168	struct eventfd_ctx *config_ctx = v->config_ctx;
    169
    170	if (config_ctx)
    171		eventfd_signal(config_ctx, 1);
    172
    173	return IRQ_HANDLED;
    174}
    175
    176static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
    177{
    178	struct vhost_virtqueue *vq = &v->vqs[qid];
    179	const struct vdpa_config_ops *ops = v->vdpa->config;
    180	struct vdpa_device *vdpa = v->vdpa;
    181	int ret, irq;
    182
    183	if (!ops->get_vq_irq)
    184		return;
    185
    186	irq = ops->get_vq_irq(vdpa, qid);
    187	if (irq < 0)
    188		return;
    189
    190	irq_bypass_unregister_producer(&vq->call_ctx.producer);
    191	if (!vq->call_ctx.ctx)
    192		return;
    193
    194	vq->call_ctx.producer.token = vq->call_ctx.ctx;
    195	vq->call_ctx.producer.irq = irq;
    196	ret = irq_bypass_register_producer(&vq->call_ctx.producer);
    197	if (unlikely(ret))
    198		dev_info(&v->dev, "vq %u, irq bypass producer (token %p) registration failed, ret = %d\n",
    199			 qid, vq->call_ctx.producer.token, ret);
    200}
    201
    202static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
    203{
    204	struct vhost_virtqueue *vq = &v->vqs[qid];
    205
    206	irq_bypass_unregister_producer(&vq->call_ctx.producer);
    207}
    208
    209static int vhost_vdpa_reset(struct vhost_vdpa *v)
    210{
    211	struct vdpa_device *vdpa = v->vdpa;
    212
    213	v->in_batch = 0;
    214
    215	return vdpa_reset(vdpa);
    216}
    217
    218static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
    219{
    220	struct vdpa_device *vdpa = v->vdpa;
    221	const struct vdpa_config_ops *ops = vdpa->config;
    222	u32 device_id;
    223
    224	device_id = ops->get_device_id(vdpa);
    225
    226	if (copy_to_user(argp, &device_id, sizeof(device_id)))
    227		return -EFAULT;
    228
    229	return 0;
    230}
    231
    232static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user *statusp)
    233{
    234	struct vdpa_device *vdpa = v->vdpa;
    235	const struct vdpa_config_ops *ops = vdpa->config;
    236	u8 status;
    237
    238	status = ops->get_status(vdpa);
    239
    240	if (copy_to_user(statusp, &status, sizeof(status)))
    241		return -EFAULT;
    242
    243	return 0;
    244}
    245
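       /*
        * VHOST_VDPA_SET_STATUS handler: userspace may only add status bits;
        * the only way to clear bits is to write 0, which resets the device.
        * Clearing DRIVER_OK tears down the vq irq bypass producers; setting
        * it (re)registers them.
        */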
    246static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
    247{
    248	struct vdpa_device *vdpa = v->vdpa;
    249	const struct vdpa_config_ops *ops = vdpa->config;
    250	u8 status, status_old;
    251	u32 nvqs = v->nvqs;
    252	int ret;
    253	u16 i;
    254
    255	if (copy_from_user(&status, statusp, sizeof(status)))
    256		return -EFAULT;
    257
    258	status_old = ops->get_status(vdpa);
    259
    260	/*
    261	 * Userspace shouldn't remove status bits unless it resets the
    262	 * status to 0.
    263	 */
    264	if (status != 0 && (status_old & ~status) != 0)
    265		return -EINVAL;
    266
    267	if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) && !(status & VIRTIO_CONFIG_S_DRIVER_OK))
    268		for (i = 0; i < nvqs; i++)
    269			vhost_vdpa_unsetup_vq_irq(v, i);
    270
    271	if (status == 0) {
    272		ret = vdpa_reset(vdpa);
    273		if (ret)
    274			return ret;
    275	} else
    276		vdpa_set_status(vdpa, status);
    277
    278	if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK))
    279		for (i = 0; i < nvqs; i++)
    280			vhost_vdpa_setup_vq_irq(v, i);
    281
    282	return 0;
    283}
    284
    285static int vhost_vdpa_config_validate(struct vhost_vdpa *v,
    286				      struct vhost_vdpa_config *c)
    287{
    288	struct vdpa_device *vdpa = v->vdpa;
    289	size_t size = vdpa->config->get_config_size(vdpa);
    290
    291	if (c->len == 0 || c->off > size)
    292		return -EINVAL;
    293
    294	if (c->len > size - c->off)
    295		return -E2BIG;
    296
    297	return 0;
    298}
    299
    300static long vhost_vdpa_get_config(struct vhost_vdpa *v,
    301				  struct vhost_vdpa_config __user *c)
    302{
    303	struct vdpa_device *vdpa = v->vdpa;
    304	struct vhost_vdpa_config config;
    305	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
    306	u8 *buf;
    307
    308	if (copy_from_user(&config, c, size))
    309		return -EFAULT;
    310	if (vhost_vdpa_config_validate(v, &config))
    311		return -EINVAL;
    312	buf = kvzalloc(config.len, GFP_KERNEL);
    313	if (!buf)
    314		return -ENOMEM;
    315
    316	vdpa_get_config(vdpa, config.off, buf, config.len);
    317
    318	if (copy_to_user(c->buf, buf, config.len)) {
    319		kvfree(buf);
    320		return -EFAULT;
    321	}
    322
    323	kvfree(buf);
    324	return 0;
    325}
    326
    327static long vhost_vdpa_set_config(struct vhost_vdpa *v,
    328				  struct vhost_vdpa_config __user *c)
    329{
    330	struct vdpa_device *vdpa = v->vdpa;
    331	struct vhost_vdpa_config config;
    332	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
    333	u8 *buf;
    334
    335	if (copy_from_user(&config, c, size))
    336		return -EFAULT;
    337	if (vhost_vdpa_config_validate(v, &config))
    338		return -EINVAL;
    339
    340	buf = vmemdup_user(c->buf, config.len);
    341	if (IS_ERR(buf))
    342		return PTR_ERR(buf);
    343
    344	vdpa_set_config(vdpa, config.off, buf, config.len);
    345
    346	kvfree(buf);
    347	return 0;
    348}
    349
    350static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
    351{
    352	struct vdpa_device *vdpa = v->vdpa;
    353	const struct vdpa_config_ops *ops = vdpa->config;
    354	u64 features;
    355
    356	features = ops->get_device_features(vdpa);
    357
    358	if (copy_to_user(featurep, &features, sizeof(features)))
    359		return -EFAULT;
    360
    361	return 0;
    362}
    363
    364static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
    365{
    366	struct vdpa_device *vdpa = v->vdpa;
    367	const struct vdpa_config_ops *ops = vdpa->config;
    368	u64 features;
    369
    370	/*
    371	 * It's not allowed to change the features after they have
    372	 * been negotiated.
    373	 */
    374	if (ops->get_status(vdpa) & VIRTIO_CONFIG_S_FEATURES_OK)
    375		return -EBUSY;
    376
    377	if (copy_from_user(&features, featurep, sizeof(features)))
    378		return -EFAULT;
    379
    380	if (vdpa_set_features(vdpa, features))
    381		return -EINVAL;
    382
    383	return 0;
    384}
    385
    386static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp)
    387{
    388	struct vdpa_device *vdpa = v->vdpa;
    389	const struct vdpa_config_ops *ops = vdpa->config;
    390	u16 num;
    391
    392	num = ops->get_vq_num_max(vdpa);
    393
    394	if (copy_to_user(argp, &num, sizeof(num)))
    395		return -EFAULT;
    396
    397	return 0;
    398}
    399
    400static void vhost_vdpa_config_put(struct vhost_vdpa *v)
    401{
    402	if (v->config_ctx) {
    403		eventfd_ctx_put(v->config_ctx);
    404		v->config_ctx = NULL;
    405	}
    406}
    407
    408static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp)
    409{
    410	struct vdpa_callback cb;
    411	int fd;
    412	struct eventfd_ctx *ctx;
    413
    414	cb.callback = vhost_vdpa_config_cb;
    415	cb.private = v;
    416	if (copy_from_user(&fd, argp, sizeof(fd)))
    417		return  -EFAULT;
    418
    419	ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd);
    420	swap(ctx, v->config_ctx);
    421
    422	if (!IS_ERR_OR_NULL(ctx))
    423		eventfd_ctx_put(ctx);
    424
    425	if (IS_ERR(v->config_ctx)) {
    426		long ret = PTR_ERR(v->config_ctx);
    427
    428		v->config_ctx = NULL;
    429		return ret;
    430	}
    431
    432	v->vdpa->config->set_config_cb(v->vdpa, &cb);
    433
    434	return 0;
    435}
    436
    437static long vhost_vdpa_get_iova_range(struct vhost_vdpa *v, u32 __user *argp)
    438{
    439	struct vhost_vdpa_iova_range range = {
    440		.first = v->range.first,
    441		.last = v->range.last,
    442	};
    443
    444	if (copy_to_user(argp, &range, sizeof(range)))
    445		return -EFAULT;
    446	return 0;
    447}
    448
    449static long vhost_vdpa_get_config_size(struct vhost_vdpa *v, u32 __user *argp)
    450{
    451	struct vdpa_device *vdpa = v->vdpa;
    452	const struct vdpa_config_ops *ops = vdpa->config;
    453	u32 size;
    454
    455	size = ops->get_config_size(vdpa);
    456
    457	if (copy_to_user(argp, &size, sizeof(size)))
    458		return -EFAULT;
    459
    460	return 0;
    461}
    462
    463static long vhost_vdpa_get_vqs_count(struct vhost_vdpa *v, u32 __user *argp)
    464{
    465	struct vdpa_device *vdpa = v->vdpa;
    466
    467	if (copy_to_user(argp, &vdpa->nvqs, sizeof(vdpa->nvqs)))
    468		return -EFAULT;
    469
    470	return 0;
    471}
    472
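       /*
        * Per-virtqueue ioctls: the vq index is read from userspace first;
        * the vdpa-specific requests (VHOST_VDPA_SET_VRING_ENABLE,
        * VHOST_VDPA_GET_VRING_GROUP, VHOST_VDPA_SET_GROUP_ASID) are handled
        * here, while the generic vring ioctls go through vhost_vring_ioctl()
        * and their results are then propagated to the parent vdpa device via
        * its config ops.
        */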
    473static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
    474				   void __user *argp)
    475{
    476	struct vdpa_device *vdpa = v->vdpa;
    477	const struct vdpa_config_ops *ops = vdpa->config;
    478	struct vdpa_vq_state vq_state;
    479	struct vdpa_callback cb;
    480	struct vhost_virtqueue *vq;
    481	struct vhost_vring_state s;
    482	u32 idx;
    483	long r;
    484
    485	r = get_user(idx, (u32 __user *)argp);
    486	if (r < 0)
    487		return r;
    488
    489	if (idx >= v->nvqs)
    490		return -ENOBUFS;
    491
    492	idx = array_index_nospec(idx, v->nvqs);
    493	vq = &v->vqs[idx];
    494
    495	switch (cmd) {
    496	case VHOST_VDPA_SET_VRING_ENABLE:
    497		if (copy_from_user(&s, argp, sizeof(s)))
    498			return -EFAULT;
    499		ops->set_vq_ready(vdpa, idx, s.num);
    500		return 0;
    501	case VHOST_VDPA_GET_VRING_GROUP:
    502		if (!ops->get_vq_group)
    503			return -EOPNOTSUPP;
    504		s.index = idx;
    505		s.num = ops->get_vq_group(vdpa, idx);
    506		if (s.num >= vdpa->ngroups)
    507			return -EIO;
    508		else if (copy_to_user(argp, &s, sizeof(s)))
    509			return -EFAULT;
    510		return 0;
    511	case VHOST_VDPA_SET_GROUP_ASID:
    512		if (copy_from_user(&s, argp, sizeof(s)))
    513			return -EFAULT;
    514		if (s.num >= vdpa->nas)
    515			return -EINVAL;
    516		if (!ops->set_group_asid)
    517			return -EOPNOTSUPP;
    518		return ops->set_group_asid(vdpa, idx, s.num);
    519	case VHOST_GET_VRING_BASE:
    520		r = ops->get_vq_state(v->vdpa, idx, &vq_state);
    521		if (r)
    522			return r;
    523
    524		vq->last_avail_idx = vq_state.split.avail_index;
    525		break;
    526	}
    527
    528	r = vhost_vring_ioctl(&v->vdev, cmd, argp);
    529	if (r)
    530		return r;
    531
    532	switch (cmd) {
    533	case VHOST_SET_VRING_ADDR:
    534		if (ops->set_vq_address(vdpa, idx,
    535					(u64)(uintptr_t)vq->desc,
    536					(u64)(uintptr_t)vq->avail,
    537					(u64)(uintptr_t)vq->used))
    538			r = -EINVAL;
    539		break;
    540
    541	case VHOST_SET_VRING_BASE:
    542		vq_state.split.avail_index = vq->last_avail_idx;
    543		if (ops->set_vq_state(vdpa, idx, &vq_state))
    544			r = -EINVAL;
    545		break;
    546
    547	case VHOST_SET_VRING_CALL:
    548		if (vq->call_ctx.ctx) {
    549			cb.callback = vhost_vdpa_virtqueue_cb;
    550			cb.private = vq;
    551		} else {
    552			cb.callback = NULL;
    553			cb.private = NULL;
    554		}
    555		ops->set_vq_cb(vdpa, idx, &cb);
    556		vhost_vdpa_setup_vq_irq(v, idx);
    557		break;
    558
    559	case VHOST_SET_VRING_NUM:
    560		ops->set_vq_num(vdpa, idx, vq->num);
    561		break;
    562	}
    563
    564	return r;
    565}
    566
    567static long vhost_vdpa_unlocked_ioctl(struct file *filep,
    568				      unsigned int cmd, unsigned long arg)
    569{
    570	struct vhost_vdpa *v = filep->private_data;
    571	struct vhost_dev *d = &v->vdev;
    572	void __user *argp = (void __user *)arg;
    573	u64 __user *featurep = argp;
    574	u64 features;
    575	long r = 0;
    576
    577	if (cmd == VHOST_SET_BACKEND_FEATURES) {
    578		if (copy_from_user(&features, featurep, sizeof(features)))
    579			return -EFAULT;
    580		if (features & ~VHOST_VDPA_BACKEND_FEATURES)
    581			return -EOPNOTSUPP;
    582		vhost_set_backend_features(&v->vdev, features);
    583		return 0;
    584	}
    585
    586	mutex_lock(&d->mutex);
    587
    588	switch (cmd) {
    589	case VHOST_VDPA_GET_DEVICE_ID:
    590		r = vhost_vdpa_get_device_id(v, argp);
    591		break;
    592	case VHOST_VDPA_GET_STATUS:
    593		r = vhost_vdpa_get_status(v, argp);
    594		break;
    595	case VHOST_VDPA_SET_STATUS:
    596		r = vhost_vdpa_set_status(v, argp);
    597		break;
    598	case VHOST_VDPA_GET_CONFIG:
    599		r = vhost_vdpa_get_config(v, argp);
    600		break;
    601	case VHOST_VDPA_SET_CONFIG:
    602		r = vhost_vdpa_set_config(v, argp);
    603		break;
    604	case VHOST_GET_FEATURES:
    605		r = vhost_vdpa_get_features(v, argp);
    606		break;
    607	case VHOST_SET_FEATURES:
    608		r = vhost_vdpa_set_features(v, argp);
    609		break;
    610	case VHOST_VDPA_GET_VRING_NUM:
    611		r = vhost_vdpa_get_vring_num(v, argp);
    612		break;
    613	case VHOST_VDPA_GET_GROUP_NUM:
    614		if (copy_to_user(argp, &v->vdpa->ngroups,
    615				 sizeof(v->vdpa->ngroups)))
    616			r = -EFAULT;
    617		break;
    618	case VHOST_VDPA_GET_AS_NUM:
    619		if (copy_to_user(argp, &v->vdpa->nas, sizeof(v->vdpa->nas)))
    620			r = -EFAULT;
    621		break;
    622	case VHOST_SET_LOG_BASE:
    623	case VHOST_SET_LOG_FD:
    624		r = -ENOIOCTLCMD;
    625		break;
    626	case VHOST_VDPA_SET_CONFIG_CALL:
    627		r = vhost_vdpa_set_config_call(v, argp);
    628		break;
    629	case VHOST_GET_BACKEND_FEATURES:
    630		features = VHOST_VDPA_BACKEND_FEATURES;
    631		if (copy_to_user(featurep, &features, sizeof(features)))
    632			r = -EFAULT;
    633		break;
    634	case VHOST_VDPA_GET_IOVA_RANGE:
    635		r = vhost_vdpa_get_iova_range(v, argp);
    636		break;
    637	case VHOST_VDPA_GET_CONFIG_SIZE:
    638		r = vhost_vdpa_get_config_size(v, argp);
    639		break;
    640	case VHOST_VDPA_GET_VQS_COUNT:
    641		r = vhost_vdpa_get_vqs_count(v, argp);
    642		break;
    643	default:
    644		r = vhost_dev_ioctl(&v->vdev, cmd, argp);
    645		if (r == -ENOIOCTLCMD)
    646			r = vhost_vdpa_vring_ioctl(v, cmd, argp);
    647		break;
    648	}
    649
    650	mutex_unlock(&d->mutex);
    651	return r;
    652}
    653
    654static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v,
    655				struct vhost_iotlb *iotlb,
    656				u64 start, u64 last)
    657{
    658	struct vhost_dev *dev = &v->vdev;
    659	struct vhost_iotlb_map *map;
    660	struct page *page;
    661	unsigned long pfn, pinned;
    662
    663	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
    664		pinned = PFN_DOWN(map->size);
    665		for (pfn = PFN_DOWN(map->addr);
    666		     pinned > 0; pfn++, pinned--) {
    667			page = pfn_to_page(pfn);
    668			if (map->perm & VHOST_ACCESS_WO)
    669				set_page_dirty_lock(page);
    670			unpin_user_page(page);
    671		}
    672		atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm);
    673		vhost_iotlb_map_free(iotlb, map);
    674	}
    675}
    676
    677static void vhost_vdpa_va_unmap(struct vhost_vdpa *v,
    678				struct vhost_iotlb *iotlb,
    679				u64 start, u64 last)
    680{
    681	struct vhost_iotlb_map *map;
    682	struct vdpa_map_file *map_file;
    683
    684	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
    685		map_file = (struct vdpa_map_file *)map->opaque;
    686		fput(map_file->file);
    687		kfree(map_file);
    688		vhost_iotlb_map_free(iotlb, map);
    689	}
    690}
    691
    692static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v,
    693				   struct vhost_iotlb *iotlb,
    694				   u64 start, u64 last)
    695{
    696	struct vdpa_device *vdpa = v->vdpa;
    697
    698	if (vdpa->use_va)
    699		return vhost_vdpa_va_unmap(v, iotlb, start, last);
    700
    701	return vhost_vdpa_pa_unmap(v, iotlb, start, last);
    702}
    703
    704static int perm_to_iommu_flags(u32 perm)
    705{
    706	int flags = 0;
    707
    708	switch (perm) {
    709	case VHOST_ACCESS_WO:
    710		flags |= IOMMU_WRITE;
    711		break;
    712	case VHOST_ACCESS_RO:
    713		flags |= IOMMU_READ;
    714		break;
    715	case VHOST_ACCESS_RW:
    716		flags |= (IOMMU_WRITE | IOMMU_READ);
    717		break;
    718	default:
    719		WARN(1, "invalid vhost IOTLB permission\n");
    720		break;
    721	}
    722
    723	return flags | IOMMU_CACHE;
    724}
    725
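       /*
        * Insert a mapping into the software IOTLB and propagate it to the
        * device: through ->dma_map(), through ->set_map() (deferred to
        * BATCH_END while a batch is in flight), or through the platform
        * IOMMU domain.  For PA mappings the pinned pages are accounted
        * against the owner's pinned_vm.
        */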
    726static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
    727			  u64 iova, u64 size, u64 pa, u32 perm, void *opaque)
    728{
    729	struct vhost_dev *dev = &v->vdev;
    730	struct vdpa_device *vdpa = v->vdpa;
    731	const struct vdpa_config_ops *ops = vdpa->config;
    732	u32 asid = iotlb_to_asid(iotlb);
    733	int r = 0;
    734
    735	r = vhost_iotlb_add_range_ctx(iotlb, iova, iova + size - 1,
    736				      pa, perm, opaque);
    737	if (r)
    738		return r;
    739
    740	if (ops->dma_map) {
    741		r = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque);
    742	} else if (ops->set_map) {
    743		if (!v->in_batch)
    744			r = ops->set_map(vdpa, asid, iotlb);
    745	} else {
    746		r = iommu_map(v->domain, iova, pa, size,
    747			      perm_to_iommu_flags(perm));
    748	}
    749	if (r) {
    750		vhost_iotlb_del_range(iotlb, iova, iova + size - 1);
    751		return r;
    752	}
    753
    754	if (!vdpa->use_va)
    755		atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);
    756
    757	return 0;
    758}
    759
    760static void vhost_vdpa_unmap(struct vhost_vdpa *v,
    761			     struct vhost_iotlb *iotlb,
    762			     u64 iova, u64 size)
    763{
    764	struct vdpa_device *vdpa = v->vdpa;
    765	const struct vdpa_config_ops *ops = vdpa->config;
    766	u32 asid = iotlb_to_asid(iotlb);
    767
    768	vhost_vdpa_iotlb_unmap(v, iotlb, iova, iova + size - 1);
    769
    770	if (ops->dma_map) {
    771		ops->dma_unmap(vdpa, asid, iova, size);
    772	} else if (ops->set_map) {
    773		if (!v->in_batch)
    774			ops->set_map(vdpa, asid, iotlb);
    775	} else {
    776		iommu_unmap(v->domain, iova, size);
    777	}
    778
    779	/* If we are in the middle of batch processing, delay freeing
    780	 * the AS until BATCH_END.
    781	 */
    782	if (!v->in_batch && !iotlb->nmaps)
    783		vhost_vdpa_remove_as(v, asid);
    784}
    785
    786static int vhost_vdpa_va_map(struct vhost_vdpa *v,
    787			     struct vhost_iotlb *iotlb,
    788			     u64 iova, u64 size, u64 uaddr, u32 perm)
    789{
    790	struct vhost_dev *dev = &v->vdev;
    791	u64 offset, map_size, map_iova = iova;
    792	struct vdpa_map_file *map_file;
    793	struct vm_area_struct *vma;
    794	int ret = 0;
    795
    796	mmap_read_lock(dev->mm);
    797
    798	while (size) {
    799		vma = find_vma(dev->mm, uaddr);
    800		if (!vma) {
    801			ret = -EINVAL;
    802			break;
    803		}
    804		map_size = min(size, vma->vm_end - uaddr);
    805		if (!(vma->vm_file && (vma->vm_flags & VM_SHARED) &&
    806			!(vma->vm_flags & (VM_IO | VM_PFNMAP))))
    807			goto next;
    808
    809		map_file = kzalloc(sizeof(*map_file), GFP_KERNEL);
    810		if (!map_file) {
    811			ret = -ENOMEM;
    812			break;
    813		}
    814		offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start;
    815		map_file->offset = offset;
    816		map_file->file = get_file(vma->vm_file);
    817		ret = vhost_vdpa_map(v, iotlb, map_iova, map_size, uaddr,
    818				     perm, map_file);
    819		if (ret) {
    820			fput(map_file->file);
    821			kfree(map_file);
    822			break;
    823		}
    824next:
    825		size -= map_size;
    826		uaddr += map_size;
    827		map_iova += map_size;
    828	}
    829	if (ret)
    830		vhost_vdpa_unmap(v, iotlb, iova, map_iova - iova);
    831
    832	mmap_read_unlock(dev->mm);
    833
    834	return ret;
    835}
    836
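       /*
        * Map a physically backed range: pin the user pages in batches (one
        * page worth of page pointers at a time) and hand each physically
        * contiguous run to vhost_vdpa_map().  On failure, pages that were
        * pinned but never mapped are unpinned here, while ranges that were
        * already mapped are torn down (and unpinned) via vhost_vdpa_unmap().
        */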
    837static int vhost_vdpa_pa_map(struct vhost_vdpa *v,
    838			     struct vhost_iotlb *iotlb,
    839			     u64 iova, u64 size, u64 uaddr, u32 perm)
    840{
    841	struct vhost_dev *dev = &v->vdev;
    842	struct page **page_list;
    843	unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
    844	unsigned int gup_flags = FOLL_LONGTERM;
    845	unsigned long npages, cur_base, map_pfn, last_pfn = 0;
    846	unsigned long lock_limit, sz2pin, nchunks, i;
    847	u64 start = iova;
    848	long pinned;
    849	int ret = 0;
    850
    851	/* Limit the use of memory for bookkeeping */
    852	page_list = (struct page **) __get_free_page(GFP_KERNEL);
    853	if (!page_list)
    854		return -ENOMEM;
    855
    856	if (perm & VHOST_ACCESS_WO)
    857		gup_flags |= FOLL_WRITE;
    858
    859	npages = PFN_UP(size + (iova & ~PAGE_MASK));
    860	if (!npages) {
    861		ret = -EINVAL;
    862		goto free;
    863	}
    864
    865	mmap_read_lock(dev->mm);
    866
    867	lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
    868	if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
    869		ret = -ENOMEM;
    870		goto unlock;
    871	}
    872
    873	cur_base = uaddr & PAGE_MASK;
    874	iova &= PAGE_MASK;
    875	nchunks = 0;
    876
    877	while (npages) {
    878		sz2pin = min_t(unsigned long, npages, list_size);
    879		pinned = pin_user_pages(cur_base, sz2pin,
    880					gup_flags, page_list, NULL);
    881		if (sz2pin != pinned) {
    882			if (pinned < 0) {
    883				ret = pinned;
    884			} else {
    885				unpin_user_pages(page_list, pinned);
    886				ret = -ENOMEM;
    887			}
    888			goto out;
    889		}
    890		nchunks++;
    891
    892		if (!last_pfn)
    893			map_pfn = page_to_pfn(page_list[0]);
    894
    895		for (i = 0; i < pinned; i++) {
    896			unsigned long this_pfn = page_to_pfn(page_list[i]);
    897			u64 csize;
    898
    899			if (last_pfn && (this_pfn != last_pfn + 1)) {
    900				/* Pin a contiguous chunk of memory */
    901				csize = PFN_PHYS(last_pfn - map_pfn + 1);
    902				ret = vhost_vdpa_map(v, iotlb, iova, csize,
    903						     PFN_PHYS(map_pfn),
    904						     perm, NULL);
    905				if (ret) {
    906					/*
    907					 * Unpin the pages that are left unmapped
    908					 * from this point on in the current
    909					 * page_list. The remaining outstanding
    910					 * ones which may stride across several
    911					 * chunks will be covered in the common
    912					 * error path subsequently.
    913					 */
    914					unpin_user_pages(&page_list[i],
    915							 pinned - i);
    916					goto out;
    917				}
    918
    919				map_pfn = this_pfn;
    920				iova += csize;
    921				nchunks = 0;
    922			}
    923
    924			last_pfn = this_pfn;
    925		}
    926
    927		cur_base += PFN_PHYS(pinned);
    928		npages -= pinned;
    929	}
    930
    931	/* Map the remaining chunk */
    932	ret = vhost_vdpa_map(v, iotlb, iova, PFN_PHYS(last_pfn - map_pfn + 1),
    933			     PFN_PHYS(map_pfn), perm, NULL);
    934out:
    935	if (ret) {
    936		if (nchunks) {
    937			unsigned long pfn;
    938
    939			/*
    940			 * Unpin the outstanding pages which should have been
    941			 * mapped but haven't been, due to vdpa_map() or
    942			 * pin_user_pages() failure.
    943			 *
    944			 * Mapped pages are accounted in vdpa_map(), hence
    945			 * the corresponding unpinning will be handled by
    946			 * vdpa_unmap().
    947			 */
    948			WARN_ON(!last_pfn);
    949			for (pfn = map_pfn; pfn <= last_pfn; pfn++)
    950				unpin_user_page(pfn_to_page(pfn));
    951		}
    952		vhost_vdpa_unmap(v, iotlb, start, size);
    953	}
    954unlock:
    955	mmap_read_unlock(dev->mm);
    956free:
    957	free_page((unsigned long)page_list);
    958	return ret;
    959
    960}
    961
    962static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
    963					   struct vhost_iotlb *iotlb,
    964					   struct vhost_iotlb_msg *msg)
    965{
    966	struct vdpa_device *vdpa = v->vdpa;
    967
    968	if (msg->iova < v->range.first || !msg->size ||
    969	    msg->iova > U64_MAX - msg->size + 1 ||
    970	    msg->iova + msg->size - 1 > v->range.last)
    971		return -EINVAL;
    972
    973	if (vhost_iotlb_itree_first(iotlb, msg->iova,
    974				    msg->iova + msg->size - 1))
    975		return -EEXIST;
    976
    977	if (vdpa->use_va)
    978		return vhost_vdpa_va_map(v, iotlb, msg->iova, msg->size,
    979					 msg->uaddr, msg->perm);
    980
    981	return vhost_vdpa_pa_map(v, iotlb, msg->iova, msg->size, msg->uaddr,
    982				 msg->perm);
    983}
    984
    985static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, u32 asid,
    986					struct vhost_iotlb_msg *msg)
    987{
    988	struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev);
    989	struct vdpa_device *vdpa = v->vdpa;
    990	const struct vdpa_config_ops *ops = vdpa->config;
    991	struct vhost_iotlb *iotlb = NULL;
    992	struct vhost_vdpa_as *as = NULL;
    993	int r = 0;
    994
    995	mutex_lock(&dev->mutex);
    996
    997	r = vhost_dev_check_owner(dev);
    998	if (r)
    999		goto unlock;
   1000
   1001	if (msg->type == VHOST_IOTLB_UPDATE ||
   1002	    msg->type == VHOST_IOTLB_BATCH_BEGIN) {
   1003		as = vhost_vdpa_find_alloc_as(v, asid);
   1004		if (!as) {
   1005			dev_err(&v->dev, "can't find and alloc asid %d\n",
   1006				asid);
   1007			r = -EINVAL;
   1008			goto unlock;
   1009		}
   1010		iotlb = &as->iotlb;
   1011	} else
   1012		iotlb = asid_to_iotlb(v, asid);
   1013
   1014	if ((v->in_batch && v->batch_asid != asid) || !iotlb) {
   1015		if (v->in_batch && v->batch_asid != asid) {
   1016			dev_info(&v->dev, "batch id %d asid %d\n",
   1017				 v->batch_asid, asid);
   1018		}
   1019		if (!iotlb)
   1020			dev_err(&v->dev, "no iotlb for asid %d\n", asid);
   1021		r = -EINVAL;
   1022		goto unlock;
   1023	}
   1024
   1025	switch (msg->type) {
   1026	case VHOST_IOTLB_UPDATE:
   1027		r = vhost_vdpa_process_iotlb_update(v, iotlb, msg);
   1028		break;
   1029	case VHOST_IOTLB_INVALIDATE:
   1030		vhost_vdpa_unmap(v, iotlb, msg->iova, msg->size);
   1031		break;
   1032	case VHOST_IOTLB_BATCH_BEGIN:
   1033		v->batch_asid = asid;
   1034		v->in_batch = true;
   1035		break;
   1036	case VHOST_IOTLB_BATCH_END:
   1037		if (v->in_batch && ops->set_map)
   1038			ops->set_map(vdpa, asid, iotlb);
   1039		v->in_batch = false;
   1040		if (!iotlb->nmaps)
   1041			vhost_vdpa_remove_as(v, asid);
   1042		break;
   1043	default:
   1044		r = -EINVAL;
   1045		break;
   1046	}
   1047unlock:
   1048	mutex_unlock(&dev->mutex);
   1049
   1050	return r;
   1051}
   1052
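       /*
        * IOTLB updates arrive via write(2) on the char device as struct
        * vhost_msg_v2 payloads (VHOST_IOTLB_MSG_V2); vhost_chr_write_iter()
        * decodes them and hands them to vhost_vdpa_process_iotlb_msg()
        * above.  A rough userspace sketch (field names as in
        * <linux/vhost_types.h>, error handling omitted):
        *
        *	struct vhost_msg_v2 msg = {
        *		.type = VHOST_IOTLB_MSG_V2,
        *		.iotlb = {
        *			.iova  = iova,
        *			.size  = size,
        *			.uaddr = (uint64_t)(uintptr_t)buf,
        *			.perm  = VHOST_ACCESS_RW,
        *			.type  = VHOST_IOTLB_UPDATE,
        *		},
        *	};
        *	write(vdpa_fd, &msg, sizeof(msg));
        */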
   1053static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb,
   1054					 struct iov_iter *from)
   1055{
   1056	struct file *file = iocb->ki_filp;
   1057	struct vhost_vdpa *v = file->private_data;
   1058	struct vhost_dev *dev = &v->vdev;
   1059
   1060	return vhost_chr_write_iter(dev, from);
   1061}
   1062
   1063static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
   1064{
   1065	struct vdpa_device *vdpa = v->vdpa;
   1066	const struct vdpa_config_ops *ops = vdpa->config;
   1067	struct device *dma_dev = vdpa_get_dma_dev(vdpa);
   1068	struct bus_type *bus;
   1069	int ret;
   1070
   1071	/* Device wants to do DMA by itself */
   1072	if (ops->set_map || ops->dma_map)
   1073		return 0;
   1074
   1075	bus = dma_dev->bus;
   1076	if (!bus)
   1077		return -EFAULT;
   1078
   1079	if (!iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY))
   1080		return -ENOTSUPP;
   1081
   1082	v->domain = iommu_domain_alloc(bus);
   1083	if (!v->domain)
   1084		return -EIO;
   1085
   1086	ret = iommu_attach_device(v->domain, dma_dev);
   1087	if (ret)
   1088		goto err_attach;
   1089
   1090	return 0;
   1091
   1092err_attach:
   1093	iommu_domain_free(v->domain);
   1094	return ret;
   1095}
   1096
   1097static void vhost_vdpa_free_domain(struct vhost_vdpa *v)
   1098{
   1099	struct vdpa_device *vdpa = v->vdpa;
   1100	struct device *dma_dev = vdpa_get_dma_dev(vdpa);
   1101
   1102	if (v->domain) {
   1103		iommu_detach_device(v->domain, dma_dev);
   1104		iommu_domain_free(v->domain);
   1105	}
   1106
   1107	v->domain = NULL;
   1108}
   1109
   1110static void vhost_vdpa_set_iova_range(struct vhost_vdpa *v)
   1111{
   1112	struct vdpa_iova_range *range = &v->range;
   1113	struct vdpa_device *vdpa = v->vdpa;
   1114	const struct vdpa_config_ops *ops = vdpa->config;
   1115
   1116	if (ops->get_iova_range) {
   1117		*range = ops->get_iova_range(vdpa);
   1118	} else if (v->domain && v->domain->geometry.force_aperture) {
   1119		range->first = v->domain->geometry.aperture_start;
   1120		range->last = v->domain->geometry.aperture_end;
   1121	} else {
   1122		range->first = 0;
   1123		range->last = ULLONG_MAX;
   1124	}
   1125}
   1126
   1127static void vhost_vdpa_cleanup(struct vhost_vdpa *v)
   1128{
   1129	struct vhost_vdpa_as *as;
   1130	u32 asid;
   1131
   1132	vhost_dev_cleanup(&v->vdev);
   1133	kfree(v->vdev.vqs);
   1134
   1135	for (asid = 0; asid < v->vdpa->nas; asid++) {
   1136		as = asid_to_as(v, asid);
   1137		if (as)
   1138			vhost_vdpa_remove_as(v, asid);
   1139	}
   1140}
   1141
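       /*
        * open() is exclusive: the atomic 'opened' flag allows a single
        * opener at a time.  Opening resets the device, sets up the vhost
        * virtqueues, allocates the IOMMU domain (unless the parent does its
        * own DMA mapping) and probes the usable IOVA range.
        */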
   1142static int vhost_vdpa_open(struct inode *inode, struct file *filep)
   1143{
   1144	struct vhost_vdpa *v;
   1145	struct vhost_dev *dev;
   1146	struct vhost_virtqueue **vqs;
   1147	int r, opened;
   1148	u32 i, nvqs;
   1149
   1150	v = container_of(inode->i_cdev, struct vhost_vdpa, cdev);
   1151
   1152	opened = atomic_cmpxchg(&v->opened, 0, 1);
   1153	if (opened)
   1154		return -EBUSY;
   1155
   1156	nvqs = v->nvqs;
   1157	r = vhost_vdpa_reset(v);
   1158	if (r)
   1159		goto err;
   1160
   1161	vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL);
   1162	if (!vqs) {
   1163		r = -ENOMEM;
   1164		goto err;
   1165	}
   1166
   1167	dev = &v->vdev;
   1168	for (i = 0; i < nvqs; i++) {
   1169		vqs[i] = &v->vqs[i];
   1170		vqs[i]->handle_kick = handle_vq_kick;
   1171	}
   1172	vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
   1173		       vhost_vdpa_process_iotlb_msg);
   1174
   1175	r = vhost_vdpa_alloc_domain(v);
   1176	if (r)
   1177		goto err_alloc_domain;
   1178
   1179	vhost_vdpa_set_iova_range(v);
   1180
   1181	filep->private_data = v;
   1182
   1183	return 0;
   1184
   1185err_alloc_domain:
   1186	vhost_vdpa_cleanup(v);
   1187err:
   1188	atomic_dec(&v->opened);
   1189	return r;
   1190}
   1191
   1192static void vhost_vdpa_clean_irq(struct vhost_vdpa *v)
   1193{
   1194	u32 i;
   1195
   1196	for (i = 0; i < v->nvqs; i++)
   1197		vhost_vdpa_unsetup_vq_irq(v, i);
   1198}
   1199
   1200static int vhost_vdpa_release(struct inode *inode, struct file *filep)
   1201{
   1202	struct vhost_vdpa *v = filep->private_data;
   1203	struct vhost_dev *d = &v->vdev;
   1204
   1205	mutex_lock(&d->mutex);
   1206	filep->private_data = NULL;
   1207	vhost_vdpa_clean_irq(v);
   1208	vhost_vdpa_reset(v);
   1209	vhost_dev_stop(&v->vdev);
   1210	vhost_vdpa_free_domain(v);
   1211	vhost_vdpa_config_put(v);
   1212	vhost_vdpa_cleanup(v);
   1213	mutex_unlock(&d->mutex);
   1214
   1215	atomic_dec(&v->opened);
   1216	complete(&v->completion);
   1217
   1218	return 0;
   1219}
   1220
   1221#ifdef CONFIG_MMU
   1222static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf)
   1223{
   1224	struct vhost_vdpa *v = vmf->vma->vm_file->private_data;
   1225	struct vdpa_device *vdpa = v->vdpa;
   1226	const struct vdpa_config_ops *ops = vdpa->config;
   1227	struct vdpa_notification_area notify;
   1228	struct vm_area_struct *vma = vmf->vma;
   1229	u16 index = vma->vm_pgoff;
   1230
   1231	notify = ops->get_vq_notification(vdpa, index);
   1232
   1233	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
   1234	if (remap_pfn_range(vma, vmf->address & PAGE_MASK,
   1235			    PFN_DOWN(notify.addr), PAGE_SIZE,
   1236			    vma->vm_page_prot))
   1237		return VM_FAULT_SIGBUS;
   1238
   1239	return VM_FAULT_NOPAGE;
   1240}
   1241
   1242static const struct vm_operations_struct vhost_vdpa_vm_ops = {
   1243	.fault = vhost_vdpa_fault,
   1244};
   1245
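       /*
        * Doorbell mapping: the mmap offset selects the virtqueue, and
        * exactly one write-only, shared page may be mapped per call.  A
        * rough userspace sketch (assuming the queue's notification area is
        * page aligned):
        *
        *	void *db = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED,
        *			vdpa_fd, (off_t)vq_index * page_size);
        */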
   1246static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma)
   1247{
   1248	struct vhost_vdpa *v = vma->vm_file->private_data;
   1249	struct vdpa_device *vdpa = v->vdpa;
   1250	const struct vdpa_config_ops *ops = vdpa->config;
   1251	struct vdpa_notification_area notify;
   1252	unsigned long index = vma->vm_pgoff;
   1253
   1254	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
   1255		return -EINVAL;
   1256	if ((vma->vm_flags & VM_SHARED) == 0)
   1257		return -EINVAL;
   1258	if (vma->vm_flags & VM_READ)
   1259		return -EINVAL;
   1260	if (index > 65535)
   1261		return -EINVAL;
   1262	if (!ops->get_vq_notification)
   1263		return -ENOTSUPP;
   1264
   1265	/* To be safe and easily modelled by userspace, we only
   1266	 * support a doorbell that sits on a page boundary and
   1267	 * does not share the page with other registers.
   1268	 */
   1269	notify = ops->get_vq_notification(vdpa, index);
   1270	if (notify.addr & (PAGE_SIZE - 1))
   1271		return -EINVAL;
   1272	if (vma->vm_end - vma->vm_start != notify.size)
   1273		return -ENOTSUPP;
   1274
   1275	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
   1276	vma->vm_ops = &vhost_vdpa_vm_ops;
   1277	return 0;
   1278}
   1279#endif /* CONFIG_MMU */
   1280
   1281static const struct file_operations vhost_vdpa_fops = {
   1282	.owner		= THIS_MODULE,
   1283	.open		= vhost_vdpa_open,
   1284	.release	= vhost_vdpa_release,
   1285	.write_iter	= vhost_vdpa_chr_write_iter,
   1286	.unlocked_ioctl	= vhost_vdpa_unlocked_ioctl,
   1287#ifdef CONFIG_MMU
   1288	.mmap		= vhost_vdpa_mmap,
   1289#endif /* CONFIG_MMU */
   1290	.compat_ioctl	= compat_ptr_ioctl,
   1291};
   1292
   1293static void vhost_vdpa_release_dev(struct device *device)
   1294{
   1295	struct vhost_vdpa *v =
   1296	       container_of(device, struct vhost_vdpa, dev);
   1297
   1298	ida_simple_remove(&vhost_vdpa_ida, v->minor);
   1299	kfree(v->vqs);
   1300	kfree(v);
   1301}
   1302
   1303static int vhost_vdpa_probe(struct vdpa_device *vdpa)
   1304{
   1305	const struct vdpa_config_ops *ops = vdpa->config;
   1306	struct vhost_vdpa *v;
   1307	int minor;
   1308	int i, r;
   1309
   1310	/* We can't support a platform IOMMU device with more than
   1311	 * one group or address space.
   1312	 */
   1313	if (!ops->set_map && !ops->dma_map &&
   1314	    (vdpa->ngroups > 1 || vdpa->nas > 1))
   1315		return -EOPNOTSUPP;
   1316
   1317	v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
   1318	if (!v)
   1319		return -ENOMEM;
   1320
   1321	minor = ida_simple_get(&vhost_vdpa_ida, 0,
   1322			       VHOST_VDPA_DEV_MAX, GFP_KERNEL);
   1323	if (minor < 0) {
   1324		kfree(v);
   1325		return minor;
   1326	}
   1327
   1328	atomic_set(&v->opened, 0);
   1329	v->minor = minor;
   1330	v->vdpa = vdpa;
   1331	v->nvqs = vdpa->nvqs;
   1332	v->virtio_id = ops->get_device_id(vdpa);
   1333
   1334	device_initialize(&v->dev);
   1335	v->dev.release = vhost_vdpa_release_dev;
   1336	v->dev.parent = &vdpa->dev;
   1337	v->dev.devt = MKDEV(MAJOR(vhost_vdpa_major), minor);
   1338	v->vqs = kmalloc_array(v->nvqs, sizeof(struct vhost_virtqueue),
   1339			       GFP_KERNEL);
   1340	if (!v->vqs) {
   1341		r = -ENOMEM;
   1342		goto err;
   1343	}
   1344
   1345	r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor);
   1346	if (r)
   1347		goto err;
   1348
   1349	cdev_init(&v->cdev, &vhost_vdpa_fops);
   1350	v->cdev.owner = THIS_MODULE;
   1351
   1352	r = cdev_device_add(&v->cdev, &v->dev);
   1353	if (r)
   1354		goto err;
   1355
   1356	init_completion(&v->completion);
   1357	vdpa_set_drvdata(vdpa, v);
   1358
   1359	for (i = 0; i < VHOST_VDPA_IOTLB_BUCKETS; i++)
   1360		INIT_HLIST_HEAD(&v->as[i]);
   1361
   1362	return 0;
   1363
   1364err:
   1365	put_device(&v->dev);
   1366	return r;
   1367}
   1368
   1369static void vhost_vdpa_remove(struct vdpa_device *vdpa)
   1370{
   1371	struct vhost_vdpa *v = vdpa_get_drvdata(vdpa);
   1372	int opened;
   1373
   1374	cdev_device_del(&v->cdev, &v->dev);
   1375
   1376	do {
   1377		opened = atomic_cmpxchg(&v->opened, 0, 1);
   1378		if (!opened)
   1379			break;
   1380		wait_for_completion(&v->completion);
   1381	} while (1);
   1382
   1383	put_device(&v->dev);
   1384}
   1385
   1386static struct vdpa_driver vhost_vdpa_driver = {
   1387	.driver = {
   1388		.name	= "vhost_vdpa",
   1389	},
   1390	.probe	= vhost_vdpa_probe,
   1391	.remove	= vhost_vdpa_remove,
   1392};
   1393
   1394static int __init vhost_vdpa_init(void)
   1395{
   1396	int r;
   1397
   1398	r = alloc_chrdev_region(&vhost_vdpa_major, 0, VHOST_VDPA_DEV_MAX,
   1399				"vhost-vdpa");
   1400	if (r)
   1401		goto err_alloc_chrdev;
   1402
   1403	r = vdpa_register_driver(&vhost_vdpa_driver);
   1404	if (r)
   1405		goto err_vdpa_register_driver;
   1406
   1407	return 0;
   1408
   1409err_vdpa_register_driver:
   1410	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
   1411err_alloc_chrdev:
   1412	return r;
   1413}
   1414module_init(vhost_vdpa_init);
   1415
   1416static void __exit vhost_vdpa_exit(void)
   1417{
   1418	vdpa_unregister_driver(&vhost_vdpa_driver);
   1419	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
   1420}
   1421module_exit(vhost_vdpa_exit);
   1422
   1423MODULE_VERSION("0.0.1");
   1424MODULE_LICENSE("GPL v2");
   1425MODULE_AUTHOR("Intel Corporation");
   1426MODULE_DESCRIPTION("vDPA-based vhost backend for virtio");
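
For reference, a minimal userspace sketch of probing the character device this driver registers (/dev/vhost-vdpa-<minor>). The device path and the availability of the VHOST_* ioctls in <linux/vhost.h> are assumptions; error handling is kept to the essentials.

#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/vhost.h>

int main(void)
{
	struct vhost_vdpa_iova_range range;
	uint64_t features;
	uint32_t device_id;
	int fd;

	fd = open("/dev/vhost-vdpa-0", O_RDWR);	/* device path is an assumption */
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Served by vhost_vdpa_get_device_id() above. */
	if (ioctl(fd, VHOST_VDPA_GET_DEVICE_ID, &device_id) == 0)
		printf("virtio device id: %u\n", device_id);

	/* Served by vhost_vdpa_get_features() above. */
	if (ioctl(fd, VHOST_GET_FEATURES, &features) == 0)
		printf("device features: 0x%llx\n",
		       (unsigned long long)features);

	/* Served by vhost_vdpa_get_iova_range() above. */
	if (ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, &range) == 0)
		printf("iova range: [0x%llx, 0x%llx]\n",
		       (unsigned long long)range.first,
		       (unsigned long long)range.last);

	close(fd);
	return 0;
}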