cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

vringh.c (39620B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * Helpers for the host side of a virtio ring.
      4 *
      5 * Since these may be in userspace, we use (inline) accessors.
      6 */
      7#include <linux/compiler.h>
      8#include <linux/module.h>
      9#include <linux/vringh.h>
     10#include <linux/virtio_ring.h>
     11#include <linux/kernel.h>
     12#include <linux/ratelimit.h>
     13#include <linux/uaccess.h>
     14#include <linux/slab.h>
     15#include <linux/export.h>
     16#if IS_REACHABLE(CONFIG_VHOST_IOTLB)
     17#include <linux/bvec.h>
     18#include <linux/highmem.h>
     19#include <linux/vhost_iotlb.h>
     20#endif
     21#include <uapi/linux/virtio_config.h>
     22
     23static __printf(1,2) __cold void vringh_bad(const char *fmt, ...)
     24{
     25	static DEFINE_RATELIMIT_STATE(vringh_rs,
     26				      DEFAULT_RATELIMIT_INTERVAL,
     27				      DEFAULT_RATELIMIT_BURST);
     28	if (__ratelimit(&vringh_rs)) {
     29		va_list ap;
     30		va_start(ap, fmt);
     31		printk(KERN_NOTICE "vringh:");
     32		vprintk(fmt, ap);
     33		va_end(ap);
     34	}
     35}
     36
     37/* Returns vring->num if empty, -ve on error. */
     38static inline int __vringh_get_head(const struct vringh *vrh,
     39				    int (*getu16)(const struct vringh *vrh,
     40						  u16 *val, const __virtio16 *p),
     41				    u16 *last_avail_idx)
     42{
     43	u16 avail_idx, i, head;
     44	int err;
     45
     46	err = getu16(vrh, &avail_idx, &vrh->vring.avail->idx);
     47	if (err) {
     48		vringh_bad("Failed to access avail idx at %p",
     49			   &vrh->vring.avail->idx);
     50		return err;
     51	}
     52
     53	if (*last_avail_idx == avail_idx)
     54		return vrh->vring.num;
     55
     56	/* Only get avail ring entries after they have been exposed by guest. */
     57	virtio_rmb(vrh->weak_barriers);
     58
     59	i = *last_avail_idx & (vrh->vring.num - 1);
     60
     61	err = getu16(vrh, &head, &vrh->vring.avail->ring[i]);
     62	if (err) {
     63		vringh_bad("Failed to read head: idx %d address %p",
     64			   *last_avail_idx, &vrh->vring.avail->ring[i]);
     65		return err;
     66	}
     67
     68	if (head >= vrh->vring.num) {
     69		vringh_bad("Guest says index %u > %u is available",
     70			   head, vrh->vring.num);
     71		return -EINVAL;
     72	}
     73
     74	(*last_avail_idx)++;
     75	return head;
     76}
     77
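/*
 * Editor's illustrative sketch (not part of the original file): because the
 * vringh_init_*() helpers below reject ring sizes that are not a power of
 * two, __vringh_get_head() can map the free-running *last_avail_idx onto a
 * ring slot with a mask instead of a modulo. The helper and the ring size of
 * 256 are hypothetical, shown only to make that equivalence explicit.
 */
static inline u16 __maybe_unused example_avail_slot(u16 last_avail_idx)
{
	const u16 num = 256;	/* must be a power of two, as enforced at init */

	/* idx & (num - 1) == idx % num whenever num is a power of two. */
	return last_avail_idx & (num - 1);
}
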
     78/**
     79 * vringh_kiov_advance - skip bytes from vring_kiov
     80 * @iov: an iov passed to vringh_getdesc_*() (updated as we consume)
     81 * @len: the maximum length to advance
     82 */
     83void vringh_kiov_advance(struct vringh_kiov *iov, size_t len)
     84{
     85	while (len && iov->i < iov->used) {
     86		size_t partlen = min(iov->iov[iov->i].iov_len, len);
     87
     88		iov->consumed += partlen;
     89		iov->iov[iov->i].iov_len -= partlen;
     90		iov->iov[iov->i].iov_base += partlen;
     91
     92		if (!iov->iov[iov->i].iov_len) {
     93			/* Fix up old iov element then increment. */
     94			iov->iov[iov->i].iov_len = iov->consumed;
     95			iov->iov[iov->i].iov_base -= iov->consumed;
     96
     97			iov->consumed = 0;
     98			iov->i++;
     99		}
    100
    101		len -= partlen;
    102	}
    103}
    104EXPORT_SYMBOL(vringh_kiov_advance);
    105
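/*
 * Editor's illustrative sketch (not part of the original file): a typical
 * caller uses vringh_kiov_advance() to skip a fixed-size request header in
 * the readable kiov before copying the payload. "struct example_req_hdr",
 * the helper name and the payload buffer are hypothetical.
 */
struct example_req_hdr {
	__virtio32 type;
	__virtio32 reserved;
};

static ssize_t __maybe_unused example_skip_header(struct vringh_kiov *riov,
						  void *payload, size_t len)
{
	/* Skip the header bytes without copying them... */
	vringh_kiov_advance(riov, sizeof(struct example_req_hdr));

	/* ...then pull the remaining readable bytes into 'payload'. */
	return vringh_iov_pull_kern(riov, payload, len);
}
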
    106/* Copy some bytes to/from the iovec.  Returns num copied. */
    107static inline ssize_t vringh_iov_xfer(struct vringh *vrh,
    108				      struct vringh_kiov *iov,
    109				      void *ptr, size_t len,
    110				      int (*xfer)(const struct vringh *vrh,
    111						  void *addr, void *ptr,
    112						  size_t len))
    113{
    114	int err, done = 0;
    115
    116	while (len && iov->i < iov->used) {
    117		size_t partlen;
    118
    119		partlen = min(iov->iov[iov->i].iov_len, len);
    120		err = xfer(vrh, iov->iov[iov->i].iov_base, ptr, partlen);
    121		if (err)
    122			return err;
    123		done += partlen;
    124		len -= partlen;
    125		ptr += partlen;
    126
    127		vringh_kiov_advance(iov, partlen);
    128	}
    129	return done;
    130}
    131
    132/* May reduce *len if range is shorter. */
    133static inline bool range_check(struct vringh *vrh, u64 addr, size_t *len,
    134			       struct vringh_range *range,
    135			       bool (*getrange)(struct vringh *,
    136						u64, struct vringh_range *))
    137{
    138	if (addr < range->start || addr > range->end_incl) {
    139		if (!getrange(vrh, addr, range))
    140			return false;
    141	}
    142	BUG_ON(addr < range->start || addr > range->end_incl);
    143
    144	/* To end of memory? */
    145	if (unlikely(addr + *len == 0)) {
    146		if (range->end_incl == -1ULL)
    147			return true;
    148		goto truncate;
    149	}
    150
    151	/* Otherwise, don't wrap. */
    152	if (addr + *len < addr) {
    153		vringh_bad("Wrapping descriptor %zu@0x%llx",
    154			   *len, (unsigned long long)addr);
    155		return false;
    156	}
    157
    158	if (unlikely(addr + *len - 1 > range->end_incl))
    159		goto truncate;
    160	return true;
    161
    162truncate:
    163	*len = range->end_incl + 1 - addr;
    164	return true;
    165}
    166
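/*
 * Editor's illustrative sketch (not part of the original file): the getrange
 * callback consulted by range_check() is supplied by the caller of
 * vringh_getdesc_user(). A minimal implementation might describe a single
 * contiguous guest memory window; the 1 GiB bound and the zero offset below
 * are hypothetical values chosen only for illustration.
 */
static bool __maybe_unused example_getrange(struct vringh *vrh, u64 addr,
					    struct vringh_range *r)
{
	if (addr >= 0x40000000ULL)	/* outside the single 1 GiB window */
		return false;

	r->start = 0;
	r->end_incl = 0x40000000ULL - 1;
	/* Offset added to descriptor addresses to form userspace pointers. */
	r->offset = 0;
	return true;
}
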
    167static inline bool no_range_check(struct vringh *vrh, u64 addr, size_t *len,
    168				  struct vringh_range *range,
    169				  bool (*getrange)(struct vringh *,
    170						   u64, struct vringh_range *))
    171{
    172	return true;
    173}
    174
    175/* No reason for this code to be inline. */
    176static int move_to_indirect(const struct vringh *vrh,
    177			    int *up_next, u16 *i, void *addr,
    178			    const struct vring_desc *desc,
    179			    struct vring_desc **descs, int *desc_max)
    180{
    181	u32 len;
    182
    183	/* Indirect tables can't have indirect. */
    184	if (*up_next != -1) {
    185		vringh_bad("Multilevel indirect %u->%u", *up_next, *i);
    186		return -EINVAL;
    187	}
    188
    189	len = vringh32_to_cpu(vrh, desc->len);
    190	if (unlikely(len % sizeof(struct vring_desc))) {
    191		vringh_bad("Strange indirect len %u", desc->len);
    192		return -EINVAL;
    193	}
    194
    195	/* We will check this when we follow it! */
    196	if (desc->flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT))
    197		*up_next = vringh16_to_cpu(vrh, desc->next);
    198	else
    199		*up_next = -2;
    200	*descs = addr;
    201	*desc_max = len / sizeof(struct vring_desc);
    202
    203	/* Now, start at the first indirect. */
    204	*i = 0;
    205	return 0;
    206}
    207
    208static int resize_iovec(struct vringh_kiov *iov, gfp_t gfp)
    209{
    210	struct kvec *new;
    211	unsigned int flag, new_num = (iov->max_num & ~VRINGH_IOV_ALLOCATED) * 2;
    212
    213	if (new_num < 8)
    214		new_num = 8;
    215
    216	flag = (iov->max_num & VRINGH_IOV_ALLOCATED);
    217	if (flag)
    218		new = krealloc_array(iov->iov, new_num,
    219				     sizeof(struct iovec), gfp);
    220	else {
    221		new = kmalloc_array(new_num, sizeof(struct iovec), gfp);
    222		if (new) {
    223			memcpy(new, iov->iov,
    224			       iov->max_num * sizeof(struct iovec));
    225			flag = VRINGH_IOV_ALLOCATED;
    226		}
    227	}
    228	if (!new)
    229		return -ENOMEM;
    230	iov->iov = new;
    231	iov->max_num = (new_num | flag);
    232	return 0;
    233}
    234
    235static u16 __cold return_from_indirect(const struct vringh *vrh, int *up_next,
    236				       struct vring_desc **descs, int *desc_max)
    237{
    238	u16 i = *up_next;
    239
    240	*up_next = -1;
    241	*descs = vrh->vring.desc;
    242	*desc_max = vrh->vring.num;
    243	return i;
    244}
    245
    246static int slow_copy(struct vringh *vrh, void *dst, const void *src,
    247		     bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len,
    248				    struct vringh_range *range,
    249				    bool (*getrange)(struct vringh *vrh,
    250						     u64,
    251						     struct vringh_range *)),
    252		     bool (*getrange)(struct vringh *vrh,
    253				      u64 addr,
    254				      struct vringh_range *r),
    255		     struct vringh_range *range,
    256		     int (*copy)(const struct vringh *vrh,
    257				 void *dst, const void *src, size_t len))
    258{
    259	size_t part, len = sizeof(struct vring_desc);
    260
    261	do {
    262		u64 addr;
    263		int err;
    264
    265		part = len;
    266		addr = (u64)(unsigned long)src - range->offset;
    267
    268		if (!rcheck(vrh, addr, &part, range, getrange))
    269			return -EINVAL;
    270
    271		err = copy(vrh, dst, src, part);
    272		if (err)
    273			return err;
    274
    275		dst += part;
    276		src += part;
    277		len -= part;
    278	} while (len);
    279	return 0;
    280}
    281
    282static inline int
    283__vringh_iov(struct vringh *vrh, u16 i,
    284	     struct vringh_kiov *riov,
    285	     struct vringh_kiov *wiov,
    286	     bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len,
    287			    struct vringh_range *range,
    288			    bool (*getrange)(struct vringh *, u64,
    289					     struct vringh_range *)),
    290	     bool (*getrange)(struct vringh *, u64, struct vringh_range *),
    291	     gfp_t gfp,
    292	     int (*copy)(const struct vringh *vrh,
    293			 void *dst, const void *src, size_t len))
    294{
    295	int err, count = 0, indirect_count = 0, up_next, desc_max;
    296	struct vring_desc desc, *descs;
    297	struct vringh_range range = { -1ULL, 0 }, slowrange;
    298	bool slow = false;
    299
     300	/* We start traversing the vring's descriptor table. */
    301	descs = vrh->vring.desc;
    302	desc_max = vrh->vring.num;
    303	up_next = -1;
    304
    305	/* You must want something! */
    306	if (WARN_ON(!riov && !wiov))
    307		return -EINVAL;
    308
    309	if (riov)
    310		riov->i = riov->used = riov->consumed = 0;
    311	if (wiov)
    312		wiov->i = wiov->used = wiov->consumed = 0;
    313
    314	for (;;) {
    315		void *addr;
    316		struct vringh_kiov *iov;
    317		size_t len;
    318
    319		if (unlikely(slow))
    320			err = slow_copy(vrh, &desc, &descs[i], rcheck, getrange,
    321					&slowrange, copy);
    322		else
    323			err = copy(vrh, &desc, &descs[i], sizeof(desc));
    324		if (unlikely(err))
    325			goto fail;
    326
    327		if (unlikely(desc.flags &
    328			     cpu_to_vringh16(vrh, VRING_DESC_F_INDIRECT))) {
    329			u64 a = vringh64_to_cpu(vrh, desc.addr);
    330
    331			/* Make sure it's OK, and get offset. */
    332			len = vringh32_to_cpu(vrh, desc.len);
    333			if (!rcheck(vrh, a, &len, &range, getrange)) {
    334				err = -EINVAL;
    335				goto fail;
    336			}
    337
    338			if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) {
    339				slow = true;
    340				/* We need to save this range to use offset */
    341				slowrange = range;
    342			}
    343
    344			addr = (void *)(long)(a + range.offset);
    345			err = move_to_indirect(vrh, &up_next, &i, addr, &desc,
    346					       &descs, &desc_max);
    347			if (err)
    348				goto fail;
    349			continue;
    350		}
    351
    352		if (up_next == -1)
    353			count++;
    354		else
    355			indirect_count++;
    356
    357		if (count > vrh->vring.num || indirect_count > desc_max) {
    358			vringh_bad("Descriptor loop in %p", descs);
    359			err = -ELOOP;
    360			goto fail;
    361		}
    362
    363		if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_WRITE))
    364			iov = wiov;
    365		else {
    366			iov = riov;
    367			if (unlikely(wiov && wiov->used)) {
    368				vringh_bad("Readable desc %p after writable",
    369					   &descs[i]);
    370				err = -EINVAL;
    371				goto fail;
    372			}
    373		}
    374
    375		if (!iov) {
    376			vringh_bad("Unexpected %s desc",
    377				   !wiov ? "writable" : "readable");
    378			err = -EPROTO;
    379			goto fail;
    380		}
    381
    382	again:
    383		/* Make sure it's OK, and get offset. */
    384		len = vringh32_to_cpu(vrh, desc.len);
    385		if (!rcheck(vrh, vringh64_to_cpu(vrh, desc.addr), &len, &range,
    386			    getrange)) {
    387			err = -EINVAL;
    388			goto fail;
    389		}
    390		addr = (void *)(unsigned long)(vringh64_to_cpu(vrh, desc.addr) +
    391					       range.offset);
    392
    393		if (unlikely(iov->used == (iov->max_num & ~VRINGH_IOV_ALLOCATED))) {
    394			err = resize_iovec(iov, gfp);
    395			if (err)
    396				goto fail;
    397		}
    398
    399		iov->iov[iov->used].iov_base = addr;
    400		iov->iov[iov->used].iov_len = len;
    401		iov->used++;
    402
    403		if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) {
    404			desc.len = cpu_to_vringh32(vrh,
    405				   vringh32_to_cpu(vrh, desc.len) - len);
    406			desc.addr = cpu_to_vringh64(vrh,
    407				    vringh64_to_cpu(vrh, desc.addr) + len);
    408			goto again;
    409		}
    410
    411		if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT)) {
    412			i = vringh16_to_cpu(vrh, desc.next);
    413		} else {
    414			/* Just in case we need to finish traversing above. */
    415			if (unlikely(up_next > 0)) {
    416				i = return_from_indirect(vrh, &up_next,
    417							 &descs, &desc_max);
    418				slow = false;
    419				indirect_count = 0;
    420			} else
    421				break;
    422		}
    423
    424		if (i >= desc_max) {
    425			vringh_bad("Chained index %u > %u", i, desc_max);
    426			err = -EINVAL;
    427			goto fail;
    428		}
    429	}
    430
    431	return 0;
    432
    433fail:
    434	return err;
    435}
    436
    437static inline int __vringh_complete(struct vringh *vrh,
    438				    const struct vring_used_elem *used,
    439				    unsigned int num_used,
    440				    int (*putu16)(const struct vringh *vrh,
    441						  __virtio16 *p, u16 val),
    442				    int (*putused)(const struct vringh *vrh,
    443						   struct vring_used_elem *dst,
    444						   const struct vring_used_elem
    445						   *src, unsigned num))
    446{
    447	struct vring_used *used_ring;
    448	int err;
    449	u16 used_idx, off;
    450
    451	used_ring = vrh->vring.used;
    452	used_idx = vrh->last_used_idx + vrh->completed;
    453
    454	off = used_idx % vrh->vring.num;
    455
    456	/* Compiler knows num_used == 1 sometimes, hence extra check */
    457	if (num_used > 1 && unlikely(off + num_used >= vrh->vring.num)) {
    458		u16 part = vrh->vring.num - off;
    459		err = putused(vrh, &used_ring->ring[off], used, part);
    460		if (!err)
    461			err = putused(vrh, &used_ring->ring[0], used + part,
    462				      num_used - part);
    463	} else
    464		err = putused(vrh, &used_ring->ring[off], used, num_used);
    465
    466	if (err) {
    467		vringh_bad("Failed to write %u used entries %u at %p",
    468			   num_used, off, &used_ring->ring[off]);
    469		return err;
    470	}
    471
    472	/* Make sure buffer is written before we update index. */
    473	virtio_wmb(vrh->weak_barriers);
    474
    475	err = putu16(vrh, &vrh->vring.used->idx, used_idx + num_used);
    476	if (err) {
    477		vringh_bad("Failed to update used index at %p",
    478			   &vrh->vring.used->idx);
    479		return err;
    480	}
    481
    482	vrh->completed += num_used;
    483	return 0;
    484}
    485
    486
    487static inline int __vringh_need_notify(struct vringh *vrh,
    488				       int (*getu16)(const struct vringh *vrh,
    489						     u16 *val,
    490						     const __virtio16 *p))
    491{
    492	bool notify;
    493	u16 used_event;
    494	int err;
    495
    496	/* Flush out used index update. This is paired with the
    497	 * barrier that the Guest executes when enabling
    498	 * interrupts. */
    499	virtio_mb(vrh->weak_barriers);
    500
    501	/* Old-style, without event indices. */
    502	if (!vrh->event_indices) {
    503		u16 flags;
    504		err = getu16(vrh, &flags, &vrh->vring.avail->flags);
    505		if (err) {
    506			vringh_bad("Failed to get flags at %p",
    507				   &vrh->vring.avail->flags);
    508			return err;
    509		}
    510		return (!(flags & VRING_AVAIL_F_NO_INTERRUPT));
    511	}
    512
    513	/* Modern: we know when other side wants to know. */
    514	err = getu16(vrh, &used_event, &vring_used_event(&vrh->vring));
    515	if (err) {
    516		vringh_bad("Failed to get used event idx at %p",
    517			   &vring_used_event(&vrh->vring));
    518		return err;
    519	}
    520
    521	/* Just in case we added so many that we wrap. */
    522	if (unlikely(vrh->completed > 0xffff))
    523		notify = true;
    524	else
    525		notify = vring_need_event(used_event,
    526					  vrh->last_used_idx + vrh->completed,
    527					  vrh->last_used_idx);
    528
    529	vrh->last_used_idx += vrh->completed;
    530	vrh->completed = 0;
    531	return notify;
    532}
    533
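/*
 * Editor's illustrative sketch (not part of the original file): the
 * vring_need_event() test used above is the wrap-safe "did the used index
 * move past the guest's event index?" comparison from
 * <uapi/linux/virtio_ring.h>. The helper below is a hypothetical restatement
 * of the same arithmetic, kept only to make the u16 wrap handling explicit.
 */
static inline bool __maybe_unused example_crossed_event_idx(u16 event_idx,
							    u16 new_idx,
							    u16 old_idx)
{
	/*
	 * Notify iff event_idx lies in the half-open window (old_idx,
	 * new_idx]; the u16 subtractions make the test immune to index
	 * wrap-around, exactly like vring_need_event().
	 */
	return (u16)(new_idx - event_idx - 1) < (u16)(new_idx - old_idx);
}
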
    534static inline bool __vringh_notify_enable(struct vringh *vrh,
    535					  int (*getu16)(const struct vringh *vrh,
    536							u16 *val, const __virtio16 *p),
    537					  int (*putu16)(const struct vringh *vrh,
    538							__virtio16 *p, u16 val))
    539{
    540	u16 avail;
    541
    542	if (!vrh->event_indices) {
    543		/* Old-school; update flags. */
    544		if (putu16(vrh, &vrh->vring.used->flags, 0) != 0) {
    545			vringh_bad("Clearing used flags %p",
    546				   &vrh->vring.used->flags);
    547			return true;
    548		}
    549	} else {
    550		if (putu16(vrh, &vring_avail_event(&vrh->vring),
    551			   vrh->last_avail_idx) != 0) {
    552			vringh_bad("Updating avail event index %p",
    553				   &vring_avail_event(&vrh->vring));
    554			return true;
    555		}
    556	}
    557
    558	/* They could have slipped one in as we were doing that: make
    559	 * sure it's written, then check again. */
    560	virtio_mb(vrh->weak_barriers);
    561
    562	if (getu16(vrh, &avail, &vrh->vring.avail->idx) != 0) {
    563		vringh_bad("Failed to check avail idx at %p",
    564			   &vrh->vring.avail->idx);
    565		return true;
    566	}
    567
    568	/* This is unlikely, so we just leave notifications enabled
    569	 * (if we're using event_indices, we'll only get one
    570	 * notification anyway). */
    571	return avail == vrh->last_avail_idx;
    572}
    573
    574static inline void __vringh_notify_disable(struct vringh *vrh,
    575					   int (*putu16)(const struct vringh *vrh,
    576							 __virtio16 *p, u16 val))
    577{
    578	if (!vrh->event_indices) {
    579		/* Old-school; update flags. */
    580		if (putu16(vrh, &vrh->vring.used->flags,
    581			   VRING_USED_F_NO_NOTIFY)) {
    582			vringh_bad("Setting used flags %p",
    583				   &vrh->vring.used->flags);
    584		}
    585	}
    586}
    587
    588/* Userspace access helpers: in this case, addresses are really userspace. */
    589static inline int getu16_user(const struct vringh *vrh, u16 *val, const __virtio16 *p)
    590{
    591	__virtio16 v = 0;
    592	int rc = get_user(v, (__force __virtio16 __user *)p);
    593	*val = vringh16_to_cpu(vrh, v);
    594	return rc;
    595}
    596
    597static inline int putu16_user(const struct vringh *vrh, __virtio16 *p, u16 val)
    598{
    599	__virtio16 v = cpu_to_vringh16(vrh, val);
    600	return put_user(v, (__force __virtio16 __user *)p);
    601}
    602
    603static inline int copydesc_user(const struct vringh *vrh,
    604				void *dst, const void *src, size_t len)
    605{
    606	return copy_from_user(dst, (__force void __user *)src, len) ?
    607		-EFAULT : 0;
    608}
    609
    610static inline int putused_user(const struct vringh *vrh,
    611			       struct vring_used_elem *dst,
    612			       const struct vring_used_elem *src,
    613			       unsigned int num)
    614{
    615	return copy_to_user((__force void __user *)dst, src,
    616			    sizeof(*dst) * num) ? -EFAULT : 0;
    617}
    618
    619static inline int xfer_from_user(const struct vringh *vrh, void *src,
    620				 void *dst, size_t len)
    621{
    622	return copy_from_user(dst, (__force void __user *)src, len) ?
    623		-EFAULT : 0;
    624}
    625
    626static inline int xfer_to_user(const struct vringh *vrh,
    627			       void *dst, void *src, size_t len)
    628{
    629	return copy_to_user((__force void __user *)dst, src, len) ?
    630		-EFAULT : 0;
    631}
    632
    633/**
    634 * vringh_init_user - initialize a vringh for a userspace vring.
    635 * @vrh: the vringh to initialize.
    636 * @features: the feature bits for this ring.
    637 * @num: the number of elements.
    638 * @weak_barriers: true if we only need memory barriers, not I/O.
     639 * @desc: the userspace descriptor pointer.
     640 * @avail: the userspace avail pointer.
     641 * @used: the userspace used pointer.
    642 *
    643 * Returns an error if num is invalid: you should check pointers
    644 * yourself!
    645 */
    646int vringh_init_user(struct vringh *vrh, u64 features,
    647		     unsigned int num, bool weak_barriers,
    648		     vring_desc_t __user *desc,
    649		     vring_avail_t __user *avail,
    650		     vring_used_t __user *used)
    651{
    652	/* Sane power of 2 please! */
    653	if (!num || num > 0xffff || (num & (num - 1))) {
    654		vringh_bad("Bad ring size %u", num);
    655		return -EINVAL;
    656	}
    657
    658	vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1));
    659	vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
    660	vrh->weak_barriers = weak_barriers;
    661	vrh->completed = 0;
    662	vrh->last_avail_idx = 0;
    663	vrh->last_used_idx = 0;
    664	vrh->vring.num = num;
    665	/* vring expects kernel addresses, but only used via accessors. */
    666	vrh->vring.desc = (__force struct vring_desc *)desc;
    667	vrh->vring.avail = (__force struct vring_avail *)avail;
    668	vrh->vring.used = (__force struct vring_used *)used;
    669	return 0;
    670}
    671EXPORT_SYMBOL(vringh_init_user);
    672
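/*
 * Editor's illustrative sketch (not part of the original file): initializing
 * a host-side view of a vring whose pages live in userspace. The feature
 * bits, the ring size of 256 and the three __user pointers are hypothetical
 * and would normally come from the device's setup ioctls.
 */
static int __maybe_unused example_setup_user_vring(struct vringh *vrh,
						   vring_desc_t __user *desc,
						   vring_avail_t __user *avail,
						   vring_used_t __user *used)
{
	u64 features = (1ULL << VIRTIO_F_VERSION_1) |
		       (1ULL << VIRTIO_RING_F_EVENT_IDX);

	/* 256 entries; weak (SMP) memory barriers are enough for userspace. */
	return vringh_init_user(vrh, features, 256, true, desc, avail, used);
}
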
    673/**
    674 * vringh_getdesc_user - get next available descriptor from userspace ring.
    675 * @vrh: the userspace vring.
    676 * @riov: where to put the readable descriptors (or NULL)
    677 * @wiov: where to put the writable descriptors (or NULL)
    678 * @getrange: function to call to check ranges.
    679 * @head: head index we received, for passing to vringh_complete_user().
    680 *
    681 * Returns 0 if there was no descriptor, 1 if there was, or -errno.
    682 *
    683 * Note that on error return, you can tell the difference between an
    684 * invalid ring and a single invalid descriptor: in the former case,
    685 * *head will be vrh->vring.num.  You may be able to ignore an invalid
    686 * descriptor, but there's not much you can do with an invalid ring.
    687 *
    688 * Note that you can reuse riov and wiov with subsequent calls. Content is
    689 * overwritten and memory reallocated if more space is needed.
     690 * When you no longer need riov and wiov, you should clean them up by
     691 * calling vringh_iov_cleanup() to release the memory, even on error!
    692 */
    693int vringh_getdesc_user(struct vringh *vrh,
    694			struct vringh_iov *riov,
    695			struct vringh_iov *wiov,
    696			bool (*getrange)(struct vringh *vrh,
    697					 u64 addr, struct vringh_range *r),
    698			u16 *head)
    699{
    700	int err;
    701
    702	*head = vrh->vring.num;
    703	err = __vringh_get_head(vrh, getu16_user, &vrh->last_avail_idx);
    704	if (err < 0)
    705		return err;
    706
    707	/* Empty... */
    708	if (err == vrh->vring.num)
    709		return 0;
    710
     711	/* We need the layouts to be identical for this to work */
    712	BUILD_BUG_ON(sizeof(struct vringh_kiov) != sizeof(struct vringh_iov));
    713	BUILD_BUG_ON(offsetof(struct vringh_kiov, iov) !=
    714		     offsetof(struct vringh_iov, iov));
    715	BUILD_BUG_ON(offsetof(struct vringh_kiov, i) !=
    716		     offsetof(struct vringh_iov, i));
    717	BUILD_BUG_ON(offsetof(struct vringh_kiov, used) !=
    718		     offsetof(struct vringh_iov, used));
    719	BUILD_BUG_ON(offsetof(struct vringh_kiov, max_num) !=
    720		     offsetof(struct vringh_iov, max_num));
    721	BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec));
    722	BUILD_BUG_ON(offsetof(struct iovec, iov_base) !=
    723		     offsetof(struct kvec, iov_base));
    724	BUILD_BUG_ON(offsetof(struct iovec, iov_len) !=
    725		     offsetof(struct kvec, iov_len));
    726	BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_base)
    727		     != sizeof(((struct kvec *)NULL)->iov_base));
    728	BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_len)
    729		     != sizeof(((struct kvec *)NULL)->iov_len));
    730
    731	*head = err;
    732	err = __vringh_iov(vrh, *head, (struct vringh_kiov *)riov,
    733			   (struct vringh_kiov *)wiov,
    734			   range_check, getrange, GFP_KERNEL, copydesc_user);
    735	if (err)
    736		return err;
    737
    738	return 1;
    739}
    740EXPORT_SYMBOL(vringh_getdesc_user);
    741
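/*
 * Editor's illustrative sketch (not part of the original file): the usual
 * consume/complete cycle built from the _user helpers in this file. riov and
 * wiov are assumed to have been prepared with vringh_iov_init();
 * example_getrange() is the hypothetical callback sketched after
 * range_check(), and the commented-out pull/push calls stand in for real
 * request handling.
 */
static int __maybe_unused example_service_user_ring(struct vringh *vrh,
						    struct vringh_iov *riov,
						    struct vringh_iov *wiov)
{
	u16 head;
	int err;

	for (;;) {
		err = vringh_getdesc_user(vrh, riov, wiov,
					  example_getrange, &head);
		if (err <= 0)		/* 0: ring empty, < 0: error */
			return err;

		/* Read the request from the readable descriptors... */
		/* vringh_iov_pull_user(riov, req, sizeof(req)); */
		/* ...and write a response into the writable ones. */
		/* vringh_iov_push_user(wiov, resp, resp_len); */

		err = vringh_complete_user(vrh, head, 0 /* bytes written */);
		if (err)
			return err;

		err = vringh_need_notify_user(vrh);
		if (err < 0)
			return err;
		if (err) {
			/* Notify the guest here, e.g. signal an eventfd. */
		}
	}
}
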
    742/**
    743 * vringh_iov_pull_user - copy bytes from vring_iov.
    744 * @riov: the riov as passed to vringh_getdesc_user() (updated as we consume)
    745 * @dst: the place to copy.
    746 * @len: the maximum length to copy.
    747 *
    748 * Returns the bytes copied <= len or a negative errno.
    749 */
    750ssize_t vringh_iov_pull_user(struct vringh_iov *riov, void *dst, size_t len)
    751{
    752	return vringh_iov_xfer(NULL, (struct vringh_kiov *)riov,
    753			       dst, len, xfer_from_user);
    754}
    755EXPORT_SYMBOL(vringh_iov_pull_user);
    756
    757/**
    758 * vringh_iov_push_user - copy bytes into vring_iov.
    759 * @wiov: the wiov as passed to vringh_getdesc_user() (updated as we consume)
    760 * @src: the place to copy from.
    761 * @len: the maximum length to copy.
    762 *
    763 * Returns the bytes copied <= len or a negative errno.
    764 */
    765ssize_t vringh_iov_push_user(struct vringh_iov *wiov,
    766			     const void *src, size_t len)
    767{
    768	return vringh_iov_xfer(NULL, (struct vringh_kiov *)wiov,
    769			       (void *)src, len, xfer_to_user);
    770}
    771EXPORT_SYMBOL(vringh_iov_push_user);
    772
    773/**
    774 * vringh_abandon_user - we've decided not to handle the descriptor(s).
    775 * @vrh: the vring.
     776 * @num: the number of descriptors to put back (ie. the number of
     777 *	 vringh_getdesc_user() calls to undo).
     778 *
     779 * The next vringh_getdesc_user() will return the old descriptor(s) again.
    780 */
    781void vringh_abandon_user(struct vringh *vrh, unsigned int num)
    782{
    783	/* We only update vring_avail_event(vr) when we want to be notified,
    784	 * so we haven't changed that yet. */
    785	vrh->last_avail_idx -= num;
    786}
    787EXPORT_SYMBOL(vringh_abandon_user);
    788
    789/**
    790 * vringh_complete_user - we've finished with descriptor, publish it.
    791 * @vrh: the vring.
    792 * @head: the head as filled in by vringh_getdesc_user.
    793 * @len: the length of data we have written.
    794 *
    795 * You should check vringh_need_notify_user() after one or more calls
    796 * to this function.
    797 */
    798int vringh_complete_user(struct vringh *vrh, u16 head, u32 len)
    799{
    800	struct vring_used_elem used;
    801
    802	used.id = cpu_to_vringh32(vrh, head);
    803	used.len = cpu_to_vringh32(vrh, len);
    804	return __vringh_complete(vrh, &used, 1, putu16_user, putused_user);
    805}
    806EXPORT_SYMBOL(vringh_complete_user);
    807
    808/**
    809 * vringh_complete_multi_user - we've finished with many descriptors.
    810 * @vrh: the vring.
    811 * @used: the head, length pairs.
    812 * @num_used: the number of used elements.
    813 *
    814 * You should check vringh_need_notify_user() after one or more calls
    815 * to this function.
    816 */
    817int vringh_complete_multi_user(struct vringh *vrh,
    818			       const struct vring_used_elem used[],
    819			       unsigned num_used)
    820{
    821	return __vringh_complete(vrh, used, num_used,
    822				 putu16_user, putused_user);
    823}
    824EXPORT_SYMBOL(vringh_complete_multi_user);
    825
    826/**
    827 * vringh_notify_enable_user - we want to know if something changes.
    828 * @vrh: the vring.
    829 *
    830 * This always enables notifications, but returns false if there are
    831 * now more buffers available in the vring.
    832 */
    833bool vringh_notify_enable_user(struct vringh *vrh)
    834{
    835	return __vringh_notify_enable(vrh, getu16_user, putu16_user);
    836}
    837EXPORT_SYMBOL(vringh_notify_enable_user);
    838
    839/**
    840 * vringh_notify_disable_user - don't tell us if something changes.
    841 * @vrh: the vring.
    842 *
    843 * This is our normal running state: we disable and then only enable when
    844 * we're going to sleep.
    845 */
    846void vringh_notify_disable_user(struct vringh *vrh)
    847{
    848	__vringh_notify_disable(vrh, putu16_user);
    849}
    850EXPORT_SYMBOL(vringh_notify_disable_user);
    851
    852/**
    853 * vringh_need_notify_user - must we tell the other side about used buffers?
    854 * @vrh: the vring we've called vringh_complete_user() on.
    855 *
    856 * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
    857 */
    858int vringh_need_notify_user(struct vringh *vrh)
    859{
    860	return __vringh_need_notify(vrh, getu16_user);
    861}
    862EXPORT_SYMBOL(vringh_need_notify_user);
    863
    864/* Kernelspace access helpers. */
    865static inline int getu16_kern(const struct vringh *vrh,
    866			      u16 *val, const __virtio16 *p)
    867{
    868	*val = vringh16_to_cpu(vrh, READ_ONCE(*p));
    869	return 0;
    870}
    871
    872static inline int putu16_kern(const struct vringh *vrh, __virtio16 *p, u16 val)
    873{
    874	WRITE_ONCE(*p, cpu_to_vringh16(vrh, val));
    875	return 0;
    876}
    877
    878static inline int copydesc_kern(const struct vringh *vrh,
    879				void *dst, const void *src, size_t len)
    880{
    881	memcpy(dst, src, len);
    882	return 0;
    883}
    884
    885static inline int putused_kern(const struct vringh *vrh,
    886			       struct vring_used_elem *dst,
    887			       const struct vring_used_elem *src,
    888			       unsigned int num)
    889{
    890	memcpy(dst, src, num * sizeof(*dst));
    891	return 0;
    892}
    893
    894static inline int xfer_kern(const struct vringh *vrh, void *src,
    895			    void *dst, size_t len)
    896{
    897	memcpy(dst, src, len);
    898	return 0;
    899}
    900
    901static inline int kern_xfer(const struct vringh *vrh, void *dst,
    902			    void *src, size_t len)
    903{
    904	memcpy(dst, src, len);
    905	return 0;
    906}
    907
    908/**
    909 * vringh_init_kern - initialize a vringh for a kernelspace vring.
    910 * @vrh: the vringh to initialize.
    911 * @features: the feature bits for this ring.
    912 * @num: the number of elements.
    913 * @weak_barriers: true if we only need memory barriers, not I/O.
     914 * @desc: the kernelspace descriptor pointer.
     915 * @avail: the kernelspace avail pointer.
     916 * @used: the kernelspace used pointer.
    917 *
    918 * Returns an error if num is invalid.
    919 */
    920int vringh_init_kern(struct vringh *vrh, u64 features,
    921		     unsigned int num, bool weak_barriers,
    922		     struct vring_desc *desc,
    923		     struct vring_avail *avail,
    924		     struct vring_used *used)
    925{
    926	/* Sane power of 2 please! */
    927	if (!num || num > 0xffff || (num & (num - 1))) {
    928		vringh_bad("Bad ring size %u", num);
    929		return -EINVAL;
    930	}
    931
    932	vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1));
    933	vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
    934	vrh->weak_barriers = weak_barriers;
    935	vrh->completed = 0;
    936	vrh->last_avail_idx = 0;
    937	vrh->last_used_idx = 0;
    938	vrh->vring.num = num;
    939	vrh->vring.desc = desc;
    940	vrh->vring.avail = avail;
    941	vrh->vring.used = used;
    942	return 0;
    943}
    944EXPORT_SYMBOL(vringh_init_kern);
    945
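/*
 * Editor's illustrative sketch (not part of the original file): wiring a
 * vringh up to a vring that lives in kernel memory. The ring buffer, its
 * size and the 4096-byte alignment are hypothetical; vring_init() from the
 * virtio_ring UAPI header derives the three component pointers from one
 * contiguous allocation.
 */
static int __maybe_unused example_setup_kern_vring(struct vringh *vrh,
						   void *ring_mem,
						   unsigned int num)
{
	struct vring vring;

	/* Split-ring layout with a 4096-byte aligned used ring. */
	vring_init(&vring, num, ring_mem, 4096);

	return vringh_init_kern(vrh, 1ULL << VIRTIO_F_VERSION_1, num, true,
				vring.desc, vring.avail, vring.used);
}
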
    946/**
    947 * vringh_getdesc_kern - get next available descriptor from kernelspace ring.
    948 * @vrh: the kernelspace vring.
    949 * @riov: where to put the readable descriptors (or NULL)
    950 * @wiov: where to put the writable descriptors (or NULL)
    951 * @head: head index we received, for passing to vringh_complete_kern().
    952 * @gfp: flags for allocating larger riov/wiov.
    953 *
    954 * Returns 0 if there was no descriptor, 1 if there was, or -errno.
    955 *
    956 * Note that on error return, you can tell the difference between an
    957 * invalid ring and a single invalid descriptor: in the former case,
    958 * *head will be vrh->vring.num.  You may be able to ignore an invalid
    959 * descriptor, but there's not much you can do with an invalid ring.
    960 *
    961 * Note that you can reuse riov and wiov with subsequent calls. Content is
    962 * overwritten and memory reallocated if more space is needed.
     963 * When you no longer need riov and wiov, you should clean them up by
     964 * calling vringh_kiov_cleanup() to release the memory, even on error!
    965 */
    966int vringh_getdesc_kern(struct vringh *vrh,
    967			struct vringh_kiov *riov,
    968			struct vringh_kiov *wiov,
    969			u16 *head,
    970			gfp_t gfp)
    971{
    972	int err;
    973
    974	err = __vringh_get_head(vrh, getu16_kern, &vrh->last_avail_idx);
    975	if (err < 0)
    976		return err;
    977
    978	/* Empty... */
    979	if (err == vrh->vring.num)
    980		return 0;
    981
    982	*head = err;
    983	err = __vringh_iov(vrh, *head, riov, wiov, no_range_check, NULL,
    984			   gfp, copydesc_kern);
    985	if (err)
    986		return err;
    987
    988	return 1;
    989}
    990EXPORT_SYMBOL(vringh_getdesc_kern);
    991
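/*
 * Editor's illustrative sketch (not part of the original file): the
 * kernelspace counterpart of the consume/complete cycle. riov and wiov are
 * assumed to have been prepared with vringh_kiov_init(); the 64-byte request
 * buffer is hypothetical.
 */
static int __maybe_unused example_service_kern_ring(struct vringh *vrh,
						    struct vringh_kiov *riov,
						    struct vringh_kiov *wiov)
{
	u16 head;
	int err;

	while ((err = vringh_getdesc_kern(vrh, riov, wiov, &head,
					  GFP_KERNEL)) > 0) {
		char req[64];
		ssize_t got;

		got = vringh_iov_pull_kern(riov, req, sizeof(req));
		if (got < 0)
			return got;

		/* ...handle req, optionally vringh_iov_push_kern() a reply... */

		err = vringh_complete_kern(vrh, head, 0 /* bytes written */);
		if (err)
			return err;
	}

	if (err == 0 && vringh_need_notify_kern(vrh) > 0) {
		/* Notify the other side here. */
	}

	return err;
}
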
    992/**
    993 * vringh_iov_pull_kern - copy bytes from vring_iov.
    994 * @riov: the riov as passed to vringh_getdesc_kern() (updated as we consume)
    995 * @dst: the place to copy.
    996 * @len: the maximum length to copy.
    997 *
    998 * Returns the bytes copied <= len or a negative errno.
    999 */
   1000ssize_t vringh_iov_pull_kern(struct vringh_kiov *riov, void *dst, size_t len)
   1001{
   1002	return vringh_iov_xfer(NULL, riov, dst, len, xfer_kern);
   1003}
   1004EXPORT_SYMBOL(vringh_iov_pull_kern);
   1005
   1006/**
   1007 * vringh_iov_push_kern - copy bytes into vring_iov.
   1008 * @wiov: the wiov as passed to vringh_getdesc_kern() (updated as we consume)
   1009 * @src: the place to copy from.
   1010 * @len: the maximum length to copy.
   1011 *
   1012 * Returns the bytes copied <= len or a negative errno.
   1013 */
   1014ssize_t vringh_iov_push_kern(struct vringh_kiov *wiov,
   1015			     const void *src, size_t len)
   1016{
   1017	return vringh_iov_xfer(NULL, wiov, (void *)src, len, kern_xfer);
   1018}
   1019EXPORT_SYMBOL(vringh_iov_push_kern);
   1020
   1021/**
   1022 * vringh_abandon_kern - we've decided not to handle the descriptor(s).
   1023 * @vrh: the vring.
    1024 * @num: the number of descriptors to put back (ie. the number of
    1025 *	 vringh_getdesc_kern() calls to undo).
    1026 *
    1027 * The next vringh_getdesc_kern() will return the old descriptor(s) again.
   1028 */
   1029void vringh_abandon_kern(struct vringh *vrh, unsigned int num)
   1030{
   1031	/* We only update vring_avail_event(vr) when we want to be notified,
   1032	 * so we haven't changed that yet. */
   1033	vrh->last_avail_idx -= num;
   1034}
   1035EXPORT_SYMBOL(vringh_abandon_kern);
   1036
   1037/**
   1038 * vringh_complete_kern - we've finished with descriptor, publish it.
   1039 * @vrh: the vring.
   1040 * @head: the head as filled in by vringh_getdesc_kern.
   1041 * @len: the length of data we have written.
   1042 *
   1043 * You should check vringh_need_notify_kern() after one or more calls
   1044 * to this function.
   1045 */
   1046int vringh_complete_kern(struct vringh *vrh, u16 head, u32 len)
   1047{
   1048	struct vring_used_elem used;
   1049
   1050	used.id = cpu_to_vringh32(vrh, head);
   1051	used.len = cpu_to_vringh32(vrh, len);
   1052
   1053	return __vringh_complete(vrh, &used, 1, putu16_kern, putused_kern);
   1054}
   1055EXPORT_SYMBOL(vringh_complete_kern);
   1056
   1057/**
   1058 * vringh_notify_enable_kern - we want to know if something changes.
   1059 * @vrh: the vring.
   1060 *
   1061 * This always enables notifications, but returns false if there are
   1062 * now more buffers available in the vring.
   1063 */
   1064bool vringh_notify_enable_kern(struct vringh *vrh)
   1065{
   1066	return __vringh_notify_enable(vrh, getu16_kern, putu16_kern);
   1067}
   1068EXPORT_SYMBOL(vringh_notify_enable_kern);
   1069
   1070/**
   1071 * vringh_notify_disable_kern - don't tell us if something changes.
   1072 * @vrh: the vring.
   1073 *
   1074 * This is our normal running state: we disable and then only enable when
   1075 * we're going to sleep.
   1076 */
   1077void vringh_notify_disable_kern(struct vringh *vrh)
   1078{
   1079	__vringh_notify_disable(vrh, putu16_kern);
   1080}
   1081EXPORT_SYMBOL(vringh_notify_disable_kern);
   1082
   1083/**
   1084 * vringh_need_notify_kern - must we tell the other side about used buffers?
   1085 * @vrh: the vring we've called vringh_complete_kern() on.
   1086 *
   1087 * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
   1088 */
   1089int vringh_need_notify_kern(struct vringh *vrh)
   1090{
   1091	return __vringh_need_notify(vrh, getu16_kern);
   1092}
   1093EXPORT_SYMBOL(vringh_need_notify_kern);
   1094
   1095#if IS_REACHABLE(CONFIG_VHOST_IOTLB)
   1096
   1097static int iotlb_translate(const struct vringh *vrh,
   1098			   u64 addr, u64 len, struct bio_vec iov[],
   1099			   int iov_size, u32 perm)
   1100{
   1101	struct vhost_iotlb_map *map;
   1102	struct vhost_iotlb *iotlb = vrh->iotlb;
   1103	int ret = 0;
   1104	u64 s = 0;
   1105
   1106	spin_lock(vrh->iotlb_lock);
   1107
   1108	while (len > s) {
   1109		u64 size, pa, pfn;
   1110
   1111		if (unlikely(ret >= iov_size)) {
   1112			ret = -ENOBUFS;
   1113			break;
   1114		}
   1115
   1116		map = vhost_iotlb_itree_first(iotlb, addr,
   1117					      addr + len - 1);
   1118		if (!map || map->start > addr) {
   1119			ret = -EINVAL;
   1120			break;
   1121		} else if (!(map->perm & perm)) {
   1122			ret = -EPERM;
   1123			break;
   1124		}
   1125
   1126		size = map->size - addr + map->start;
   1127		pa = map->addr + addr - map->start;
   1128		pfn = pa >> PAGE_SHIFT;
   1129		iov[ret].bv_page = pfn_to_page(pfn);
   1130		iov[ret].bv_len = min(len - s, size);
   1131		iov[ret].bv_offset = pa & (PAGE_SIZE - 1);
   1132		s += size;
   1133		addr += size;
   1134		++ret;
   1135	}
   1136
   1137	spin_unlock(vrh->iotlb_lock);
   1138
   1139	return ret;
   1140}
   1141
   1142static inline int copy_from_iotlb(const struct vringh *vrh, void *dst,
   1143				  void *src, size_t len)
   1144{
   1145	struct iov_iter iter;
   1146	struct bio_vec iov[16];
   1147	int ret;
   1148
   1149	ret = iotlb_translate(vrh, (u64)(uintptr_t)src,
   1150			      len, iov, 16, VHOST_MAP_RO);
   1151	if (ret < 0)
   1152		return ret;
   1153
   1154	iov_iter_bvec(&iter, READ, iov, ret, len);
   1155
   1156	ret = copy_from_iter(dst, len, &iter);
   1157
   1158	return ret;
   1159}
   1160
   1161static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
   1162				void *src, size_t len)
   1163{
   1164	struct iov_iter iter;
   1165	struct bio_vec iov[16];
   1166	int ret;
   1167
   1168	ret = iotlb_translate(vrh, (u64)(uintptr_t)dst,
   1169			      len, iov, 16, VHOST_MAP_WO);
   1170	if (ret < 0)
   1171		return ret;
   1172
   1173	iov_iter_bvec(&iter, WRITE, iov, ret, len);
   1174
   1175	return copy_to_iter(src, len, &iter);
   1176}
   1177
   1178static inline int getu16_iotlb(const struct vringh *vrh,
   1179			       u16 *val, const __virtio16 *p)
   1180{
   1181	struct bio_vec iov;
   1182	void *kaddr, *from;
   1183	int ret;
   1184
   1185	/* Atomic read is needed for getu16 */
   1186	ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p),
   1187			      &iov, 1, VHOST_MAP_RO);
   1188	if (ret < 0)
   1189		return ret;
   1190
   1191	kaddr = kmap_atomic(iov.bv_page);
   1192	from = kaddr + iov.bv_offset;
   1193	*val = vringh16_to_cpu(vrh, READ_ONCE(*(__virtio16 *)from));
   1194	kunmap_atomic(kaddr);
   1195
   1196	return 0;
   1197}
   1198
   1199static inline int putu16_iotlb(const struct vringh *vrh,
   1200			       __virtio16 *p, u16 val)
   1201{
   1202	struct bio_vec iov;
   1203	void *kaddr, *to;
   1204	int ret;
   1205
   1206	/* Atomic write is needed for putu16 */
   1207	ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p),
   1208			      &iov, 1, VHOST_MAP_WO);
   1209	if (ret < 0)
   1210		return ret;
   1211
   1212	kaddr = kmap_atomic(iov.bv_page);
   1213	to = kaddr + iov.bv_offset;
   1214	WRITE_ONCE(*(__virtio16 *)to, cpu_to_vringh16(vrh, val));
   1215	kunmap_atomic(kaddr);
   1216
   1217	return 0;
   1218}
   1219
   1220static inline int copydesc_iotlb(const struct vringh *vrh,
   1221				 void *dst, const void *src, size_t len)
   1222{
   1223	int ret;
   1224
   1225	ret = copy_from_iotlb(vrh, dst, (void *)src, len);
   1226	if (ret != len)
   1227		return -EFAULT;
   1228
   1229	return 0;
   1230}
   1231
   1232static inline int xfer_from_iotlb(const struct vringh *vrh, void *src,
   1233				  void *dst, size_t len)
   1234{
   1235	int ret;
   1236
   1237	ret = copy_from_iotlb(vrh, dst, src, len);
   1238	if (ret != len)
   1239		return -EFAULT;
   1240
   1241	return 0;
   1242}
   1243
   1244static inline int xfer_to_iotlb(const struct vringh *vrh,
   1245			       void *dst, void *src, size_t len)
   1246{
   1247	int ret;
   1248
   1249	ret = copy_to_iotlb(vrh, dst, src, len);
   1250	if (ret != len)
   1251		return -EFAULT;
   1252
   1253	return 0;
   1254}
   1255
   1256static inline int putused_iotlb(const struct vringh *vrh,
   1257				struct vring_used_elem *dst,
   1258				const struct vring_used_elem *src,
   1259				unsigned int num)
   1260{
   1261	int size = num * sizeof(*dst);
   1262	int ret;
   1263
   1264	ret = copy_to_iotlb(vrh, dst, (void *)src, num * sizeof(*dst));
   1265	if (ret != size)
   1266		return -EFAULT;
   1267
   1268	return 0;
   1269}
   1270
   1271/**
   1272 * vringh_init_iotlb - initialize a vringh for a ring with IOTLB.
   1273 * @vrh: the vringh to initialize.
   1274 * @features: the feature bits for this ring.
   1275 * @num: the number of elements.
   1276 * @weak_barriers: true if we only need memory barriers, not I/O.
    1277 * @desc: the IOVA of the descriptor ring (translated via the IOTLB).
    1278 * @avail: the IOVA of the avail ring (translated via the IOTLB).
    1279 * @used: the IOVA of the used ring (translated via the IOTLB).
   1280 *
   1281 * Returns an error if num is invalid.
   1282 */
   1283int vringh_init_iotlb(struct vringh *vrh, u64 features,
   1284		      unsigned int num, bool weak_barriers,
   1285		      struct vring_desc *desc,
   1286		      struct vring_avail *avail,
   1287		      struct vring_used *used)
   1288{
   1289	return vringh_init_kern(vrh, features, num, weak_barriers,
   1290				desc, avail, used);
   1291}
   1292EXPORT_SYMBOL(vringh_init_iotlb);
   1293
   1294/**
    1295 * vringh_set_iotlb - associate an IOTLB and its lock with a vringh.
   1296 * @vrh: the vring
   1297 * @iotlb: iotlb associated with this vring
   1298 * @iotlb_lock: spinlock to synchronize the iotlb accesses
   1299 */
   1300void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb,
   1301		      spinlock_t *iotlb_lock)
   1302{
   1303	vrh->iotlb = iotlb;
   1304	vrh->iotlb_lock = iotlb_lock;
   1305}
   1306EXPORT_SYMBOL(vringh_set_iotlb);
   1307
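/*
 * Editor's illustrative sketch (not part of the original file): pairing a
 * vringh with a vhost IOTLB, as a vDPA-style device might do. The entry
 * limit of 2048 and the externally owned spinlock are hypothetical; the
 * caller keeps ownership of both the IOTLB and the lock.
 */
static int __maybe_unused example_attach_iotlb(struct vringh *vrh,
					       spinlock_t *tlb_lock)
{
	struct vhost_iotlb *iotlb;

	iotlb = vhost_iotlb_alloc(2048, 0);
	if (!iotlb)
		return -ENOMEM;

	/* Every translation in iotlb_translate() is serialized by *tlb_lock. */
	vringh_set_iotlb(vrh, iotlb, tlb_lock);
	return 0;
}
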
   1308/**
   1309 * vringh_getdesc_iotlb - get next available descriptor from ring with
   1310 * IOTLB.
   1311 * @vrh: the kernelspace vring.
   1312 * @riov: where to put the readable descriptors (or NULL)
   1313 * @wiov: where to put the writable descriptors (or NULL)
   1314 * @head: head index we received, for passing to vringh_complete_iotlb().
   1315 * @gfp: flags for allocating larger riov/wiov.
   1316 *
   1317 * Returns 0 if there was no descriptor, 1 if there was, or -errno.
   1318 *
   1319 * Note that on error return, you can tell the difference between an
   1320 * invalid ring and a single invalid descriptor: in the former case,
   1321 * *head will be vrh->vring.num.  You may be able to ignore an invalid
   1322 * descriptor, but there's not much you can do with an invalid ring.
   1323 *
   1324 * Note that you can reuse riov and wiov with subsequent calls. Content is
   1325 * overwritten and memory reallocated if more space is needed.
    1326 * When you no longer need riov and wiov, you should clean them up by
    1327 * calling vringh_kiov_cleanup() to release the memory, even on error!
   1328 */
   1329int vringh_getdesc_iotlb(struct vringh *vrh,
   1330			 struct vringh_kiov *riov,
   1331			 struct vringh_kiov *wiov,
   1332			 u16 *head,
   1333			 gfp_t gfp)
   1334{
   1335	int err;
   1336
   1337	err = __vringh_get_head(vrh, getu16_iotlb, &vrh->last_avail_idx);
   1338	if (err < 0)
   1339		return err;
   1340
   1341	/* Empty... */
   1342	if (err == vrh->vring.num)
   1343		return 0;
   1344
   1345	*head = err;
   1346	err = __vringh_iov(vrh, *head, riov, wiov, no_range_check, NULL,
   1347			   gfp, copydesc_iotlb);
   1348	if (err)
   1349		return err;
   1350
   1351	return 1;
   1352}
   1353EXPORT_SYMBOL(vringh_getdesc_iotlb);
   1354
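/*
 * Editor's illustrative sketch (not part of the original file): the IOTLB
 * variants mirror the kernelspace loop, except that every copy helper also
 * takes the vringh so descriptor addresses can be translated. The 64-byte
 * request buffer and the GFP_ATOMIC allocation context are hypothetical.
 */
static int __maybe_unused example_service_iotlb_ring(struct vringh *vrh,
						     struct vringh_kiov *riov,
						     struct vringh_kiov *wiov)
{
	u16 head;
	int err;

	while ((err = vringh_getdesc_iotlb(vrh, riov, wiov, &head,
					   GFP_ATOMIC)) > 0) {
		char req[64];
		ssize_t got;

		got = vringh_iov_pull_iotlb(vrh, riov, req, sizeof(req));
		if (got < 0)
			return got;

		/* ...handle req, push a reply with vringh_iov_push_iotlb()... */

		err = vringh_complete_iotlb(vrh, head, 0 /* bytes written */);
		if (err)
			return err;
	}

	if (err == 0 && vringh_need_notify_iotlb(vrh) > 0) {
		/* Raise the device interrupt / run the vq callback here. */
	}

	return err;
}
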
   1355/**
   1356 * vringh_iov_pull_iotlb - copy bytes from vring_iov.
   1357 * @vrh: the vring.
   1358 * @riov: the riov as passed to vringh_getdesc_iotlb() (updated as we consume)
   1359 * @dst: the place to copy.
   1360 * @len: the maximum length to copy.
   1361 *
   1362 * Returns the bytes copied <= len or a negative errno.
   1363 */
   1364ssize_t vringh_iov_pull_iotlb(struct vringh *vrh,
   1365			      struct vringh_kiov *riov,
   1366			      void *dst, size_t len)
   1367{
   1368	return vringh_iov_xfer(vrh, riov, dst, len, xfer_from_iotlb);
   1369}
   1370EXPORT_SYMBOL(vringh_iov_pull_iotlb);
   1371
   1372/**
   1373 * vringh_iov_push_iotlb - copy bytes into vring_iov.
   1374 * @vrh: the vring.
   1375 * @wiov: the wiov as passed to vringh_getdesc_iotlb() (updated as we consume)
   1376 * @src: the place to copy from.
   1377 * @len: the maximum length to copy.
   1378 *
   1379 * Returns the bytes copied <= len or a negative errno.
   1380 */
   1381ssize_t vringh_iov_push_iotlb(struct vringh *vrh,
   1382			      struct vringh_kiov *wiov,
   1383			      const void *src, size_t len)
   1384{
   1385	return vringh_iov_xfer(vrh, wiov, (void *)src, len, xfer_to_iotlb);
   1386}
   1387EXPORT_SYMBOL(vringh_iov_push_iotlb);
   1388
   1389/**
   1390 * vringh_abandon_iotlb - we've decided not to handle the descriptor(s).
   1391 * @vrh: the vring.
    1392 * @num: the number of descriptors to put back (ie. the number of
    1393 *	 vringh_getdesc_iotlb() calls to undo).
    1394 *
    1395 * The next vringh_getdesc_iotlb() will return the old descriptor(s) again.
   1396 */
   1397void vringh_abandon_iotlb(struct vringh *vrh, unsigned int num)
   1398{
   1399	/* We only update vring_avail_event(vr) when we want to be notified,
   1400	 * so we haven't changed that yet.
   1401	 */
   1402	vrh->last_avail_idx -= num;
   1403}
   1404EXPORT_SYMBOL(vringh_abandon_iotlb);
   1405
   1406/**
   1407 * vringh_complete_iotlb - we've finished with descriptor, publish it.
   1408 * @vrh: the vring.
   1409 * @head: the head as filled in by vringh_getdesc_iotlb.
   1410 * @len: the length of data we have written.
   1411 *
   1412 * You should check vringh_need_notify_iotlb() after one or more calls
   1413 * to this function.
   1414 */
   1415int vringh_complete_iotlb(struct vringh *vrh, u16 head, u32 len)
   1416{
   1417	struct vring_used_elem used;
   1418
   1419	used.id = cpu_to_vringh32(vrh, head);
   1420	used.len = cpu_to_vringh32(vrh, len);
   1421
   1422	return __vringh_complete(vrh, &used, 1, putu16_iotlb, putused_iotlb);
   1423}
   1424EXPORT_SYMBOL(vringh_complete_iotlb);
   1425
   1426/**
   1427 * vringh_notify_enable_iotlb - we want to know if something changes.
   1428 * @vrh: the vring.
   1429 *
   1430 * This always enables notifications, but returns false if there are
   1431 * now more buffers available in the vring.
   1432 */
   1433bool vringh_notify_enable_iotlb(struct vringh *vrh)
   1434{
   1435	return __vringh_notify_enable(vrh, getu16_iotlb, putu16_iotlb);
   1436}
   1437EXPORT_SYMBOL(vringh_notify_enable_iotlb);
   1438
   1439/**
   1440 * vringh_notify_disable_iotlb - don't tell us if something changes.
   1441 * @vrh: the vring.
   1442 *
   1443 * This is our normal running state: we disable and then only enable when
   1444 * we're going to sleep.
   1445 */
   1446void vringh_notify_disable_iotlb(struct vringh *vrh)
   1447{
   1448	__vringh_notify_disable(vrh, putu16_iotlb);
   1449}
   1450EXPORT_SYMBOL(vringh_notify_disable_iotlb);
   1451
   1452/**
   1453 * vringh_need_notify_iotlb - must we tell the other side about used buffers?
   1454 * @vrh: the vring we've called vringh_complete_iotlb() on.
   1455 *
   1456 * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
   1457 */
   1458int vringh_need_notify_iotlb(struct vringh *vrh)
   1459{
   1460	return __vringh_need_notify(vrh, getu16_iotlb);
   1461}
   1462EXPORT_SYMBOL(vringh_need_notify_iotlb);
   1463
   1464#endif
   1465
   1466MODULE_LICENSE("GPL");