cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

ib_core_uverbs.c (11367B)


// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
 * Copyright 2019 Marvell. All rights reserved.
 */
#include <linux/xarray.h>
#include "uverbs.h"
#include "core_priv.h"

/**
 * rdma_umap_priv_init() - Initialize the private data of a vma
 *
 * @priv: The already allocated private data
 * @vma: The vm area struct that needs private data
 * @entry: entry into the mmap_xa that needs to be linked with
 *       this vma
 *
 * Each time we map IO memory into user space, this keeps track of the
 * mapping. When the device is hot-unplugged we 'zap' the mmaps in user space
 * to point to the zero page and allow the hot unplug to proceed.
 *
 * This is necessary for cases like PCI physical hot unplug as the actual BAR
 * memory may vanish after this and access to it from userspace could MCE.
 *
 * RDMA drivers supporting disassociation must have their user space designed
 * to cope in some way with their IO pages going to the zero page.
 */
void rdma_umap_priv_init(struct rdma_umap_priv *priv,
			 struct vm_area_struct *vma,
			 struct rdma_user_mmap_entry *entry)
{
	struct ib_uverbs_file *ufile = vma->vm_file->private_data;

	priv->vma = vma;
	if (entry) {
		kref_get(&entry->ref);
		priv->entry = entry;
	}
	vma->vm_private_data = priv;
	/* vm_ops is setup in ib_uverbs_mmap() to avoid module dependencies */

	mutex_lock(&ufile->umap_lock);
	list_add(&priv->list, &ufile->umaps);
	mutex_unlock(&ufile->umap_lock);
}
EXPORT_SYMBOL(rdma_umap_priv_init);

/**
 * rdma_user_mmap_io() - Map IO memory into a process
 *
 * @ucontext: associated user context
 * @vma: the vma related to the current mmap call
 * @pfn: pfn to map
 * @size: size to map
 * @prot: pgprot to use in remap call
 * @entry: mmap_entry retrieved from rdma_user_mmap_entry_get(), or NULL
 *         if mmap_entry is not used by the driver
 *
 * This is to be called by drivers as part of their mmap() functions if they
 * wish to send something like PCI-E BAR memory to userspace.
 *
 * Return -EINVAL on wrong flags or size, -EAGAIN on failure to map, and 0 on
 * success.
 */
int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
		      unsigned long pfn, unsigned long size, pgprot_t prot,
		      struct rdma_user_mmap_entry *entry)
{
	struct ib_uverbs_file *ufile = ucontext->ufile;
	struct rdma_umap_priv *priv;

	if (!(vma->vm_flags & VM_SHARED))
		return -EINVAL;

	if (vma->vm_end - vma->vm_start != size)
		return -EINVAL;

	/* Driver is using this wrong, must be called by ib_uverbs_mmap */
	if (WARN_ON(!vma->vm_file ||
		    vma->vm_file->private_data != ufile))
		return -EINVAL;
	lockdep_assert_held(&ufile->device->disassociate_srcu);

	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;

	vma->vm_page_prot = prot;
	if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) {
		kfree(priv);
		return -EAGAIN;
	}

	rdma_umap_priv_init(priv, vma, entry);
	return 0;
}
EXPORT_SYMBOL(rdma_user_mmap_io);
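
/*
 * Usage sketch (editorial illustration, not part of the upstream file): the
 * simplest way a driver can use rdma_user_mmap_io() from its mmap() method,
 * mapping a single BAR page without an mmap_entry. "my_ucontext" and its
 * bar_pfn field are hypothetical stand-ins for real driver state.
 */
struct my_ucontext {
	struct ib_ucontext ibucontext;
	unsigned long bar_pfn;	/* hypothetical: pfn of the device BAR page */
};

static int my_mmap(struct ib_ucontext *ibctx, struct vm_area_struct *vma)
{
	struct my_ucontext *uctx =
		container_of(ibctx, struct my_ucontext, ibucontext);

	/* Only one known offset in this toy driver */
	if (vma->vm_pgoff != 0)
		return -EINVAL;

	/* rdma_user_mmap_io() validates VM_SHARED and the size for us */
	return rdma_user_mmap_io(ibctx, vma, uctx->bar_pfn, PAGE_SIZE,
				 pgprot_noncached(vma->vm_page_prot),
				 NULL);
}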

/**
 * rdma_user_mmap_entry_get_pgoff() - Get an entry from the mmap_xa
 *
 * @ucontext: associated user context
 * @pgoff: The mmap offset >> PAGE_SHIFT
 *
 * This function is called when a user tries to mmap with an offset (returned
 * by rdma_user_mmap_get_offset()) it initially received from the driver. The
 * rdma_user_mmap_entry was created by the function
 * rdma_user_mmap_entry_insert().  This function increases the refcnt of the
 * entry so that it won't be deleted from the xarray in the meantime.
 *
 * Return a reference to the entry if it exists, or NULL if there is no
 * match. rdma_user_mmap_entry_put() must be called to put the reference.
 */
struct rdma_user_mmap_entry *
rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext,
			       unsigned long pgoff)
{
	struct rdma_user_mmap_entry *entry;

	if (pgoff > U32_MAX)
		return NULL;

	xa_lock(&ucontext->mmap_xa);

	entry = xa_load(&ucontext->mmap_xa, pgoff);

	/*
	 * If the refcount is zero, the entry is already being deleted.
	 * driver_removed indicates that no further mmaps are possible and
	 * we are waiting for the active VMAs to be closed.
	 */
	if (!entry || entry->start_pgoff != pgoff || entry->driver_removed ||
	    !kref_get_unless_zero(&entry->ref))
		goto err;

	xa_unlock(&ucontext->mmap_xa);

	ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#zx] returned\n",
		  pgoff, entry->npages);

	return entry;

err:
	xa_unlock(&ucontext->mmap_xa);
	return NULL;
}
EXPORT_SYMBOL(rdma_user_mmap_entry_get_pgoff);

/**
 * rdma_user_mmap_entry_get() - Get an entry from the mmap_xa
 *
 * @ucontext: associated user context
 * @vma: the vma being mmap'd into
 *
 * This function is like rdma_user_mmap_entry_get_pgoff() except that it also
 * checks that the VMA is correct.
 */
struct rdma_user_mmap_entry *
rdma_user_mmap_entry_get(struct ib_ucontext *ucontext,
			 struct vm_area_struct *vma)
{
	struct rdma_user_mmap_entry *entry;

	if (!(vma->vm_flags & VM_SHARED))
		return NULL;
	entry = rdma_user_mmap_entry_get_pgoff(ucontext, vma->vm_pgoff);
	if (!entry)
		return NULL;
	if (entry->npages * PAGE_SIZE != vma->vm_end - vma->vm_start) {
		rdma_user_mmap_entry_put(entry);
		return NULL;
	}
	return entry;
}
EXPORT_SYMBOL(rdma_user_mmap_entry_get);
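
/*
 * Usage sketch (editorial illustration, not part of the upstream file): the
 * common entry-based mmap() pattern, as used by drivers such as efa.
 * "my_mmap_entry" and its pfn field are a hypothetical container around
 * struct rdma_user_mmap_entry.
 */
struct my_mmap_entry {
	struct rdma_user_mmap_entry rdma_entry;
	unsigned long pfn;	/* hypothetical: what this entry maps */
};

static int my_mmap_with_entry(struct ib_ucontext *ibctx,
			      struct vm_area_struct *vma)
{
	struct rdma_user_mmap_entry *rdma_entry;
	struct my_mmap_entry *me;
	int ret;

	/* Looks up vma->vm_pgoff and takes a reference on the entry */
	rdma_entry = rdma_user_mmap_entry_get(ibctx, vma);
	if (!rdma_entry)
		return -EINVAL;
	me = container_of(rdma_entry, struct my_mmap_entry, rdma_entry);

	ret = rdma_user_mmap_io(ibctx, vma, me->pfn,
				vma->vm_end - vma->vm_start,
				pgprot_noncached(vma->vm_page_prot),
				rdma_entry);

	/*
	 * rdma_umap_priv_init() took its own reference for the vma, so the
	 * lookup reference can be dropped unconditionally.
	 */
	rdma_user_mmap_entry_put(rdma_entry);
	return ret;
}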

static void rdma_user_mmap_entry_free(struct kref *kref)
{
	struct rdma_user_mmap_entry *entry =
		container_of(kref, struct rdma_user_mmap_entry, ref);
	struct ib_ucontext *ucontext = entry->ucontext;
	unsigned long i;

	/*
	 * Erase all xarray slots occupied by this single entry. This is
	 * deferred until all VMAs are closed so that the mmap offsets remain
	 * unique.
	 */
	xa_lock(&ucontext->mmap_xa);
	for (i = 0; i < entry->npages; i++)
		__xa_erase(&ucontext->mmap_xa, entry->start_pgoff + i);
	xa_unlock(&ucontext->mmap_xa);

	ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#zx] removed\n",
		  entry->start_pgoff, entry->npages);

	if (ucontext->device->ops.mmap_free)
		ucontext->device->ops.mmap_free(entry);
}

/**
 * rdma_user_mmap_entry_put() - Drop reference to the mmap entry
 *
 * @entry: an entry in the mmap_xa
 *
 * This function is called when the mapping is closed if it was
 * an IO mapping, or when the driver is done with the entry for
 * some other reason.
 * Should be called after rdma_user_mmap_entry_get() was called
 * and the entry is no longer needed. This function will erase the
 * entry and free it if its refcnt reaches zero.
 */
void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry)
{
	kref_put(&entry->ref, rdma_user_mmap_entry_free);
}
EXPORT_SYMBOL(rdma_user_mmap_entry_put);

/**
 * rdma_user_mmap_entry_remove() - Drop reference to entry and
 *				   mark it as unmappable
 *
 * @entry: the entry to remove from the mmap_xa
 *
 * Drivers can call this to prevent userspace from creating more mappings for
 * entry, however existing mmaps continue to exist and ops->mmap_free() will
 * not be called until all user mmaps are destroyed.
 */
void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry)
{
	if (!entry)
		return;

	xa_lock(&entry->ucontext->mmap_xa);
	entry->driver_removed = true;
	xa_unlock(&entry->ucontext->mmap_xa);
	kref_put(&entry->ref, rdma_user_mmap_entry_free);
}
EXPORT_SYMBOL(rdma_user_mmap_entry_remove);
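
/*
 * Usage sketch (editorial illustration, not part of the upstream file): the
 * matching teardown side. The driver implements ops.mmap_free() to release
 * its container once the last reference is gone, and calls
 * rdma_user_mmap_entry_remove() when the object behind the mapping is
 * destroyed. "my_mmap_entry" is the hypothetical container from the earlier
 * sketch; my_mmap_free() would be wired into the driver's ib_device_ops.
 */
static void my_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
{
	struct my_mmap_entry *me =
		container_of(rdma_entry, struct my_mmap_entry, rdma_entry);

	/* Called only after the xarray slots were erased in entry_free() */
	kfree(me);
}

static void my_destroy_object(struct my_mmap_entry *me)
{
	/*
	 * Blocks new mmaps immediately; my_mmap_free() runs once the last
	 * existing user mapping (if any) is torn down.
	 */
	rdma_user_mmap_entry_remove(&me->rdma_entry);
}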

/**
 * rdma_user_mmap_entry_insert_range() - Insert an entry to the mmap_xa
 *					 in a given range.
 *
 * @ucontext: associated user context.
 * @entry: the entry to insert into the mmap_xa
 * @length: length of the address that will be mmapped
 * @min_pgoff: minimum pgoff to be returned
 * @max_pgoff: maximum pgoff to be returned
 *
 * This function should be called by drivers that use the rdma_user_mmap
 * interface for implementing their mmap syscall. A database of mmap offsets
 * is handled in the core and helper functions are provided to insert entries
 * into the database and extract entries when the user calls mmap with the
 * given offset. The function allocates a unique page offset in a given range
 * that should be provided to the user; the user will use the offset to
 * retrieve information such as the address to be mapped and how.
 *
 * Return: 0 on success and -ENOMEM on failure
 */
int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
				      struct rdma_user_mmap_entry *entry,
				      size_t length, u32 min_pgoff,
				      u32 max_pgoff)
{
	struct ib_uverbs_file *ufile = ucontext->ufile;
	XA_STATE(xas, &ucontext->mmap_xa, min_pgoff);
	u32 xa_first, xa_last, npages;
	int err;
	u32 i;

	if (!entry)
		return -EINVAL;

	kref_init(&entry->ref);
	entry->ucontext = ucontext;

	/*
	 * We want the whole allocation to be done without interruption from a
	 * different thread. The allocation requires finding a free range and
	 * storing the entry into it. During the xa_insert the lock could be
	 * released, possibly allowing another thread to choose the same range.
	 */
	mutex_lock(&ufile->umap_lock);

	xa_lock(&ucontext->mmap_xa);

	/* We want to find an empty range */
	npages = (u32)DIV_ROUND_UP(length, PAGE_SIZE);
	entry->npages = npages;
	while (true) {
		/* First find an empty index */
		xas_find_marked(&xas, max_pgoff, XA_FREE_MARK);
		if (xas.xa_node == XAS_RESTART)
			goto err_unlock;

		xa_first = xas.xa_index;

		/* Is there enough room to have the range? */
		if (check_add_overflow(xa_first, npages, &xa_last))
			goto err_unlock;

		/*
		 * Now look for the next present entry. If an entry doesn't
		 * exist, we found an empty range and can proceed.
		 */
		xas_next_entry(&xas, xa_last - 1);
		if (xas.xa_node == XAS_BOUNDS || xas.xa_index >= xa_last)
			break;
	}

	for (i = xa_first; i < xa_last; i++) {
		err = __xa_insert(&ucontext->mmap_xa, i, entry, GFP_KERNEL);
		if (err)
			goto err_undo;
	}

	/*
	 * Internally the kernel uses a page offset; in libc this is a byte
	 * offset. Drivers should not return pgoff to userspace.
	 */
	entry->start_pgoff = xa_first;
	xa_unlock(&ucontext->mmap_xa);
	mutex_unlock(&ufile->umap_lock);

	ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#x] inserted\n",
		  entry->start_pgoff, npages);

	return 0;

err_undo:
	for (; i > xa_first; i--)
		__xa_erase(&ucontext->mmap_xa, i - 1);

err_unlock:
	xa_unlock(&ucontext->mmap_xa);
	mutex_unlock(&ufile->umap_lock);
	return -ENOMEM;
}
EXPORT_SYMBOL(rdma_user_mmap_entry_insert_range);
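
/*
 * Usage sketch (editorial illustration, not part of the upstream file):
 * reserving a well-known page offset by pinning the allocation range. With
 * min_pgoff == max_pgoff the search can only start at that index, which may
 * be useful when userspace hardcodes the mmap offset. All names below are
 * hypothetical.
 */
#define MY_LEGACY_DB_PGOFF 0	/* hypothetical ABI-fixed offset */

static int my_insert_legacy_entry(struct ib_ucontext *ibctx,
				  struct my_mmap_entry *me)
{
	/* Fails with -ENOMEM if the fixed slot is already taken */
	return rdma_user_mmap_entry_insert_range(ibctx, &me->rdma_entry,
						 PAGE_SIZE,
						 MY_LEGACY_DB_PGOFF,
						 MY_LEGACY_DB_PGOFF);
}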

/**
 * rdma_user_mmap_entry_insert() - Insert an entry to the mmap_xa.
 *
 * @ucontext: associated user context.
 * @entry: the entry to insert into the mmap_xa
 * @length: length of the address that will be mmapped
 *
 * This function should be called by drivers that use the rdma_user_mmap
 * interface for handling user mmapped addresses. The database is handled in
 * the core and helper functions are provided to insert entries into the
 * database and extract entries when the user calls mmap with the given offset.
 * The function allocates a unique page offset that should be provided to the
 * user; the user will use the offset to retrieve information such as the
 * address to be mapped and how.
 *
 * Return: 0 on success and -ENOMEM on failure
 */
int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
				struct rdma_user_mmap_entry *entry,
				size_t length)
{
	return rdma_user_mmap_entry_insert_range(ucontext, entry, length, 0,
						 U32_MAX);
}
EXPORT_SYMBOL(rdma_user_mmap_entry_insert);
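
/*
 * Usage sketch (editorial illustration, not part of the upstream file): the
 * full driver-side lifecycle of an entry. The offset handed back to
 * userspace comes from rdma_user_mmap_get_offset() (a byte offset, per the
 * comment in rdma_user_mmap_entry_insert_range() above), never from
 * start_pgoff directly. All "my_" names are hypothetical.
 */
static int my_alloc_user_mapping(struct ib_ucontext *ibctx,
				 unsigned long pfn, u64 *mmap_offset)
{
	struct my_mmap_entry *me;
	int ret;

	me = kzalloc(sizeof(*me), GFP_KERNEL);
	if (!me)
		return -ENOMEM;
	me->pfn = pfn;

	ret = rdma_user_mmap_entry_insert(ibctx, &me->rdma_entry, PAGE_SIZE);
	if (ret) {
		kfree(me);	/* not inserted, so no mmap_free() callback */
		return ret;
	}

	/* Userspace passes this value as the mmap() offset argument */
	*mmap_offset = rdma_user_mmap_get_offset(&me->rdma_entry);
	return 0;
}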