ib_core_uverbs.c
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
 * Copyright 2019 Marvell. All rights reserved.
 */
#include <linux/xarray.h>
#include "uverbs.h"
#include "core_priv.h"

/**
 * rdma_umap_priv_init() - Initialize the private data of a vma
 *
 * @priv: The already allocated private data
 * @vma: The vm area struct that needs private data
 * @entry: entry into the mmap_xa that needs to be linked with
 *         this vma
 *
 * Each time we map IO memory into user space this keeps track of the
 * mapping. When the device is hot-unplugged we 'zap' the mmaps in user space
 * to point to the zero page and allow the hot unplug to proceed.
 *
 * This is necessary for cases like PCI physical hot unplug as the actual BAR
 * memory may vanish after this and access to it from userspace could MCE.
 *
 * RDMA drivers supporting disassociation must have their user space designed
 * to cope in some way with their IO pages going to the zero page.
 */
void rdma_umap_priv_init(struct rdma_umap_priv *priv,
                         struct vm_area_struct *vma,
                         struct rdma_user_mmap_entry *entry)
{
        struct ib_uverbs_file *ufile = vma->vm_file->private_data;

        priv->vma = vma;
        if (entry) {
                kref_get(&entry->ref);
                priv->entry = entry;
        }
        vma->vm_private_data = priv;
        /* vm_ops is set up in ib_uverbs_mmap() to avoid module dependencies */

        mutex_lock(&ufile->umap_lock);
        list_add(&priv->list, &ufile->umaps);
        mutex_unlock(&ufile->umap_lock);
}
EXPORT_SYMBOL(rdma_umap_priv_init);

/**
 * rdma_user_mmap_io() - Map IO memory into a process
 *
 * @ucontext: associated user context
 * @vma: the vma related to the current mmap call
 * @pfn: pfn to map
 * @size: size to map
 * @prot: pgprot to use in remap call
 * @entry: mmap_entry retrieved from rdma_user_mmap_entry_get(), or NULL
 *         if mmap_entry is not used by the driver
 *
 * This is to be called by drivers as part of their mmap() functions if they
 * wish to send something like PCI-E BAR memory to userspace.
 *
 * Return: -EINVAL on wrong flags or size, -EAGAIN on failure to map,
 * 0 on success.
 */
int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
                      unsigned long pfn, unsigned long size, pgprot_t prot,
                      struct rdma_user_mmap_entry *entry)
{
        struct ib_uverbs_file *ufile = ucontext->ufile;
        struct rdma_umap_priv *priv;

        if (!(vma->vm_flags & VM_SHARED))
                return -EINVAL;

        if (vma->vm_end - vma->vm_start != size)
                return -EINVAL;

        /* Driver is using this wrong, must be called by ib_uverbs_mmap */
        if (WARN_ON(!vma->vm_file ||
                    vma->vm_file->private_data != ufile))
                return -EINVAL;
        lockdep_assert_held(&ufile->device->disassociate_srcu);

        priv = kzalloc(sizeof(*priv), GFP_KERNEL);
        if (!priv)
                return -ENOMEM;

        vma->vm_page_prot = prot;
        if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) {
                kfree(priv);
                return -EAGAIN;
        }

        rdma_umap_priv_init(priv, vma, entry);
        return 0;
}
EXPORT_SYMBOL(rdma_user_mmap_io);
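
/*
 * Example (illustrative sketch, not part of this file): a typical driver
 * ->mmap handler built on rdma_user_mmap_io() together with the entry
 * lookup helpers defined later in this file. It looks up the entry for
 * the requested offset and feeds the PFN kept in its private wrapper to
 * rdma_user_mmap_io(). struct my_user_mmap_entry, its pfn field and
 * my_drv_mmap() are hypothetical driver-side names.
 *
 *      struct my_user_mmap_entry {
 *              struct rdma_user_mmap_entry rdma_entry;
 *              unsigned long pfn;
 *      };
 *
 *      static int my_drv_mmap(struct ib_ucontext *uctx,
 *                             struct vm_area_struct *vma)
 *      {
 *              struct rdma_user_mmap_entry *rdma_entry;
 *              struct my_user_mmap_entry *entry;
 *              int ret;
 *
 *              // Takes a reference on the entry; dropped below once the
 *              // VMA is set up (the VMA holds its own reference).
 *              rdma_entry = rdma_user_mmap_entry_get(uctx, vma);
 *              if (!rdma_entry)
 *                      return -EINVAL;
 *              entry = container_of(rdma_entry,
 *                                   struct my_user_mmap_entry, rdma_entry);
 *
 *              ret = rdma_user_mmap_io(uctx, vma, entry->pfn,
 *                                      rdma_entry->npages * PAGE_SIZE,
 *                                      pgprot_noncached(vma->vm_page_prot),
 *                                      rdma_entry);
 *              rdma_user_mmap_entry_put(rdma_entry);
 *              return ret;
 *      }
 */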

/**
 * rdma_user_mmap_entry_get_pgoff() - Get an entry from the mmap_xa
 *
 * @ucontext: associated user context
 * @pgoff: The mmap offset >> PAGE_SHIFT
 *
 * This function is called when a user tries to mmap with an offset (returned
 * by rdma_user_mmap_get_offset()) it initially received from the driver. The
 * rdma_user_mmap_entry was created by the function
 * rdma_user_mmap_entry_insert(). This function increases the refcnt of the
 * entry so that it won't be deleted from the xarray in the meantime.
 *
 * Return: a reference to the entry if it exists, or NULL if there is no
 * match. rdma_user_mmap_entry_put() must be called to put the reference.
 */
struct rdma_user_mmap_entry *
rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext,
                               unsigned long pgoff)
{
        struct rdma_user_mmap_entry *entry;

        if (pgoff > U32_MAX)
                return NULL;

        xa_lock(&ucontext->mmap_xa);

        entry = xa_load(&ucontext->mmap_xa, pgoff);

        /*
         * If the refcount is zero, the entry is already being deleted;
         * driver_removed indicates that no further mmaps are possible and
         * we are waiting for the active VMAs to be closed.
         */
        if (!entry || entry->start_pgoff != pgoff || entry->driver_removed ||
            !kref_get_unless_zero(&entry->ref))
                goto err;

        xa_unlock(&ucontext->mmap_xa);

        ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#zx] returned\n",
                  pgoff, entry->npages);

        return entry;

err:
        xa_unlock(&ucontext->mmap_xa);
        return NULL;
}
EXPORT_SYMBOL(rdma_user_mmap_entry_get_pgoff);
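
/*
 * Illustrative sketch (userspace side, not part of this file): the byte
 * offset a driver hands out via rdma_user_mmap_get_offset() is passed
 * verbatim to mmap(); the kernel converts it to the pgoff that
 * rdma_user_mmap_entry_get_pgoff() looks up above. "fd" is the uverbs
 * device fd and "resp.db_offset" is a hypothetical driver response field.
 *
 *      void *db = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
 *                      MAP_SHARED, fd, resp.db_offset);
 *      if (db == MAP_FAILED)
 *              return -errno;
 */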

/**
 * rdma_user_mmap_entry_get() - Get an entry from the mmap_xa
 *
 * @ucontext: associated user context
 * @vma: the vma being mmap'd into
 *
 * This function is like rdma_user_mmap_entry_get_pgoff() except that it also
 * checks that the VMA is correct.
 */
struct rdma_user_mmap_entry *
rdma_user_mmap_entry_get(struct ib_ucontext *ucontext,
                         struct vm_area_struct *vma)
{
        struct rdma_user_mmap_entry *entry;

        if (!(vma->vm_flags & VM_SHARED))
                return NULL;
        entry = rdma_user_mmap_entry_get_pgoff(ucontext, vma->vm_pgoff);
        if (!entry)
                return NULL;
        if (entry->npages * PAGE_SIZE != vma->vm_end - vma->vm_start) {
                rdma_user_mmap_entry_put(entry);
                return NULL;
        }
        return entry;
}
EXPORT_SYMBOL(rdma_user_mmap_entry_get);

static void rdma_user_mmap_entry_free(struct kref *kref)
{
        struct rdma_user_mmap_entry *entry =
                container_of(kref, struct rdma_user_mmap_entry, ref);
        struct ib_ucontext *ucontext = entry->ucontext;
        unsigned long i;

        /*
         * Erase all entries occupied by this single entry; this is deferred
         * until all VMAs are closed so that the mmap offsets remain unique.
         */
        xa_lock(&ucontext->mmap_xa);
        for (i = 0; i < entry->npages; i++)
                __xa_erase(&ucontext->mmap_xa, entry->start_pgoff + i);
        xa_unlock(&ucontext->mmap_xa);

        ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#zx] removed\n",
                  entry->start_pgoff, entry->npages);

        if (ucontext->device->ops.mmap_free)
                ucontext->device->ops.mmap_free(entry);
}

/**
 * rdma_user_mmap_entry_put() - Drop reference to the mmap entry
 *
 * @entry: an entry in the mmap_xa
 *
 * This function is called when the mapping is closed if it was
 * an IO mapping, or when the driver is done with the entry for
 * some other reason.
 * Should be called after rdma_user_mmap_entry_get() was called
 * and the entry is no longer needed. This function will erase the
 * entry and free it if its refcnt reaches zero.
 */
void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry)
{
        kref_put(&entry->ref, rdma_user_mmap_entry_free);
}
EXPORT_SYMBOL(rdma_user_mmap_entry_put);

/**
 * rdma_user_mmap_entry_remove() - Drop reference to entry and
 *                                 mark it as unmappable
 *
 * @entry: the entry to mark as unmappable
 *
 * Drivers can call this to prevent userspace from creating more mappings for
 * entry, however existing mmaps continue to exist and ops->mmap_free() will
 * not be called until all user mmaps are destroyed.
 */
void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry)
{
        if (!entry)
                return;

        xa_lock(&entry->ucontext->mmap_xa);
        entry->driver_removed = true;
        xa_unlock(&entry->ucontext->mmap_xa);
        kref_put(&entry->ref, rdma_user_mmap_entry_free);
}
EXPORT_SYMBOL(rdma_user_mmap_entry_remove);
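
/*
 * Example (illustrative sketch): the usual teardown pairing for the two
 * helpers above. When the driver object backing a mapping is destroyed,
 * the driver calls rdma_user_mmap_entry_remove(); the wrapper itself is
 * only freed from the driver's ->mmap_free() callback, which runs once
 * the last reference (and hence the last user VMA) is gone.
 * struct my_user_mmap_entry and my_mmap_free() are hypothetical names.
 *
 *      static void my_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
 *      {
 *              struct my_user_mmap_entry *entry =
 *                      container_of(rdma_entry,
 *                                   struct my_user_mmap_entry, rdma_entry);
 *
 *              kfree(entry);
 *      }
 *
 *      // On object destruction:
 *      rdma_user_mmap_entry_remove(&entry->rdma_entry);
 */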

/**
 * rdma_user_mmap_entry_insert_range() - Insert an entry to the mmap_xa
 *                                       in a given range.
 *
 * @ucontext: associated user context.
 * @entry: the entry to insert into the mmap_xa
 * @length: length of the address that will be mmapped
 * @min_pgoff: minimum pgoff to be returned
 * @max_pgoff: maximum pgoff to be returned
 *
 * This function should be called by drivers that use the rdma_user_mmap
 * interface for implementing their mmap syscall. A database of mmap offsets
 * is maintained in the core, and helper functions are provided to insert
 * entries into the database and to extract entries when the user calls mmap
 * with the given offset. This function allocates a unique page offset in the
 * given range that should be provided to the user; the user will then use
 * the offset to retrieve information such as the address to be mapped and
 * how to map it.
 *
 * Return: 0 on success and -ENOMEM on failure
 */
int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
                                      struct rdma_user_mmap_entry *entry,
                                      size_t length, u32 min_pgoff,
                                      u32 max_pgoff)
{
        struct ib_uverbs_file *ufile = ucontext->ufile;
        XA_STATE(xas, &ucontext->mmap_xa, min_pgoff);
        u32 xa_first, xa_last, npages;
        int err;
        u32 i;

        if (!entry)
                return -EINVAL;

        kref_init(&entry->ref);
        entry->ucontext = ucontext;

        /*
         * We want the whole allocation to be done without interruption from
         * a different thread. The allocation requires finding a free range
         * and storing into it. During the xa_insert the lock could be
         * released, possibly allowing another thread to choose the same
         * range.
         */
        mutex_lock(&ufile->umap_lock);

        xa_lock(&ucontext->mmap_xa);

        /* We want to find an empty range */
        npages = (u32)DIV_ROUND_UP(length, PAGE_SIZE);
        entry->npages = npages;
        while (true) {
                /* First find an empty index */
                xas_find_marked(&xas, max_pgoff, XA_FREE_MARK);
                if (xas.xa_node == XAS_RESTART)
                        goto err_unlock;

                xa_first = xas.xa_index;

                /* Is there enough room to have the range? */
                if (check_add_overflow(xa_first, npages, &xa_last))
                        goto err_unlock;

                /*
                 * Now look for the next present entry. If an entry doesn't
                 * exist, we found an empty range and can proceed.
                 */
                xas_next_entry(&xas, xa_last - 1);
                if (xas.xa_node == XAS_BOUNDS || xas.xa_index >= xa_last)
                        break;
        }

        for (i = xa_first; i < xa_last; i++) {
                err = __xa_insert(&ucontext->mmap_xa, i, entry, GFP_KERNEL);
                if (err)
                        goto err_undo;
        }

        /*
         * Internally the kernel uses a page offset; in libc this is a byte
         * offset. Drivers should not return pgoff to userspace.
         */
        entry->start_pgoff = xa_first;
        xa_unlock(&ucontext->mmap_xa);
        mutex_unlock(&ufile->umap_lock);

        ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#x] inserted\n",
                  entry->start_pgoff, npages);

        return 0;

err_undo:
        for (; i > xa_first; i--)
                __xa_erase(&ucontext->mmap_xa, i - 1);

err_unlock:
        xa_unlock(&ucontext->mmap_xa);
        mutex_unlock(&ufile->umap_lock);
        return -ENOMEM;
}
EXPORT_SYMBOL(rdma_user_mmap_entry_insert_range);
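
/*
 * Example (illustrative sketch): passing min_pgoff == max_pgoff pins the
 * entry at one specific page offset, for drivers whose offset layout is
 * fixed rather than dynamically allocated. "uctx", "entry" and
 * "fixed_pgoff" are hypothetical.
 *
 *      ret = rdma_user_mmap_entry_insert_range(uctx, &entry->rdma_entry,
 *                                              PAGE_SIZE, fixed_pgoff,
 *                                              fixed_pgoff);
 */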

/**
 * rdma_user_mmap_entry_insert() - Insert an entry to the mmap_xa.
 *
 * @ucontext: associated user context.
 * @entry: the entry to insert into the mmap_xa
 * @length: length of the address that will be mmapped
 *
 * This function should be called by drivers that use the rdma_user_mmap
 * interface for handling user mmapped addresses. The database is maintained
 * in the core, and helper functions are provided to insert entries into the
 * database and to extract entries when the user calls mmap with the given
 * offset. This function allocates a unique page offset that should be
 * provided to the user; the user will then use the offset to retrieve
 * information such as the address to be mapped and how to map it.
 *
 * Return: 0 on success and -ENOMEM on failure
 */
int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
                                struct rdma_user_mmap_entry *entry,
                                size_t length)
{
        return rdma_user_mmap_entry_insert_range(ucontext, entry, length, 0,
                                                 U32_MAX);
}
EXPORT_SYMBOL(rdma_user_mmap_entry_insert);
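
/*
 * Example (illustrative sketch): the allocation side that pairs with the
 * ->mmap handler sketched earlier. The wrapper struct, "db_bar_pfn" and
 * "resp.db_offset" are hypothetical driver-side names.
 *
 *      struct my_user_mmap_entry *entry;
 *      int ret;
 *
 *      entry = kzalloc(sizeof(*entry), GFP_KERNEL);
 *      if (!entry)
 *              return -ENOMEM;
 *      entry->pfn = db_bar_pfn;
 *
 *      ret = rdma_user_mmap_entry_insert(uctx, &entry->rdma_entry,
 *                                        PAGE_SIZE);
 *      if (ret) {
 *              kfree(entry);
 *              return ret;
 *      }
 *
 *      // Byte offset for userspace to pass to mmap():
 *      resp.db_offset = rdma_user_mmap_get_offset(&entry->rdma_entry);
 */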