cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

siw_mem.c (10211B)


// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause

/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
/* Copyright (c) 2008-2019, IBM Corporation */

#include <linux/gfp.h>
#include <rdma/ib_verbs.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>
#include <linux/resource.h>

#include "siw.h"
#include "siw_mem.h"

/*
 * STag lookup is based on its index part only (24 bits).
 * The code avoids the special STag of zero and tries to randomize
 * STag values between 1 and SIW_STAG_MAX_INDEX.
 */
int siw_mem_add(struct siw_device *sdev, struct siw_mem *m)
{
        struct xa_limit limit = XA_LIMIT(1, 0x00ffffff);
        u32 id, next;

        get_random_bytes(&next, 4);
        next &= 0x00ffffff;

        if (xa_alloc_cyclic(&sdev->mem_xa, &id, m, limit, &next,
            GFP_KERNEL) < 0)
                return -ENOMEM;

        /* Set the STag index part */
        m->stag = id << 8;

        siw_dbg_mem(m, "new MEM object\n");

        return 0;
}
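
/*
 * Illustrative sketch (hypothetical helpers, not used elsewhere): the STag
 * layout assumed above keeps the 24-bit xarray index in bits 31..8 and an
 * 8-bit key in bits 7..0, which is zero for STags created here.
 */
static inline u32 siw_stag_index(u32 stag)
{
        return stag >> 8;               /* index part used for xarray lookup */
}

static inline u32 siw_stag_compose(u32 index, u8 key)
{
        return (index << 8) | key;      /* e.g. index 0x000123 -> STag 0x00012300 */
}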

/*
 * siw_mem_id2obj()
 *
 * resolves memory from stag given by id. might be called from:
 * o process context before sending out of sgl, or
 * o in softirq when resolving target memory
 */
struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index)
{
        struct siw_mem *mem;

        rcu_read_lock();
        mem = xa_load(&sdev->mem_xa, stag_index);
        if (likely(mem && kref_get_unless_zero(&mem->ref))) {
                rcu_read_unlock();
                return mem;
        }
        rcu_read_unlock();

        return NULL;
}
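
/*
 * Usage sketch (hypothetical helper): a successful siw_mem_id2obj() takes
 * a reference on the object, which the caller must drop again via
 * siw_mem_put() once the memory is no longer needed.
 */
static bool siw_stag_resolves(struct siw_device *sdev, u32 stag)
{
        struct siw_mem *mem = siw_mem_id2obj(sdev, stag >> 8);

        if (!mem)
                return false;

        siw_mem_put(mem);       /* drop the reference taken by the lookup */
        return true;
}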

static void siw_free_plist(struct siw_page_chunk *chunk, int num_pages,
                           bool dirty)
{
        unpin_user_pages_dirty_lock(chunk->plist, num_pages, dirty);
}

void siw_umem_release(struct siw_umem *umem, bool dirty)
{
        struct mm_struct *mm_s = umem->owning_mm;
        int i, num_pages = umem->num_pages;

        for (i = 0; num_pages; i++) {
                int to_free = min_t(int, PAGES_PER_CHUNK, num_pages);

                siw_free_plist(&umem->page_chunk[i], to_free,
                               umem->writable && dirty);
                kfree(umem->page_chunk[i].plist);
                num_pages -= to_free;
        }
        atomic64_sub(umem->num_pages, &mm_s->pinned_vm);

        mmdrop(mm_s);
        kfree(umem->page_chunk);
        kfree(umem);
}

int siw_mr_add_mem(struct siw_mr *mr, struct ib_pd *pd, void *mem_obj,
                   u64 start, u64 len, int rights)
{
        struct siw_device *sdev = to_siw_dev(pd->device);
        struct siw_mem *mem = kzalloc(sizeof(*mem), GFP_KERNEL);
        struct xa_limit limit = XA_LIMIT(1, 0x00ffffff);
        u32 id, next;

        if (!mem)
                return -ENOMEM;

        mem->mem_obj = mem_obj;
        mem->stag_valid = 0;
        mem->sdev = sdev;
        mem->va = start;
        mem->len = len;
        mem->pd = pd;
        mem->perms = rights & IWARP_ACCESS_MASK;
        kref_init(&mem->ref);

        get_random_bytes(&next, 4);
        next &= 0x00ffffff;

        if (xa_alloc_cyclic(&sdev->mem_xa, &id, mem, limit, &next,
            GFP_KERNEL) < 0) {
                kfree(mem);
                return -ENOMEM;
        }

        mr->mem = mem;
        /* Set the STag index part */
        mem->stag = id << 8;
        mr->base_mr.lkey = mr->base_mr.rkey = mem->stag;

        return 0;
}

void siw_mr_drop_mem(struct siw_mr *mr)
{
        struct siw_mem *mem = mr->mem, *found;

        mem->stag_valid = 0;

        /* make STag invalid visible asap */
        smp_mb();

        found = xa_erase(&mem->sdev->mem_xa, mem->stag >> 8);
        WARN_ON(found != mem);
        siw_mem_put(mem);
}

void siw_free_mem(struct kref *ref)
{
        struct siw_mem *mem = container_of(ref, struct siw_mem, ref);

        siw_dbg_mem(mem, "free mem, pbl: %s\n", mem->is_pbl ? "y" : "n");

        if (!mem->is_mw && mem->mem_obj) {
                if (mem->is_pbl == 0)
                        siw_umem_release(mem->umem, true);
                else
                        kfree(mem->pbl);
        }
        kfree(mem);
}

/*
 * siw_check_mem()
 *
 * Check protection domain, STAG state, access permissions and
 * address range for memory object.
 *
 * @pd:         Protection Domain memory should belong to
 * @mem:        memory to be checked
 * @addr:       starting addr of mem
 * @perms:      requested access permissions
 * @len:        len of memory interval to be checked
 *
 */
int siw_check_mem(struct ib_pd *pd, struct siw_mem *mem, u64 addr,
                  enum ib_access_flags perms, int len)
{
        if (!mem->stag_valid) {
                siw_dbg_pd(pd, "STag 0x%08x invalid\n", mem->stag);
                return -E_STAG_INVALID;
        }
        if (mem->pd != pd) {
                siw_dbg_pd(pd, "STag 0x%08x: PD mismatch\n", mem->stag);
                return -E_PD_MISMATCH;
        }
        /*
         * check access permissions
         */
        if ((mem->perms & perms) < perms) {
                siw_dbg_pd(pd, "permissions 0x%08x < 0x%08x\n",
                           mem->perms, perms);
                return -E_ACCESS_PERM;
        }
        /*
         * Check if access falls into valid memory interval.
         */
        if (addr < mem->va || addr + len > mem->va + mem->len) {
                siw_dbg_pd(pd, "MEM interval len %d\n", len);
                siw_dbg_pd(pd, "[0x%pK, 0x%pK] out of bounds\n",
                           (void *)(uintptr_t)addr,
                           (void *)(uintptr_t)(addr + len));
                siw_dbg_pd(pd, "[0x%pK, 0x%pK] STag=0x%08x\n",
                           (void *)(uintptr_t)mem->va,
                           (void *)(uintptr_t)(mem->va + mem->len),
                           mem->stag);

                return -E_BASE_BOUNDS;
        }
        return E_ACCESS_OK;
}

/*
 * siw_check_sge()
 *
 * Check SGE for access rights in given interval
 *
 * @pd:         Protection Domain memory should belong to
 * @sge:        SGE to be checked
 * @mem:        location of memory reference within array
 * @perms:      requested access permissions
 * @off:        starting offset in SGE
 * @len:        len of memory interval to be checked
 *
 * NOTE: Function references SGE's memory object (mem->obj)
 * if not yet done. New reference is kept if check went ok and
 * released if check failed. If mem->obj is already valid, no new
 * lookup is being done and mem is not released if the check fails.
 */
int siw_check_sge(struct ib_pd *pd, struct siw_sge *sge, struct siw_mem *mem[],
                  enum ib_access_flags perms, u32 off, int len)
{
        struct siw_device *sdev = to_siw_dev(pd->device);
        struct siw_mem *new = NULL;
        int rv = E_ACCESS_OK;

        if (len + off > sge->length) {
                rv = -E_BASE_BOUNDS;
                goto fail;
        }
        if (*mem == NULL) {
                new = siw_mem_id2obj(sdev, sge->lkey >> 8);
                if (unlikely(!new)) {
                        siw_dbg_pd(pd, "STag unknown: 0x%08x\n", sge->lkey);
                        rv = -E_STAG_INVALID;
                        goto fail;
                }
                *mem = new;
        }
        /* Check if user re-registered with different STag key */
        if (unlikely((*mem)->stag != sge->lkey)) {
                siw_dbg_mem((*mem), "STag mismatch: 0x%08x\n", sge->lkey);
                rv = -E_STAG_INVALID;
                goto fail;
        }
        rv = siw_check_mem(pd, *mem, sge->laddr + off, perms, len);
        if (unlikely(rv))
                goto fail;

        return 0;

fail:
        if (new) {
                *mem = NULL;
                siw_mem_put(new);
        }
        return rv;
}
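
/*
 * Usage sketch (hypothetical helper): validate every SGE of a send WQE.
 * wqe->mem[i] caches the memory reference taken on the first successful
 * check, so a repeated call does not trigger another STag lookup.
 */
static int siw_check_send_sgl(struct ib_pd *pd, struct siw_wqe *wqe,
                              enum ib_access_flags perms)
{
        int i, rv;

        for (i = 0; i < wqe->sqe.num_sge; i++) {
                rv = siw_check_sge(pd, &wqe->sqe.sge[i], &wqe->mem[i],
                                   perms, 0, wqe->sqe.sge[i].length);
                if (rv)
                        return rv;
        }
        return 0;
}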

void siw_wqe_put_mem(struct siw_wqe *wqe, enum siw_opcode op)
{
        switch (op) {
        case SIW_OP_SEND:
        case SIW_OP_WRITE:
        case SIW_OP_SEND_WITH_IMM:
        case SIW_OP_SEND_REMOTE_INV:
        case SIW_OP_READ:
        case SIW_OP_READ_LOCAL_INV:
                if (!(wqe->sqe.flags & SIW_WQE_INLINE))
                        siw_unref_mem_sgl(wqe->mem, wqe->sqe.num_sge);
                break;

        case SIW_OP_RECEIVE:
                siw_unref_mem_sgl(wqe->mem, wqe->rqe.num_sge);
                break;

        case SIW_OP_READ_RESPONSE:
                siw_unref_mem_sgl(wqe->mem, 1);
                break;

        default:
                /*
                 * SIW_OP_INVAL_STAG and SIW_OP_REG_MR
                 * do not hold memory references
                 */
                break;
        }
}

int siw_invalidate_stag(struct ib_pd *pd, u32 stag)
{
        struct siw_device *sdev = to_siw_dev(pd->device);
        struct siw_mem *mem = siw_mem_id2obj(sdev, stag >> 8);
        int rv = 0;

        if (unlikely(!mem)) {
                siw_dbg_pd(pd, "STag 0x%08x unknown\n", stag);
                return -EINVAL;
        }
        if (unlikely(mem->pd != pd)) {
                siw_dbg_pd(pd, "PD mismatch for STag 0x%08x\n", stag);
                rv = -EACCES;
                goto out;
        }
        /*
         * Per RDMA verbs definition, an STag may already be in invalid
         * state if invalidation is requested. So no state check here.
         */
        mem->stag_valid = 0;

        siw_dbg_pd(pd, "STag 0x%08x now invalid\n", stag);
out:
        siw_mem_put(mem);
        return rv;
}

/*
 * Gets physical address backed by PBL element. Address is referenced
 * by linear byte offset into list of variably sized PB elements.
 * Optionally, provides remaining len within current element, and
 * current PBL index for later resume at same element.
 */
dma_addr_t siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx)
{
        int i = idx ? *idx : 0;

        while (i < pbl->num_buf) {
                struct siw_pble *pble = &pbl->pbe[i];

                if (pble->pbl_off + pble->size > off) {
                        u64 pble_off = off - pble->pbl_off;

                        if (len)
                                *len = pble->size - pble_off;
                        if (idx)
                                *idx = i;

                        return pble->addr + pble_off;
                }
                i++;
        }
        if (len)
                *len = 0;
        return 0;
}
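
/*
 * Usage sketch (hypothetical helper): walk a PBL front to back, resuming
 * each lookup at the element where the previous one stopped.
 */
static void siw_pbl_walk(struct siw_pbl *pbl)
{
        u64 off = 0;
        int idx = 0;

        for (;;) {
                int len;
                dma_addr_t paddr = siw_pbl_get_buffer(pbl, off, &len, &idx);

                if (!len)
                        break;

                pr_debug("pble[%d]: addr %pad, %d bytes\n", idx, &paddr, len);
                off += len;     /* advance to the start of the next element */
        }
}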

struct siw_pbl *siw_pbl_alloc(u32 num_buf)
{
        struct siw_pbl *pbl;

        if (num_buf == 0)
                return ERR_PTR(-EINVAL);

        pbl = kzalloc(struct_size(pbl, pbe, num_buf), GFP_KERNEL);
        if (!pbl)
                return ERR_PTR(-ENOMEM);

        pbl->max_buf = num_buf;

        return pbl;
}

struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
{
        struct siw_umem *umem;
        struct mm_struct *mm_s;
        u64 first_page_va;
        unsigned long mlock_limit;
        unsigned int foll_flags = FOLL_WRITE;
        int num_pages, num_chunks, i, rv = 0;

        if (!can_do_mlock())
                return ERR_PTR(-EPERM);

        if (!len)
                return ERR_PTR(-EINVAL);

        first_page_va = start & PAGE_MASK;
        num_pages = PAGE_ALIGN(start + len - first_page_va) >> PAGE_SHIFT;
        num_chunks = (num_pages >> CHUNK_SHIFT) + 1;

        umem = kzalloc(sizeof(*umem), GFP_KERNEL);
        if (!umem)
                return ERR_PTR(-ENOMEM);

        mm_s = current->mm;
        umem->owning_mm = mm_s;
        umem->writable = writable;

        mmgrab(mm_s);

        if (!writable)
                foll_flags |= FOLL_FORCE;

        mmap_read_lock(mm_s);

        mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

        if (num_pages + atomic64_read(&mm_s->pinned_vm) > mlock_limit) {
                rv = -ENOMEM;
                goto out_sem_up;
        }
        umem->fp_addr = first_page_va;

        umem->page_chunk =
                kcalloc(num_chunks, sizeof(struct siw_page_chunk), GFP_KERNEL);
        if (!umem->page_chunk) {
                rv = -ENOMEM;
                goto out_sem_up;
        }
        for (i = 0; num_pages; i++) {
                int got, nents = min_t(int, num_pages, PAGES_PER_CHUNK);

                umem->page_chunk[i].plist =
                        kcalloc(nents, sizeof(struct page *), GFP_KERNEL);
                if (!umem->page_chunk[i].plist) {
                        rv = -ENOMEM;
                        goto out_sem_up;
                }
                got = 0;
                while (nents) {
                        struct page **plist = &umem->page_chunk[i].plist[got];

                        rv = pin_user_pages(first_page_va, nents,
                                            foll_flags | FOLL_LONGTERM,
                                            plist, NULL);
                        if (rv < 0)
                                goto out_sem_up;

                        umem->num_pages += rv;
                        atomic64_add(rv, &mm_s->pinned_vm);
                        first_page_va += rv * PAGE_SIZE;
                        nents -= rv;
                        got += rv;
                }
                num_pages -= got;
        }
out_sem_up:
        mmap_read_unlock(mm_s);

        if (rv > 0)
                return umem;

        siw_umem_release(umem, false);

        return ERR_PTR(rv);
}
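
/*
 * Usage sketch (hypothetical helper): pin a user buffer and release it
 * again. siw_umem_get() reports failure as an ERR_PTR value, and
 * siw_umem_release() undoes both the pinning and the pinned_vm accounting.
 */
static int siw_umem_pin_unpin(u64 uaddr, u64 len, bool writable)
{
        struct siw_umem *umem = siw_umem_get(uaddr, len, writable);

        if (IS_ERR(umem))
                return PTR_ERR(umem);

        /* ... access the pinned pages via umem->page_chunk[...].plist ... */

        siw_umem_release(umem, writable);       /* dirty pages only if writable */
        return 0;
}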