cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

rxe_mr.c (14185B)


      1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
      2/*
      3 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
      4 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
      5 */
      6
      7#include "rxe.h"
      8#include "rxe_loc.h"
      9
      10/* Return a random 8-bit key value that is
      11 * different from last_key. Pass last_key as -1
      12 * if this is the first key for an MR or MW.
      13 */
     14u8 rxe_get_next_key(u32 last_key)
     15{
     16	u8 key;
     17
     18	do {
     19		get_random_bytes(&key, 1);
     20	} while (key == last_key);
     21
     22	return key;
     23}
     24
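        /* Check that the range [iova, iova + length) lies within the
         * registered range of the MR. DMA MRs have no range and always
         * pass the check.
         */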
     25int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
     26{
     27	struct rxe_map_set *set = mr->cur_map_set;
     28
     29	switch (mr->type) {
     30	case IB_MR_TYPE_DMA:
     31		return 0;
     32
     33	case IB_MR_TYPE_USER:
     34	case IB_MR_TYPE_MEM_REG:
     35		if (iova < set->iova || length > set->length ||
     36		    iova > set->iova + set->length - length)
     37			return -EFAULT;
     38		return 0;
     39
     40	default:
     41		pr_warn("%s: mr type (%d) not supported\n",
     42			__func__, mr->type);
     43		return -EFAULT;
     44	}
     45}
     46
     47#define IB_ACCESS_REMOTE	(IB_ACCESS_REMOTE_READ		\
     48				| IB_ACCESS_REMOTE_WRITE	\
     49				| IB_ACCESS_REMOTE_ATOMIC)
     50
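        /* Set up the keys and initial state for a new MR: the lkey is the
         * pool index in the upper bits plus a random 8-bit key, and the
         * rkey is only non-zero when remote access was requested.
         */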
     51static void rxe_mr_init(int access, struct rxe_mr *mr)
     52{
     53	u32 lkey = mr->elem.index << 8 | rxe_get_next_key(-1);
     54	u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;
     55
      56	/* Set ibmr->lkey/rkey and also copy them into the private
      57	 * lkey/rkey. For user MRs these will always be the same;
      58	 * for cases where the caller 'owns' the key portion they
      59	 * may differ until the REG_MR WQE is executed.
      60	 */
     61	mr->lkey = mr->ibmr.lkey = lkey;
     62	mr->rkey = mr->ibmr.rkey = rkey;
     63
     64	mr->state = RXE_MR_STATE_INVALID;
     65	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
     66}
     67
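        /* Free a map set: the individual maps, the array of map pointers
         * and the set itself.
         */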
     68static void rxe_mr_free_map_set(int num_map, struct rxe_map_set *set)
     69{
     70	int i;
     71
     72	for (i = 0; i < num_map; i++)
     73		kfree(set->map[i]);
     74
     75	kfree(set->map);
     76	kfree(set);
     77}
     78
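        /* Allocate a map set holding num_map maps of RXE_BUF_PER_MAP buffer
         * descriptors each. On failure everything allocated so far is freed
         * and -ENOMEM is returned.
         */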
     79static int rxe_mr_alloc_map_set(int num_map, struct rxe_map_set **setp)
     80{
     81	int i;
     82	struct rxe_map_set *set;
     83
     84	set = kmalloc(sizeof(*set), GFP_KERNEL);
     85	if (!set)
     86		goto err_out;
     87
     88	set->map = kmalloc_array(num_map, sizeof(struct rxe_map *), GFP_KERNEL);
     89	if (!set->map)
     90		goto err_free_set;
     91
     92	for (i = 0; i < num_map; i++) {
     93		set->map[i] = kmalloc(sizeof(struct rxe_map), GFP_KERNEL);
     94		if (!set->map[i])
     95			goto err_free_map;
     96	}
     97
     98	*setp = set;
     99
    100	return 0;
    101
    102err_free_map:
    103	for (i--; i >= 0; i--)
    104		kfree(set->map[i]);
    105
    106	kfree(set->map);
    107err_free_set:
    108	kfree(set);
    109err_out:
    110	return -ENOMEM;
    111}
    112
    113/**
    114 * rxe_mr_alloc() - Allocate memory map array(s) for MR
    115 * @mr: Memory region
    116 * @num_buf: Number of buffer descriptors to support
     117 * @both: If non-zero allocate both mr->cur_map_set and mr->next_map_set,
     118 *	  else just allocate mr->cur_map_set. Used for fast MRs
    119 *
    120 * Return: 0 on success else an error
    121 */
    122static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf, int both)
    123{
    124	int ret;
    125	int num_map;
    126
    127	BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));
    128	num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;
    129
    130	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
    131	mr->map_mask = RXE_BUF_PER_MAP - 1;
    132	mr->num_buf = num_buf;
    133	mr->max_buf = num_map * RXE_BUF_PER_MAP;
    134	mr->num_map = num_map;
    135
    136	ret = rxe_mr_alloc_map_set(num_map, &mr->cur_map_set);
    137	if (ret)
    138		return -ENOMEM;
    139
    140	if (both) {
    141		ret = rxe_mr_alloc_map_set(num_map, &mr->next_map_set);
    142		if (ret)
    143			goto err_free;
    144	}
    145
    146	return 0;
    147
    148err_free:
    149	rxe_mr_free_map_set(mr->num_map, mr->cur_map_set);
    150	mr->cur_map_set = NULL;
    151	return -ENOMEM;
    152}
    153
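        /* Initialize a DMA MR. DMA MRs carry no map set; iova is used
         * directly as a kernel virtual address (see iova_to_vaddr() and
         * rxe_mr_copy()).
         */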
    154void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr)
    155{
    156	rxe_mr_init(access, mr);
    157
    158	mr->ibmr.pd = &pd->ibpd;
    159	mr->access = access;
    160	mr->state = RXE_MR_STATE_VALID;
    161	mr->type = IB_MR_TYPE_DMA;
    162}
    163
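        /* Initialize a user MR: pin the user pages with ib_umem_get(),
         * allocate a map set sized for the number of pages, and record the
         * kernel virtual address and size of each page.
         */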
    164int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
    165		     int access, struct rxe_mr *mr)
    166{
    167	struct rxe_map_set	*set;
    168	struct rxe_map		**map;
    169	struct rxe_phys_buf	*buf = NULL;
    170	struct ib_umem		*umem;
    171	struct sg_page_iter	sg_iter;
    172	int			num_buf;
    173	void			*vaddr;
    174	int err;
    175
    176	umem = ib_umem_get(pd->ibpd.device, start, length, access);
    177	if (IS_ERR(umem)) {
    178		pr_warn("%s: Unable to pin memory region err = %d\n",
    179			__func__, (int)PTR_ERR(umem));
    180		err = PTR_ERR(umem);
    181		goto err_out;
    182	}
    183
    184	num_buf = ib_umem_num_pages(umem);
    185
    186	rxe_mr_init(access, mr);
    187
    188	err = rxe_mr_alloc(mr, num_buf, 0);
    189	if (err) {
    190		pr_warn("%s: Unable to allocate memory for map\n",
    191				__func__);
    192		goto err_release_umem;
    193	}
    194
    195	set = mr->cur_map_set;
    196	set->page_shift = PAGE_SHIFT;
    197	set->page_mask = PAGE_SIZE - 1;
    198
    199	num_buf = 0;
    200	map = set->map;
    201
    202	if (length > 0) {
    203		buf = map[0]->buf;
    204
    205		for_each_sgtable_page (&umem->sgt_append.sgt, &sg_iter, 0) {
    206			if (num_buf >= RXE_BUF_PER_MAP) {
    207				map++;
    208				buf = map[0]->buf;
    209				num_buf = 0;
    210			}
    211
    212			vaddr = page_address(sg_page_iter_page(&sg_iter));
    213			if (!vaddr) {
    214				pr_warn("%s: Unable to get virtual address\n",
    215						__func__);
    216				err = -ENOMEM;
    217				goto err_release_umem;
    218			}
    219
    220			buf->addr = (uintptr_t)vaddr;
    221			buf->size = PAGE_SIZE;
    222			num_buf++;
    223			buf++;
    224		}
    225	}
    226
    227	mr->ibmr.pd = &pd->ibpd;
    228	mr->umem = umem;
    229	mr->access = access;
    230	mr->state = RXE_MR_STATE_VALID;
    231	mr->type = IB_MR_TYPE_USER;
    232
    233	set->length = length;
    234	set->iova = iova;
    235	set->va = start;
    236	set->offset = ib_umem_offset(umem);
    237
    238	return 0;
    239
    240err_release_umem:
    241	ib_umem_release(umem);
    242err_out:
    243	return err;
    244}
    245
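        /* Initialize a fast-registration MR with room for max_pages buffers.
         * Both map sets are allocated so that a later REG_MR WQE can swap
         * next_map_set into place; the MR starts in the FREE state.
         */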
    246int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr)
    247{
    248	int err;
    249
    250	/* always allow remote access for FMRs */
    251	rxe_mr_init(IB_ACCESS_REMOTE, mr);
    252
    253	err = rxe_mr_alloc(mr, max_pages, 1);
    254	if (err)
    255		goto err1;
    256
    257	mr->ibmr.pd = &pd->ibpd;
    258	mr->max_buf = max_pages;
    259	mr->state = RXE_MR_STATE_FREE;
    260	mr->type = IB_MR_TYPE_MEM_REG;
    261
    262	return 0;
    263
    264err1:
    265	return err;
    266}
    267
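        /* Translate iova into a (map index, buffer index, offset) triple.
         * When the MR is page mapped (page_shift != 0) this is a shift and
         * mask; otherwise the buffer list is walked linearly.
         */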
    268static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
    269			size_t *offset_out)
    270{
    271	struct rxe_map_set *set = mr->cur_map_set;
    272	size_t offset = iova - set->iova + set->offset;
    273	int			map_index;
    274	int			buf_index;
    275	u64			length;
    276	struct rxe_map *map;
    277
    278	if (likely(set->page_shift)) {
    279		*offset_out = offset & set->page_mask;
    280		offset >>= set->page_shift;
    281		*n_out = offset & mr->map_mask;
    282		*m_out = offset >> mr->map_shift;
    283	} else {
    284		map_index = 0;
    285		buf_index = 0;
    286
    287		map = set->map[map_index];
    288		length = map->buf[buf_index].size;
    289
    290		while (offset >= length) {
    291			offset -= length;
    292			buf_index++;
    293
    294			if (buf_index == RXE_BUF_PER_MAP) {
    295				map_index++;
    296				buf_index = 0;
    297			}
    298			map = set->map[map_index];
    299			length = map->buf[buf_index].size;
    300		}
    301
    302		*m_out = map_index;
    303		*n_out = buf_index;
    304		*offset_out = offset;
    305	}
    306}
    307
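        /* Return the kernel virtual address backing iova, or NULL if the MR
         * is not valid, the range check fails, or the requested length
         * crosses a buffer boundary. For DMA MRs iova itself is returned.
         */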
    308void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
    309{
    310	size_t offset;
    311	int m, n;
    312	void *addr;
    313
    314	if (mr->state != RXE_MR_STATE_VALID) {
    315		pr_warn("mr not in valid state\n");
    316		addr = NULL;
    317		goto out;
    318	}
    319
    320	if (!mr->cur_map_set) {
    321		addr = (void *)(uintptr_t)iova;
    322		goto out;
    323	}
    324
    325	if (mr_check_range(mr, iova, length)) {
    326		pr_warn("range violation\n");
    327		addr = NULL;
    328		goto out;
    329	}
    330
    331	lookup_iova(mr, iova, &m, &n, &offset);
    332
    333	if (offset + length > mr->cur_map_set->map[m]->buf[n].size) {
    334		pr_warn("crosses page boundary\n");
    335		addr = NULL;
    336		goto out;
    337	}
    338
    339	addr = (void *)(uintptr_t)mr->cur_map_set->map[m]->buf[n].addr + offset;
    340
    341out:
    342	return addr;
    343}
    344
     345/* copy data between the buffer (addr, addr + length - 1) and an
     346 * mr object, starting at iova within the mr; direction given by dir
     347 */
    348int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
    349		enum rxe_mr_copy_dir dir)
    350{
    351	int			err;
    352	int			bytes;
    353	u8			*va;
    354	struct rxe_map		**map;
    355	struct rxe_phys_buf	*buf;
    356	int			m;
    357	int			i;
    358	size_t			offset;
    359
    360	if (length == 0)
    361		return 0;
    362
    363	if (mr->type == IB_MR_TYPE_DMA) {
    364		u8 *src, *dest;
    365
    366		src = (dir == RXE_TO_MR_OBJ) ? addr : ((void *)(uintptr_t)iova);
    367
    368		dest = (dir == RXE_TO_MR_OBJ) ? ((void *)(uintptr_t)iova) : addr;
    369
    370		memcpy(dest, src, length);
    371
    372		return 0;
    373	}
    374
    375	WARN_ON_ONCE(!mr->cur_map_set);
    376
    377	err = mr_check_range(mr, iova, length);
    378	if (err) {
    379		err = -EFAULT;
    380		goto err1;
    381	}
    382
    383	lookup_iova(mr, iova, &m, &i, &offset);
    384
    385	map = mr->cur_map_set->map + m;
    386	buf	= map[0]->buf + i;
    387
    388	while (length > 0) {
    389		u8 *src, *dest;
    390
    391		va	= (u8 *)(uintptr_t)buf->addr + offset;
    392		src = (dir == RXE_TO_MR_OBJ) ? addr : va;
    393		dest = (dir == RXE_TO_MR_OBJ) ? va : addr;
    394
    395		bytes	= buf->size - offset;
    396
    397		if (bytes > length)
    398			bytes = length;
    399
    400		memcpy(dest, src, bytes);
    401
    402		length	-= bytes;
    403		addr	+= bytes;
    404
    405		offset	= 0;
    406		buf++;
    407		i++;
    408
    409		if (i == RXE_BUF_PER_MAP) {
    410			i = 0;
    411			map++;
    412			buf = map[0]->buf;
    413		}
    414	}
    415
    416	return 0;
    417
    418err1:
    419	return err;
    420}
    421
    422/* copy data in or out of a wqe, i.e. sg list
    423 * under the control of a dma descriptor
    424 */
    425int copy_data(
    426	struct rxe_pd		*pd,
    427	int			access,
    428	struct rxe_dma_info	*dma,
    429	void			*addr,
    430	int			length,
    431	enum rxe_mr_copy_dir	dir)
    432{
    433	int			bytes;
    434	struct rxe_sge		*sge	= &dma->sge[dma->cur_sge];
    435	int			offset	= dma->sge_offset;
    436	int			resid	= dma->resid;
    437	struct rxe_mr		*mr	= NULL;
    438	u64			iova;
    439	int			err;
    440
    441	if (length == 0)
    442		return 0;
    443
    444	if (length > resid) {
    445		err = -EINVAL;
    446		goto err2;
    447	}
    448
    449	if (sge->length && (offset < sge->length)) {
    450		mr = lookup_mr(pd, access, sge->lkey, RXE_LOOKUP_LOCAL);
    451		if (!mr) {
    452			err = -EINVAL;
    453			goto err1;
    454		}
    455	}
    456
    457	while (length > 0) {
    458		bytes = length;
    459
    460		if (offset >= sge->length) {
    461			if (mr) {
    462				rxe_put(mr);
    463				mr = NULL;
    464			}
    465			sge++;
    466			dma->cur_sge++;
    467			offset = 0;
    468
    469			if (dma->cur_sge >= dma->num_sge) {
    470				err = -ENOSPC;
    471				goto err2;
    472			}
    473
    474			if (sge->length) {
    475				mr = lookup_mr(pd, access, sge->lkey,
    476					       RXE_LOOKUP_LOCAL);
    477				if (!mr) {
    478					err = -EINVAL;
    479					goto err1;
    480				}
    481			} else {
    482				continue;
    483			}
    484		}
    485
    486		if (bytes > sge->length - offset)
    487			bytes = sge->length - offset;
    488
    489		if (bytes > 0) {
    490			iova = sge->addr + offset;
    491
    492			err = rxe_mr_copy(mr, iova, addr, bytes, dir);
    493			if (err)
    494				goto err2;
    495
    496			offset	+= bytes;
    497			resid	-= bytes;
    498			length	-= bytes;
    499			addr	+= bytes;
    500		}
    501	}
    502
    503	dma->sge_offset = offset;
    504	dma->resid	= resid;
    505
    506	if (mr)
    507		rxe_put(mr);
    508
    509	return 0;
    510
    511err2:
    512	if (mr)
    513		rxe_put(mr);
    514err1:
    515	return err;
    516}
    517
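        /* Advance the dma descriptor over length bytes of the sg list
         * without copying any data, updating cur_sge, sge_offset and resid.
         */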
    518int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
    519{
    520	struct rxe_sge		*sge	= &dma->sge[dma->cur_sge];
    521	int			offset	= dma->sge_offset;
    522	int			resid	= dma->resid;
    523
    524	while (length) {
    525		unsigned int bytes;
    526
    527		if (offset >= sge->length) {
    528			sge++;
    529			dma->cur_sge++;
    530			offset = 0;
    531			if (dma->cur_sge >= dma->num_sge)
    532				return -ENOSPC;
    533		}
    534
    535		bytes = length;
    536
    537		if (bytes > sge->length - offset)
    538			bytes = sge->length - offset;
    539
    540		offset	+= bytes;
    541		resid	-= bytes;
    542		length	-= bytes;
    543	}
    544
    545	dma->sge_offset = offset;
    546	dma->resid	= resid;
    547
    548	return 0;
    549}
    550
    551/* (1) find the mr corresponding to lkey/rkey
    552 *     depending on lookup_type
    553 * (2) verify that the (qp) pd matches the mr pd
    554 * (3) verify that the mr can support the requested access
    555 * (4) verify that mr state is valid
    556 */
    557struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
    558			 enum rxe_mr_lookup_type type)
    559{
    560	struct rxe_mr *mr;
    561	struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
    562	int index = key >> 8;
    563
    564	mr = rxe_pool_get_index(&rxe->mr_pool, index);
    565	if (!mr)
    566		return NULL;
    567
    568	if (unlikely((type == RXE_LOOKUP_LOCAL && mr->lkey != key) ||
    569		     (type == RXE_LOOKUP_REMOTE && mr->rkey != key) ||
    570		     mr_pd(mr) != pd || (access && !(access & mr->access)) ||
    571		     mr->state != RXE_MR_STATE_VALID)) {
    572		rxe_put(mr);
    573		mr = NULL;
    574	}
    575
    576	return mr;
    577}
    578
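        /* Invalidate the MR identified by rkey: it must be a fast MR with a
         * matching rkey and no bound memory windows; on success it is moved
         * to the FREE state.
         */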
    579int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey)
    580{
    581	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
    582	struct rxe_mr *mr;
    583	int ret;
    584
    585	mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
    586	if (!mr) {
    587		pr_err("%s: No MR for rkey %#x\n", __func__, rkey);
    588		ret = -EINVAL;
    589		goto err;
    590	}
    591
    592	if (rkey != mr->rkey) {
    593		pr_err("%s: rkey (%#x) doesn't match mr->rkey (%#x)\n",
    594			__func__, rkey, mr->rkey);
    595		ret = -EINVAL;
    596		goto err_drop_ref;
    597	}
    598
    599	if (atomic_read(&mr->num_mw) > 0) {
    600		pr_warn("%s: Attempt to invalidate an MR while bound to MWs\n",
    601			__func__);
    602		ret = -EINVAL;
    603		goto err_drop_ref;
    604	}
    605
    606	if (unlikely(mr->type != IB_MR_TYPE_MEM_REG)) {
    607		pr_warn("%s: mr->type (%d) is wrong type\n", __func__, mr->type);
    608		ret = -EINVAL;
    609		goto err_drop_ref;
    610	}
    611
    612	mr->state = RXE_MR_STATE_FREE;
    613	ret = 0;
    614
    615err_drop_ref:
    616	rxe_put(mr);
    617err:
    618	return ret;
    619}
    620
     621/* The user can (re)register a fast MR by executing a REG_MR WQE.
     622 * The user is expected to hold a reference on the ib mr until the
     623 * WQE completes.
     624 * Once a fast MR is created this is the only way to change the
     625 * private keys. It is the user's responsibility to keep the
     626 * ib mr keys in sync with the rxe mr keys.
     627 */
    628int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
    629{
    630	struct rxe_mr *mr = to_rmr(wqe->wr.wr.reg.mr);
    631	u32 key = wqe->wr.wr.reg.key & 0xff;
    632	u32 access = wqe->wr.wr.reg.access;
    633	struct rxe_map_set *set;
    634
    635	/* user can only register MR in free state */
    636	if (unlikely(mr->state != RXE_MR_STATE_FREE)) {
    637		pr_warn("%s: mr->lkey = 0x%x not free\n",
    638			__func__, mr->lkey);
    639		return -EINVAL;
    640	}
    641
    642	/* user can only register mr with qp in same protection domain */
    643	if (unlikely(qp->ibqp.pd != mr->ibmr.pd)) {
    644		pr_warn("%s: qp->pd and mr->pd don't match\n",
    645			__func__);
    646		return -EINVAL;
    647	}
    648
    649	mr->access = access;
    650	mr->lkey = (mr->lkey & ~0xff) | key;
    651	mr->rkey = (access & IB_ACCESS_REMOTE) ? mr->lkey : 0;
    652	mr->state = RXE_MR_STATE_VALID;
    653
    654	set = mr->cur_map_set;
    655	mr->cur_map_set = mr->next_map_set;
    656	mr->cur_map_set->iova = wqe->wr.wr.reg.mr->iova;
    657	mr->next_map_set = set;
    658
    659	return 0;
    660}
    661
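        /* Store one page address and the MR page size in the next map set;
         * called for each page when an sg list is mapped onto a fast MR
         * (e.g. via ib_sg_to_pages()).
         */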
    662int rxe_mr_set_page(struct ib_mr *ibmr, u64 addr)
    663{
    664	struct rxe_mr *mr = to_rmr(ibmr);
    665	struct rxe_map_set *set = mr->next_map_set;
    666	struct rxe_map *map;
    667	struct rxe_phys_buf *buf;
    668
    669	if (unlikely(set->nbuf == mr->num_buf))
    670		return -ENOMEM;
    671
    672	map = set->map[set->nbuf / RXE_BUF_PER_MAP];
    673	buf = &map->buf[set->nbuf % RXE_BUF_PER_MAP];
    674
    675	buf->addr = addr;
    676	buf->size = ibmr->page_size;
    677	set->nbuf++;
    678
    679	return 0;
    680}
    681
    682int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
    683{
    684	struct rxe_mr *mr = to_rmr(ibmr);
    685
    686	/* See IBA 10.6.7.2.6 */
    687	if (atomic_read(&mr->num_mw) > 0)
    688		return -EINVAL;
    689
    690	rxe_put(mr);
    691
    692	return 0;
    693}
    694
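        /* Pool cleanup callback: drop the PD reference, release the umem
         * and free both map sets if present.
         */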
    695void rxe_mr_cleanup(struct rxe_pool_elem *elem)
    696{
    697	struct rxe_mr *mr = container_of(elem, typeof(*mr), elem);
    698
    699	rxe_put(mr_pd(mr));
    700
    701	ib_umem_release(mr->umem);
    702
    703	if (mr->cur_map_set)
    704		rxe_mr_free_map_set(mr->num_map, mr->cur_map_set);
    705
    706	if (mr->next_map_set)
    707		rxe_mr_free_map_set(mr->num_map, mr->next_map_set);
    708}