cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

ib_frmr.c (12141B)


/*
 * Copyright (c) 2016 Oracle.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "ib_mr.h"

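/* Atomically transition the FRMR state machine; when an MR leaves
 * FRMR_IS_INUSE, drop the connection's in-use count and wake anyone
 * waiting for the ring to drain.
 */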
static inline void
rds_transition_frwr_state(struct rds_ib_mr *ibmr,
			  enum rds_ib_fr_state old_state,
			  enum rds_ib_fr_state new_state)
{
	if (cmpxchg(&ibmr->u.frmr.fr_state,
		    old_state, new_state) == old_state &&
	    old_state == FRMR_IS_INUSE) {
		/* enforce order of ibmr->u.frmr.fr_state update
		 * before decrementing i_fastreg_inuse_count
		 */
		smp_mb__before_atomic();
		atomic_dec(&ibmr->ic->i_fastreg_inuse_count);
		if (waitqueue_active(&rds_ib_ring_empty_wait))
			wake_up(&rds_ib_ring_empty_wait);
	}
}

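/* Pick the 8K or 1M pool based on the mapping size, then either reuse a
 * pooled MR or allocate a fresh one via ib_alloc_mr().
 */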
static struct rds_ib_mr *rds_ib_alloc_frmr(struct rds_ib_device *rds_ibdev,
					   int npages)
{
	struct rds_ib_mr_pool *pool;
	struct rds_ib_mr *ibmr = NULL;
	struct rds_ib_frmr *frmr;
	int err = 0;

	if (npages <= RDS_MR_8K_MSG_SIZE)
		pool = rds_ibdev->mr_8k_pool;
	else
		pool = rds_ibdev->mr_1m_pool;

	ibmr = rds_ib_try_reuse_ibmr(pool);
	if (ibmr)
		return ibmr;

	ibmr = kzalloc_node(sizeof(*ibmr), GFP_KERNEL,
			    rdsibdev_to_node(rds_ibdev));
	if (!ibmr) {
		err = -ENOMEM;
		goto out_no_cigar;
	}

	frmr = &ibmr->u.frmr;
	frmr->mr = ib_alloc_mr(rds_ibdev->pd, IB_MR_TYPE_MEM_REG,
			 pool->max_pages);
	if (IS_ERR(frmr->mr)) {
		pr_warn("RDS/IB: %s failed to allocate MR\n", __func__);
		err = PTR_ERR(frmr->mr);
		goto out_no_cigar;
	}

	ibmr->pool = pool;
	if (pool->pool_type == RDS_IB_MR_8K_POOL)
		rds_ib_stats_inc(s_ib_rdma_mr_8k_alloc);
	else
		rds_ib_stats_inc(s_ib_rdma_mr_1m_alloc);

	if (atomic_read(&pool->item_count) > pool->max_items_soft)
		pool->max_items_soft = pool->max_items;

	frmr->fr_state = FRMR_IS_FREE;
	init_waitqueue_head(&frmr->fr_inv_done);
	init_waitqueue_head(&frmr->fr_reg_done);
	return ibmr;

out_no_cigar:
	kfree(ibmr);
	atomic_dec(&pool->item_count);
	return ERR_PTR(err);
}

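/* Hand an MR back to its pool (drop_list when it needs invalidation,
 * free_list otherwise) and kick the flush worker once too many pages
 * are pinned or dirty.
 */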
static void rds_ib_free_frmr(struct rds_ib_mr *ibmr, bool drop)
{
	struct rds_ib_mr_pool *pool = ibmr->pool;

	if (drop)
		llist_add(&ibmr->llnode, &pool->drop_list);
	else
		llist_add(&ibmr->llnode, &pool->free_list);
	atomic_add(ibmr->sg_len, &pool->free_pinned);
	atomic_inc(&pool->dirty_count);

	/* If we've pinned too many pages, request a flush */
	if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned ||
	    atomic_read(&pool->dirty_count) >= pool->max_items / 5)
		queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10);
}

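/* Map the MR's scatterlist and post an IB_WR_REG_MR work request, then
 * block until the registration completion arrives.
 */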
static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr)
{
	struct rds_ib_frmr *frmr = &ibmr->u.frmr;
	struct ib_reg_wr reg_wr;
	int ret, off = 0;

	while (atomic_dec_return(&ibmr->ic->i_fastreg_wrs) <= 0) {
		atomic_inc(&ibmr->ic->i_fastreg_wrs);
		cpu_relax();
	}

	ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_dma_len,
				&off, PAGE_SIZE);
	if (unlikely(ret != ibmr->sg_dma_len))
		return ret < 0 ? ret : -EINVAL;

	if (cmpxchg(&frmr->fr_state,
		    FRMR_IS_FREE, FRMR_IS_INUSE) != FRMR_IS_FREE)
		return -EBUSY;

	atomic_inc(&ibmr->ic->i_fastreg_inuse_count);

	/* Perform a WR for the fast_reg_mr. Each individual page
	 * in the sg list is added to the fast reg page list and placed
	 * inside the fast_reg_mr WR.  The key used is a rolling 8bit
	 * counter, which should guarantee uniqueness.
	 */
	ib_update_fast_reg_key(frmr->mr, ibmr->remap_count++);
	frmr->fr_reg = true;

	memset(&reg_wr, 0, sizeof(reg_wr));
	reg_wr.wr.wr_id = (unsigned long)(void *)ibmr;
	reg_wr.wr.opcode = IB_WR_REG_MR;
	reg_wr.wr.num_sge = 0;
	reg_wr.mr = frmr->mr;
	reg_wr.key = frmr->mr->rkey;
	reg_wr.access = IB_ACCESS_LOCAL_WRITE |
			IB_ACCESS_REMOTE_READ |
			IB_ACCESS_REMOTE_WRITE;
	reg_wr.wr.send_flags = IB_SEND_SIGNALED;

	ret = ib_post_send(ibmr->ic->i_cm_id->qp, &reg_wr.wr, NULL);
	if (unlikely(ret)) {
		/* Failure here can be because of -ENOMEM as well */
		rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE);

		atomic_inc(&ibmr->ic->i_fastreg_wrs);
		if (printk_ratelimit())
			pr_warn("RDS/IB: %s returned error(%d)\n",
				__func__, ret);
		goto out;
	}

	/* Wait for the registration to complete in order to prevent an invalid
	 * access error resulting from a race between the memory region already
	 * being accessed while registration is still pending.
	 */
	wait_event(frmr->fr_reg_done, !frmr->fr_reg);

out:

	return ret;
}

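/* Tear down any previous mapping, DMA-map the new scatterlist (only the
 * first and last segments may be unaligned to a page boundary), and
 * register it via rds_ib_post_reg_frmr().
 */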
static int rds_ib_map_frmr(struct rds_ib_device *rds_ibdev,
			   struct rds_ib_mr_pool *pool,
			   struct rds_ib_mr *ibmr,
			   struct scatterlist *sg, unsigned int sg_len)
{
	struct ib_device *dev = rds_ibdev->dev;
	struct rds_ib_frmr *frmr = &ibmr->u.frmr;
	int i;
	u32 len;
	int ret = 0;

	/* We want to tear down the old ibmr values here and fill it up with
	 * new sg values
	 */
	rds_ib_teardown_mr(ibmr);

	ibmr->sg = sg;
	ibmr->sg_len = sg_len;
	ibmr->sg_dma_len = 0;
	frmr->sg_byte_len = 0;
	WARN_ON(ibmr->sg_dma_len);
	ibmr->sg_dma_len = ib_dma_map_sg(dev, ibmr->sg, ibmr->sg_len,
					 DMA_BIDIRECTIONAL);
	if (unlikely(!ibmr->sg_dma_len)) {
		pr_warn("RDS/IB: %s failed!\n", __func__);
		return -EBUSY;
	}

	frmr->sg_byte_len = 0;
	frmr->dma_npages = 0;
	len = 0;

	ret = -EINVAL;
	for (i = 0; i < ibmr->sg_dma_len; ++i) {
		unsigned int dma_len = sg_dma_len(&ibmr->sg[i]);
		u64 dma_addr = sg_dma_address(&ibmr->sg[i]);

		frmr->sg_byte_len += dma_len;
		if (dma_addr & ~PAGE_MASK) {
			if (i > 0)
				goto out_unmap;
			else
				++frmr->dma_npages;
		}

		if ((dma_addr + dma_len) & ~PAGE_MASK) {
			if (i < ibmr->sg_dma_len - 1)
				goto out_unmap;
			else
				++frmr->dma_npages;
		}

		len += dma_len;
	}
	frmr->dma_npages += len >> PAGE_SHIFT;

	if (frmr->dma_npages > ibmr->pool->max_pages) {
		ret = -EMSGSIZE;
		goto out_unmap;
	}

	ret = rds_ib_post_reg_frmr(ibmr);
	if (ret)
		goto out_unmap;

	if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
		rds_ib_stats_inc(s_ib_rdma_mr_8k_used);
	else
		rds_ib_stats_inc(s_ib_rdma_mr_1m_used);

	return ret;

out_unmap:
	ib_dma_unmap_sg(rds_ibdev->dev, ibmr->sg, ibmr->sg_len,
			DMA_BIDIRECTIONAL);
	ibmr->sg_dma_len = 0;
	return ret;
}

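/* Post an IB_WR_LOCAL_INV work request for an in-use MR and wait until
 * the completion handler moves it out of FRMR_IS_INUSE.
 */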
static int rds_ib_post_inv(struct rds_ib_mr *ibmr)
{
	struct ib_send_wr *s_wr;
	struct rds_ib_frmr *frmr = &ibmr->u.frmr;
	struct rdma_cm_id *i_cm_id = ibmr->ic->i_cm_id;
	int ret = -EINVAL;

	if (!i_cm_id || !i_cm_id->qp || !frmr->mr)
		goto out;

	if (frmr->fr_state != FRMR_IS_INUSE)
		goto out;

	while (atomic_dec_return(&ibmr->ic->i_fastreg_wrs) <= 0) {
		atomic_inc(&ibmr->ic->i_fastreg_wrs);
		cpu_relax();
	}

	frmr->fr_inv = true;
	s_wr = &frmr->fr_wr;

	memset(s_wr, 0, sizeof(*s_wr));
	s_wr->wr_id = (unsigned long)(void *)ibmr;
	s_wr->opcode = IB_WR_LOCAL_INV;
	s_wr->ex.invalidate_rkey = frmr->mr->rkey;
	s_wr->send_flags = IB_SEND_SIGNALED;

	ret = ib_post_send(i_cm_id->qp, s_wr, NULL);
	if (unlikely(ret)) {
		rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE);
		frmr->fr_inv = false;
		/* enforce order of frmr->fr_inv update
		 * before incrementing i_fastreg_wrs
		 */
		smp_mb__before_atomic();
		atomic_inc(&ibmr->ic->i_fastreg_wrs);
		pr_err("RDS/IB: %s returned error(%d)\n", __func__, ret);
		goto out;
	}

	/* Wait for the FRMR_IS_FREE (or FRMR_IS_STALE) transition in order to
	 * 1) avoid a silly bouncing between "clean_list" and "drop_list"
	 *    triggered by function "rds_ib_reg_frmr" as it releases frmr
	 *    regions whose state is not "FRMR_IS_FREE" right away.
	 * 2) prevent an invalid access error in a race
	 *    from a pending "IB_WR_LOCAL_INV" operation
	 *    with a teardown ("dma_unmap_sg", "put_page")
	 *    and de-registration ("ib_dereg_mr") of the corresponding
	 *    memory region.
	 */
	wait_event(frmr->fr_inv_done, frmr->fr_state != FRMR_IS_INUSE);

out:
	return ret;
}

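/* Completion handler for registration and invalidation work requests:
 * update the FRMR state and wake the waiters in rds_ib_post_reg_frmr()
 * and rds_ib_post_inv().
 */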
void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
{
	struct rds_ib_mr *ibmr = (void *)(unsigned long)wc->wr_id;
	struct rds_ib_frmr *frmr = &ibmr->u.frmr;

	if (wc->status != IB_WC_SUCCESS) {
		rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE);
		if (rds_conn_up(ic->conn))
			rds_ib_conn_error(ic->conn,
					  "frmr completion <%pI4,%pI4> status %u(%s), vendor_err 0x%x, disconnecting and reconnecting\n",
					  &ic->conn->c_laddr,
					  &ic->conn->c_faddr,
					  wc->status,
					  ib_wc_status_msg(wc->status),
					  wc->vendor_err);
	}

	if (frmr->fr_inv) {
		rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_FREE);
		frmr->fr_inv = false;
		wake_up(&frmr->fr_inv_done);
	}

	if (frmr->fr_reg) {
		frmr->fr_reg = false;
		wake_up(&frmr->fr_reg_done);
	}

	/* enforce order of frmr->{fr_reg,fr_inv} update
	 * before incrementing i_fastreg_wrs
	 */
	smp_mb__before_atomic();
	atomic_inc(&ic->i_fastreg_wrs);
}

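/* Invalidate and tear down the MRs on the unmap list, freeing entries
 * until the goal is reached (stale MRs are freed regardless).
 */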
void rds_ib_unreg_frmr(struct list_head *list, unsigned int *nfreed,
		       unsigned long *unpinned, unsigned int goal)
{
	struct rds_ib_mr *ibmr, *next;
	struct rds_ib_frmr *frmr;
	int ret = 0, ret2;
	unsigned int freed = *nfreed;

	/* Post an invalidation for every MR that still has a DMA mapping */
	list_for_each_entry(ibmr, list, unmap_list) {
		if (ibmr->sg_dma_len) {
			ret2 = rds_ib_post_inv(ibmr);
			if (ret2 && !ret)
				ret = ret2;
		}
	}

	if (ret)
		pr_warn("RDS/IB: %s failed (err=%d)\n", __func__, ret);

	/* Now we can destroy the DMA mapping and unpin any pages */
	list_for_each_entry_safe(ibmr, next, list, unmap_list) {
		*unpinned += ibmr->sg_len;
		frmr = &ibmr->u.frmr;
		__rds_ib_teardown_mr(ibmr);
		if (freed < goal || frmr->fr_state == FRMR_IS_STALE) {
			/* Don't de-allocate if the MR is not free yet */
			if (frmr->fr_state == FRMR_IS_INUSE)
				continue;

			if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
				rds_ib_stats_inc(s_ib_rdma_mr_8k_free);
			else
				rds_ib_stats_inc(s_ib_rdma_mr_1m_free);
			list_del(&ibmr->unmap_list);
			if (frmr->mr)
				ib_dereg_mr(frmr->mr);
			kfree(ibmr);
			freed++;
		}
	}
	*nfreed = freed;
}

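/* Allocate an FRMR in the FRMR_IS_FREE state, map the caller's
 * scatterlist, and return the registered MR with its rkey in *key.
 */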
struct rds_ib_mr *rds_ib_reg_frmr(struct rds_ib_device *rds_ibdev,
				  struct rds_ib_connection *ic,
				  struct scatterlist *sg,
				  unsigned long nents, u32 *key)
{
	struct rds_ib_mr *ibmr = NULL;
	struct rds_ib_frmr *frmr;
	int ret;

	if (!ic) {
		/* TODO: Add FRWR support for RDS_GET_MR using proxy qp */
		return ERR_PTR(-EOPNOTSUPP);
	}

	do {
		if (ibmr)
			rds_ib_free_frmr(ibmr, true);
		ibmr = rds_ib_alloc_frmr(rds_ibdev, nents);
		if (IS_ERR(ibmr))
			return ibmr;
		frmr = &ibmr->u.frmr;
	} while (frmr->fr_state != FRMR_IS_FREE);

	ibmr->ic = ic;
	ibmr->device = rds_ibdev;
	ret = rds_ib_map_frmr(rds_ibdev, ibmr->pool, ibmr, sg, nents);
	if (ret == 0) {
		*key = frmr->mr->rkey;
	} else {
		rds_ib_free_frmr(ibmr, false);
		ibmr = ERR_PTR(ret);
	}

	return ibmr;
}

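/* Queue an MR for lazy freeing: stale MRs go onto the drop list,
 * everything else onto the free list.
 */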
void rds_ib_free_frmr_list(struct rds_ib_mr *ibmr)
{
	struct rds_ib_mr_pool *pool = ibmr->pool;
	struct rds_ib_frmr *frmr = &ibmr->u.frmr;

	if (frmr->fr_state == FRMR_IS_STALE)
		llist_add(&ibmr->llnode, &pool->drop_list);
	else
		llist_add(&ibmr->llnode, &pool->free_list);
}