cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

mr.c (60239B)


      1/*
      2 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
      3 * Copyright (c) 2020, Intel Corporation. All rights reserved.
      4 *
      5 * This software is available to you under a choice of one of two
      6 * licenses.  You may choose to be licensed under the terms of the GNU
      7 * General Public License (GPL) Version 2, available from the file
      8 * COPYING in the main directory of this source tree, or the
      9 * OpenIB.org BSD license below:
     10 *
     11 *     Redistribution and use in source and binary forms, with or
     12 *     without modification, are permitted provided that the following
     13 *     conditions are met:
     14 *
     15 *      - Redistributions of source code must retain the above
     16 *        copyright notice, this list of conditions and the following
     17 *        disclaimer.
     18 *
     19 *      - Redistributions in binary form must reproduce the above
     20 *        copyright notice, this list of conditions and the following
     21 *        disclaimer in the documentation and/or other materials
     22 *        provided with the distribution.
     23 *
     24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
     27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
     28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
     29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
     30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     31 * SOFTWARE.
     32 */
     33
     34
     35#include <linux/kref.h>
     36#include <linux/random.h>
     37#include <linux/debugfs.h>
     38#include <linux/export.h>
     39#include <linux/delay.h>
     40#include <linux/dma-buf.h>
     41#include <linux/dma-resv.h>
     42#include <rdma/ib_umem.h>
     43#include <rdma/ib_umem_odp.h>
     44#include <rdma/ib_verbs.h>
     45#include "dm.h"
     46#include "mlx5_ib.h"
     47#include "umr.h"
     48
     49enum {
     50	MAX_PENDING_REG_MR = 8,
     51};
     52
     53#define MLX5_UMR_ALIGN 2048
     54
     55static void
     56create_mkey_callback(int status, struct mlx5_async_work *context);
     57static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
     58				     u64 iova, int access_flags,
     59				     unsigned int page_size, bool populate);
     60
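        /*
         * Fill the common mkey context fields: access rights, PD and start
         * address. Relaxed ordering is only turned on when it is both
         * requested by the caller and supported by the PCIe link and firmware.
         */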
     61static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr,
     62					  struct ib_pd *pd)
     63{
     64	struct mlx5_ib_dev *dev = to_mdev(pd->device);
     65
     66	MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
     67	MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
     68	MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
     69	MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
     70	MLX5_SET(mkc, mkc, lr, 1);
     71
     72	if ((acc & IB_ACCESS_RELAXED_ORDERING) &&
     73	    pcie_relaxed_ordering_enabled(dev->mdev->pdev)) {
     74		if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write))
     75			MLX5_SET(mkc, mkc, relaxed_ordering_write, 1);
     76		if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read))
     77			MLX5_SET(mkc, mkc, relaxed_ordering_read, 1);
     78	}
     79
     80	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
     81	MLX5_SET(mkc, mkc, qpn, 0xffffff);
     82	MLX5_SET64(mkc, mkc, start_addr, start_addr);
     83}
     84
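        /*
         * Stamp the low byte of the mkey with a rolling per-device variant so
         * that a recycled mkey index does not immediately yield the same full
         * key; the variant is combined with the firmware-assigned index later.
         */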
     85static void assign_mkey_variant(struct mlx5_ib_dev *dev,
     86				struct mlx5_ib_mkey *mkey, u32 *in)
     87{
     88	u8 key = atomic_inc_return(&dev->mkey_var);
     89	void *mkc;
     90
     91	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
     92	MLX5_SET(mkc, mkc, mkey_7_0, key);
     93	mkey->key = key;
     94}
     95
     96static int mlx5_ib_create_mkey(struct mlx5_ib_dev *dev,
     97			       struct mlx5_ib_mkey *mkey, u32 *in, int inlen)
     98{
     99	int ret;
    100
    101	assign_mkey_variant(dev, mkey, in);
    102	ret = mlx5_core_create_mkey(dev->mdev, &mkey->key, in, inlen);
    103	if (!ret)
    104		init_waitqueue_head(&mkey->wait);
    105
    106	return ret;
    107}
    108
    109static int
    110mlx5_ib_create_mkey_cb(struct mlx5_ib_dev *dev,
    111		       struct mlx5_ib_mkey *mkey,
    112		       struct mlx5_async_ctx *async_ctx,
    113		       u32 *in, int inlen, u32 *out, int outlen,
    114		       struct mlx5_async_work *context)
    115{
    116	MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY);
    117	assign_mkey_variant(dev, mkey, in);
    118	return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen,
    119				create_mkey_callback, context);
    120}
    121
    122static int mr_cache_max_order(struct mlx5_ib_dev *dev);
    123static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent);
    124
    125static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
    126{
    127	WARN_ON(xa_load(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)));
    128
    129	return mlx5_core_destroy_mkey(dev->mdev, mr->mmkey.key);
    130}
    131
    132static void create_mkey_warn(struct mlx5_ib_dev *dev, int status, void *out)
    133{
    134	if (status == -ENXIO) /* core driver is not available */
    135		return;
    136
    137	mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
    138	if (status != -EREMOTEIO) /* driver specific failure */
    139		return;
    140
    141	/* Failed in FW, print cmd out failure details */
    142	mlx5_cmd_out_err(dev->mdev, MLX5_CMD_OP_CREATE_MKEY, 0, out);
    143}
    144
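        /*
         * Completion handler for the asynchronous mkey creation issued by
         * add_keys(). On failure the whole cache backs off via fill_delay and
         * the delay timer; on success the new MR is added to the entry's free
         * list and the entry is re-evaluated against its water marks.
         */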
    145static void create_mkey_callback(int status, struct mlx5_async_work *context)
    146{
    147	struct mlx5_ib_mr *mr =
    148		container_of(context, struct mlx5_ib_mr, cb_work);
    149	struct mlx5_cache_ent *ent = mr->cache_ent;
    150	struct mlx5_ib_dev *dev = ent->dev;
    151	unsigned long flags;
    152
    153	if (status) {
    154		create_mkey_warn(dev, status, mr->out);
    155		kfree(mr);
    156		spin_lock_irqsave(&ent->lock, flags);
    157		ent->pending--;
    158		WRITE_ONCE(dev->fill_delay, 1);
    159		spin_unlock_irqrestore(&ent->lock, flags);
    160		mod_timer(&dev->delay_timer, jiffies + HZ);
    161		return;
    162	}
    163
    164	mr->mmkey.type = MLX5_MKEY_MR;
    165	mr->mmkey.key |= mlx5_idx_to_mkey(
    166		MLX5_GET(create_mkey_out, mr->out, mkey_index));
    167	init_waitqueue_head(&mr->mmkey.wait);
    168
    169	WRITE_ONCE(dev->cache.last_add, jiffies);
    170
    171	spin_lock_irqsave(&ent->lock, flags);
    172	list_add_tail(&mr->list, &ent->head);
    173	ent->available_mrs++;
    174	ent->total_mrs++;
    175	/* If we are doing fill_to_high_water then keep going. */
    176	queue_adjust_cache_locked(ent);
    177	ent->pending--;
    178	spin_unlock_irqrestore(&ent->lock, flags);
    179}
    180
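        /*
         * Number of translation octowords needed to hold ndescs descriptors of
         * the given access mode (MTT or KSM entries).
         */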
    181static int get_mkc_octo_size(unsigned int access_mode, unsigned int ndescs)
    182{
    183	int ret = 0;
    184
    185	switch (access_mode) {
    186	case MLX5_MKC_ACCESS_MODE_MTT:
    187		ret = DIV_ROUND_UP(ndescs, MLX5_IB_UMR_OCTOWORD /
    188						   sizeof(struct mlx5_mtt));
    189		break;
    190	case MLX5_MKC_ACCESS_MODE_KSM:
    191		ret = DIV_ROUND_UP(ndescs, MLX5_IB_UMR_OCTOWORD /
    192						   sizeof(struct mlx5_klm));
    193		break;
    194	default:
    195		WARN_ON(1);
    196	}
    197	return ret;
    198}
    199
    200static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc)
    201{
    202	struct mlx5_ib_mr *mr;
    203
    204	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
    205	if (!mr)
    206		return NULL;
    207	mr->cache_ent = ent;
    208
    209	set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd);
    210	MLX5_SET(mkc, mkc, free, 1);
    211	MLX5_SET(mkc, mkc, umr_en, 1);
    212	MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3);
    213	MLX5_SET(mkc, mkc, access_mode_4_2, (ent->access_mode >> 2) & 0x7);
    214
    215	MLX5_SET(mkc, mkc, translations_octword_size,
    216		 get_mkc_octo_size(ent->access_mode, ent->ndescs));
    217	MLX5_SET(mkc, mkc, log_page_size, ent->page);
    218	return mr;
    219}
    220
    221/* Asynchronously schedule new MRs to be populated in the cache. */
    222static int add_keys(struct mlx5_cache_ent *ent, unsigned int num)
    223{
    224	size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
    225	struct mlx5_ib_mr *mr;
    226	void *mkc;
    227	u32 *in;
    228	int err = 0;
    229	int i;
    230
    231	in = kzalloc(inlen, GFP_KERNEL);
    232	if (!in)
    233		return -ENOMEM;
    234
    235	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
    236	for (i = 0; i < num; i++) {
    237		mr = alloc_cache_mr(ent, mkc);
    238		if (!mr) {
    239			err = -ENOMEM;
    240			break;
    241		}
    242		spin_lock_irq(&ent->lock);
    243		if (ent->pending >= MAX_PENDING_REG_MR) {
    244			err = -EAGAIN;
    245			spin_unlock_irq(&ent->lock);
    246			kfree(mr);
    247			break;
    248		}
    249		ent->pending++;
    250		spin_unlock_irq(&ent->lock);
    251		err = mlx5_ib_create_mkey_cb(ent->dev, &mr->mmkey,
    252					     &ent->dev->async_ctx, in, inlen,
    253					     mr->out, sizeof(mr->out),
    254					     &mr->cb_work);
    255		if (err) {
    256			spin_lock_irq(&ent->lock);
    257			ent->pending--;
    258			spin_unlock_irq(&ent->lock);
    259			mlx5_ib_warn(ent->dev, "create mkey failed %d\n", err);
    260			kfree(mr);
    261			break;
    262		}
    263	}
    264
    265	kfree(in);
    266	return err;
    267}
    268
     269/* Synchronously create an MR in the cache */
    270static struct mlx5_ib_mr *create_cache_mr(struct mlx5_cache_ent *ent)
    271{
    272	size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
    273	struct mlx5_ib_mr *mr;
    274	void *mkc;
    275	u32 *in;
    276	int err;
    277
    278	in = kzalloc(inlen, GFP_KERNEL);
    279	if (!in)
    280		return ERR_PTR(-ENOMEM);
    281	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
    282
    283	mr = alloc_cache_mr(ent, mkc);
    284	if (!mr) {
    285		err = -ENOMEM;
    286		goto free_in;
    287	}
    288
    289	err = mlx5_core_create_mkey(ent->dev->mdev, &mr->mmkey.key, in, inlen);
    290	if (err)
    291		goto free_mr;
    292
    293	init_waitqueue_head(&mr->mmkey.wait);
    294	mr->mmkey.type = MLX5_MKEY_MR;
    295	WRITE_ONCE(ent->dev->cache.last_add, jiffies);
    296	spin_lock_irq(&ent->lock);
    297	ent->total_mrs++;
    298	spin_unlock_irq(&ent->lock);
    299	kfree(in);
    300	return mr;
    301free_mr:
    302	kfree(mr);
    303free_in:
    304	kfree(in);
    305	return ERR_PTR(err);
    306}
    307
    308static void remove_cache_mr_locked(struct mlx5_cache_ent *ent)
    309{
    310	struct mlx5_ib_mr *mr;
    311
    312	lockdep_assert_held(&ent->lock);
    313	if (list_empty(&ent->head))
    314		return;
    315	mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
    316	list_del(&mr->list);
    317	ent->available_mrs--;
    318	ent->total_mrs--;
    319	spin_unlock_irq(&ent->lock);
    320	mlx5_core_destroy_mkey(ent->dev->mdev, mr->mmkey.key);
    321	kfree(mr);
    322	spin_lock_irq(&ent->lock);
    323}
    324
    325static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target,
    326				bool limit_fill)
    327{
    328	int err;
    329
    330	lockdep_assert_held(&ent->lock);
    331
    332	while (true) {
    333		if (limit_fill)
    334			target = ent->limit * 2;
    335		if (target == ent->available_mrs + ent->pending)
    336			return 0;
    337		if (target > ent->available_mrs + ent->pending) {
    338			u32 todo = target - (ent->available_mrs + ent->pending);
    339
    340			spin_unlock_irq(&ent->lock);
    341			err = add_keys(ent, todo);
    342			if (err == -EAGAIN)
    343				usleep_range(3000, 5000);
    344			spin_lock_irq(&ent->lock);
    345			if (err) {
    346				if (err != -EAGAIN)
    347					return err;
    348			} else
    349				return 0;
    350		} else {
    351			remove_cache_mr_locked(ent);
    352		}
    353	}
    354}
    355
    356static ssize_t size_write(struct file *filp, const char __user *buf,
    357			  size_t count, loff_t *pos)
    358{
    359	struct mlx5_cache_ent *ent = filp->private_data;
    360	u32 target;
    361	int err;
    362
    363	err = kstrtou32_from_user(buf, count, 0, &target);
    364	if (err)
    365		return err;
    366
    367	/*
     368	 * Target is the new value of total_mrs the user requests; however, we
    369	 * cannot free MRs that are in use. Compute the target value for
    370	 * available_mrs.
    371	 */
    372	spin_lock_irq(&ent->lock);
    373	if (target < ent->total_mrs - ent->available_mrs) {
    374		err = -EINVAL;
    375		goto err_unlock;
    376	}
    377	target = target - (ent->total_mrs - ent->available_mrs);
    378	if (target < ent->limit || target > ent->limit*2) {
    379		err = -EINVAL;
    380		goto err_unlock;
    381	}
    382	err = resize_available_mrs(ent, target, false);
    383	if (err)
    384		goto err_unlock;
    385	spin_unlock_irq(&ent->lock);
    386
    387	return count;
    388
    389err_unlock:
    390	spin_unlock_irq(&ent->lock);
    391	return err;
    392}
    393
    394static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
    395			 loff_t *pos)
    396{
    397	struct mlx5_cache_ent *ent = filp->private_data;
    398	char lbuf[20];
    399	int err;
    400
    401	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->total_mrs);
    402	if (err < 0)
    403		return err;
    404
    405	return simple_read_from_buffer(buf, count, pos, lbuf, err);
    406}
    407
    408static const struct file_operations size_fops = {
    409	.owner	= THIS_MODULE,
    410	.open	= simple_open,
    411	.write	= size_write,
    412	.read	= size_read,
    413};
    414
    415static ssize_t limit_write(struct file *filp, const char __user *buf,
    416			   size_t count, loff_t *pos)
    417{
    418	struct mlx5_cache_ent *ent = filp->private_data;
    419	u32 var;
    420	int err;
    421
    422	err = kstrtou32_from_user(buf, count, 0, &var);
    423	if (err)
    424		return err;
    425
    426	/*
     427	 * Upon set, we immediately fill the cache to the high water mark
     428	 * implied by the limit.
    429	 */
    430	spin_lock_irq(&ent->lock);
    431	ent->limit = var;
    432	err = resize_available_mrs(ent, 0, true);
    433	spin_unlock_irq(&ent->lock);
    434	if (err)
    435		return err;
    436	return count;
    437}
    438
    439static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
    440			  loff_t *pos)
    441{
    442	struct mlx5_cache_ent *ent = filp->private_data;
    443	char lbuf[20];
    444	int err;
    445
    446	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
    447	if (err < 0)
    448		return err;
    449
    450	return simple_read_from_buffer(buf, count, pos, lbuf, err);
    451}
    452
    453static const struct file_operations limit_fops = {
    454	.owner	= THIS_MODULE,
    455	.open	= simple_open,
    456	.write	= limit_write,
    457	.read	= limit_read,
    458};
    459
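        /*
         * True if any cache entry is still below its limit, i.e. a refill is
         * (or should be) in progress somewhere; used to postpone shrinking.
         */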
    460static bool someone_adding(struct mlx5_mr_cache *cache)
    461{
    462	unsigned int i;
    463
    464	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
    465		struct mlx5_cache_ent *ent = &cache->ent[i];
    466		bool ret;
    467
    468		spin_lock_irq(&ent->lock);
    469		ret = ent->available_mrs < ent->limit;
    470		spin_unlock_irq(&ent->lock);
    471		if (ret)
    472			return true;
    473	}
    474	return false;
    475}
    476
    477/*
    478 * Check if the bucket is outside the high/low water mark and schedule an async
     479 * update. The cache refill has hysteresis: once the low water mark is hit,
     480 * it is refilled up to the high mark.
    481 */
    482static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent)
    483{
    484	lockdep_assert_held(&ent->lock);
    485
    486	if (ent->disabled || READ_ONCE(ent->dev->fill_delay))
    487		return;
    488	if (ent->available_mrs < ent->limit) {
    489		ent->fill_to_high_water = true;
    490		mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0);
    491	} else if (ent->fill_to_high_water &&
    492		   ent->available_mrs + ent->pending < 2 * ent->limit) {
    493		/*
     494		 * Once we start populating due to hitting the low water mark,
     495		 * continue until we pass the high water mark.
    496		 */
    497		mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0);
    498	} else if (ent->available_mrs == 2 * ent->limit) {
    499		ent->fill_to_high_water = false;
    500	} else if (ent->available_mrs > 2 * ent->limit) {
    501		/* Queue deletion of excess entries */
    502		ent->fill_to_high_water = false;
    503		if (ent->pending)
    504			queue_delayed_work(ent->dev->cache.wq, &ent->dwork,
    505					   msecs_to_jiffies(1000));
    506		else
    507			mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0);
    508	}
    509}
    510
    511static void __cache_work_func(struct mlx5_cache_ent *ent)
    512{
    513	struct mlx5_ib_dev *dev = ent->dev;
    514	struct mlx5_mr_cache *cache = &dev->cache;
    515	int err;
    516
    517	spin_lock_irq(&ent->lock);
    518	if (ent->disabled)
    519		goto out;
    520
    521	if (ent->fill_to_high_water &&
    522	    ent->available_mrs + ent->pending < 2 * ent->limit &&
    523	    !READ_ONCE(dev->fill_delay)) {
    524		spin_unlock_irq(&ent->lock);
    525		err = add_keys(ent, 1);
    526		spin_lock_irq(&ent->lock);
    527		if (ent->disabled)
    528			goto out;
    529		if (err) {
    530			/*
    531			 * EAGAIN only happens if pending is positive, so we
     532			 * will be rescheduled from create_mkey_callback(). The only
    533			 * failure path here is ENOMEM.
    534			 */
    535			if (err != -EAGAIN) {
    536				mlx5_ib_warn(
    537					dev,
    538					"command failed order %d, err %d\n",
    539					ent->order, err);
    540				queue_delayed_work(cache->wq, &ent->dwork,
    541						   msecs_to_jiffies(1000));
    542			}
    543		}
    544	} else if (ent->available_mrs > 2 * ent->limit) {
    545		bool need_delay;
    546
    547		/*
     548		 * The remove_cache_mr_locked() logic is performed as a garbage
     549		 * collection task. Such a task is intended to run when no other
     550		 * active processes are running.
     551		 *
     552		 * need_resched() returns true if there are user tasks to be
     553		 * activated in the near future.
     554		 *
     555		 * In that case, we don't execute remove_cache_mr_locked() and
     556		 * postpone the garbage collection work to the next cycle, in
     557		 * order to free CPU resources to other tasks.
    558		 */
    559		spin_unlock_irq(&ent->lock);
    560		need_delay = need_resched() || someone_adding(cache) ||
    561			     !time_after(jiffies,
    562					 READ_ONCE(cache->last_add) + 300 * HZ);
    563		spin_lock_irq(&ent->lock);
    564		if (ent->disabled)
    565			goto out;
    566		if (need_delay) {
    567			queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
    568			goto out;
    569		}
    570		remove_cache_mr_locked(ent);
    571		queue_adjust_cache_locked(ent);
    572	}
    573out:
    574	spin_unlock_irq(&ent->lock);
    575}
    576
    577static void delayed_cache_work_func(struct work_struct *work)
    578{
    579	struct mlx5_cache_ent *ent;
    580
    581	ent = container_of(work, struct mlx5_cache_ent, dwork.work);
    582	__cache_work_func(ent);
    583}
    584
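        /*
         * Take an MR from the given cache entry, creating one synchronously on
         * a miss. Cached mkeys are created with no access flags of their own,
         * so the requested flags must be settable via UMR.
         */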
    585struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
    586				       struct mlx5_cache_ent *ent,
    587				       int access_flags)
    588{
    589	struct mlx5_ib_mr *mr;
    590
    591	/* Matches access in alloc_cache_mr() */
    592	if (!mlx5r_umr_can_reconfig(dev, 0, access_flags))
    593		return ERR_PTR(-EOPNOTSUPP);
    594
    595	spin_lock_irq(&ent->lock);
    596	if (list_empty(&ent->head)) {
    597		queue_adjust_cache_locked(ent);
    598		ent->miss++;
    599		spin_unlock_irq(&ent->lock);
    600		mr = create_cache_mr(ent);
    601		if (IS_ERR(mr))
    602			return mr;
    603	} else {
    604		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
    605		list_del(&mr->list);
    606		ent->available_mrs--;
    607		queue_adjust_cache_locked(ent);
    608		spin_unlock_irq(&ent->lock);
    609
    610		mlx5_clear_mr(mr);
    611	}
    612	return mr;
    613}
    614
    615static void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
    616{
    617	struct mlx5_cache_ent *ent = mr->cache_ent;
    618
    619	WRITE_ONCE(dev->cache.last_add, jiffies);
    620	spin_lock_irq(&ent->lock);
    621	list_add_tail(&mr->list, &ent->head);
    622	ent->available_mrs++;
    623	queue_adjust_cache_locked(ent);
    624	spin_unlock_irq(&ent->lock);
    625}
    626
    627static void clean_keys(struct mlx5_ib_dev *dev, int c)
    628{
    629	struct mlx5_mr_cache *cache = &dev->cache;
    630	struct mlx5_cache_ent *ent = &cache->ent[c];
    631	struct mlx5_ib_mr *tmp_mr;
    632	struct mlx5_ib_mr *mr;
    633	LIST_HEAD(del_list);
    634
    635	cancel_delayed_work(&ent->dwork);
    636	while (1) {
    637		spin_lock_irq(&ent->lock);
    638		if (list_empty(&ent->head)) {
    639			spin_unlock_irq(&ent->lock);
    640			break;
    641		}
    642		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
    643		list_move(&mr->list, &del_list);
    644		ent->available_mrs--;
    645		ent->total_mrs--;
    646		spin_unlock_irq(&ent->lock);
    647		mlx5_core_destroy_mkey(dev->mdev, mr->mmkey.key);
    648	}
    649
    650	list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
    651		list_del(&mr->list);
    652		kfree(mr);
    653	}
    654}
    655
    656static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
    657{
    658	if (!mlx5_debugfs_root || dev->is_rep)
    659		return;
    660
    661	debugfs_remove_recursive(dev->cache.root);
    662	dev->cache.root = NULL;
    663}
    664
    665static void mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
    666{
    667	struct mlx5_mr_cache *cache = &dev->cache;
    668	struct mlx5_cache_ent *ent;
    669	struct dentry *dir;
    670	int i;
    671
    672	if (!mlx5_debugfs_root || dev->is_rep)
    673		return;
    674
    675	cache->root = debugfs_create_dir("mr_cache", mlx5_debugfs_get_dev_root(dev->mdev));
    676
    677	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
    678		ent = &cache->ent[i];
    679		sprintf(ent->name, "%d", ent->order);
    680		dir = debugfs_create_dir(ent->name, cache->root);
    681		debugfs_create_file("size", 0600, dir, ent, &size_fops);
    682		debugfs_create_file("limit", 0600, dir, ent, &limit_fops);
    683		debugfs_create_u32("cur", 0400, dir, &ent->available_mrs);
    684		debugfs_create_u32("miss", 0600, dir, &ent->miss);
    685	}
    686}
    687
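        /* Timer callback: lift the refill back-off armed in create_mkey_callback(). */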
    688static void delay_time_func(struct timer_list *t)
    689{
    690	struct mlx5_ib_dev *dev = from_timer(dev, t, delay_timer);
    691
    692	WRITE_ONCE(dev->fill_delay, 0);
    693}
    694
    695int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
    696{
    697	struct mlx5_mr_cache *cache = &dev->cache;
    698	struct mlx5_cache_ent *ent;
    699	int i;
    700
    701	mutex_init(&dev->slow_path_mutex);
    702	cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
    703	if (!cache->wq) {
    704		mlx5_ib_warn(dev, "failed to create work queue\n");
    705		return -ENOMEM;
    706	}
    707
    708	mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx);
    709	timer_setup(&dev->delay_timer, delay_time_func, 0);
    710	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
    711		ent = &cache->ent[i];
    712		INIT_LIST_HEAD(&ent->head);
    713		spin_lock_init(&ent->lock);
    714		ent->order = i + 2;
    715		ent->dev = dev;
    716		ent->limit = 0;
    717
    718		INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
    719
    720		if (i > MR_CACHE_LAST_STD_ENTRY) {
    721			mlx5_odp_init_mr_cache_entry(ent);
    722			continue;
    723		}
    724
    725		if (ent->order > mr_cache_max_order(dev))
    726			continue;
    727
    728		ent->page = PAGE_SHIFT;
    729		ent->ndescs = 1 << ent->order;
    730		ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
    731		if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) &&
    732		    !dev->is_rep && mlx5_core_is_pf(dev->mdev) &&
    733		    mlx5r_umr_can_load_pas(dev, 0))
    734			ent->limit = dev->mdev->profile.mr_cache[i].limit;
    735		else
    736			ent->limit = 0;
    737		spin_lock_irq(&ent->lock);
    738		queue_adjust_cache_locked(ent);
    739		spin_unlock_irq(&ent->lock);
    740	}
    741
    742	mlx5_mr_cache_debugfs_init(dev);
    743
    744	return 0;
    745}
    746
    747int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
    748{
    749	unsigned int i;
    750
    751	if (!dev->cache.wq)
    752		return 0;
    753
    754	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
    755		struct mlx5_cache_ent *ent = &dev->cache.ent[i];
    756
    757		spin_lock_irq(&ent->lock);
    758		ent->disabled = true;
    759		spin_unlock_irq(&ent->lock);
    760		cancel_delayed_work_sync(&ent->dwork);
    761	}
    762
    763	mlx5_mr_cache_debugfs_cleanup(dev);
    764	mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);
    765
    766	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
    767		clean_keys(dev, i);
    768
    769	destroy_workqueue(dev->cache.wq);
    770	del_timer_sync(&dev->delay_timer);
    771
    772	return 0;
    773}
    774
    775struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
    776{
    777	struct mlx5_ib_dev *dev = to_mdev(pd->device);
    778	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
    779	struct mlx5_ib_mr *mr;
    780	void *mkc;
    781	u32 *in;
    782	int err;
    783
    784	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
    785	if (!mr)
    786		return ERR_PTR(-ENOMEM);
    787
    788	in = kzalloc(inlen, GFP_KERNEL);
    789	if (!in) {
    790		err = -ENOMEM;
    791		goto err_free;
    792	}
    793
    794	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
    795
    796	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
    797	MLX5_SET(mkc, mkc, length64, 1);
    798	set_mkc_access_pd_addr_fields(mkc, acc | IB_ACCESS_RELAXED_ORDERING, 0,
    799				      pd);
    800
    801	err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
    802	if (err)
    803		goto err_in;
    804
    805	kfree(in);
    806	mr->mmkey.type = MLX5_MKEY_MR;
    807	mr->ibmr.lkey = mr->mmkey.key;
    808	mr->ibmr.rkey = mr->mmkey.key;
    809	mr->umem = NULL;
    810
    811	return &mr->ibmr;
    812
    813err_in:
    814	kfree(in);
    815
    816err_free:
    817	kfree(mr);
    818
    819	return ERR_PTR(err);
    820}
    821
    822static int get_octo_len(u64 addr, u64 len, int page_shift)
    823{
    824	u64 page_size = 1ULL << page_shift;
    825	u64 offset;
    826	int npages;
    827
    828	offset = addr & (page_size - 1);
    829	npages = ALIGN(len + offset, page_size) >> page_shift;
    830	return (npages + 1) / 2;
    831}
    832
    833static int mr_cache_max_order(struct mlx5_ib_dev *dev)
    834{
    835	if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
    836		return MR_CACHE_LAST_STD_ENTRY + 2;
    837	return MLX5_MAX_UMR_SHIFT;
    838}
    839
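        /*
         * Map a log2 block count to a cache bucket. Orders below the smallest
         * bucket use the smallest bucket; orders above the last standard entry
         * are not served from the cache.
         */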
    840static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev,
    841						      unsigned int order)
    842{
    843	struct mlx5_mr_cache *cache = &dev->cache;
    844
    845	if (order < cache->ent[0].order)
    846		return &cache->ent[0];
    847	order = order - cache->ent[0].order;
    848	if (order > MR_CACHE_LAST_STD_ENTRY)
    849		return NULL;
    850	return &cache->ent[order];
    851}
    852
    853static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
    854			  u64 length, int access_flags, u64 iova)
    855{
    856	mr->ibmr.lkey = mr->mmkey.key;
    857	mr->ibmr.rkey = mr->mmkey.key;
    858	mr->ibmr.length = length;
    859	mr->ibmr.device = &dev->ib_dev;
    860	mr->ibmr.iova = iova;
    861	mr->access_flags = access_flags;
    862}
    863
    864static unsigned int mlx5_umem_dmabuf_default_pgsz(struct ib_umem *umem,
    865						  u64 iova)
    866{
    867	/*
    868	 * The alignment of iova has already been checked upon entering
    869	 * UVERBS_METHOD_REG_DMABUF_MR
    870	 */
    871	umem->iova = iova;
    872	return PAGE_SIZE;
    873}
    874
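        /*
         * Register umem through the MR cache: pick the best page size, select
         * a bucket by the number of DMA blocks, and fall back to the slow path
         * (reg_create) when no suitable bucket exists or UMR cannot provide
         * the requested access flags.
         */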
    875static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
    876					     struct ib_umem *umem, u64 iova,
    877					     int access_flags)
    878{
    879	struct mlx5_ib_dev *dev = to_mdev(pd->device);
    880	struct mlx5_cache_ent *ent;
    881	struct mlx5_ib_mr *mr;
    882	unsigned int page_size;
    883
    884	if (umem->is_dmabuf)
    885		page_size = mlx5_umem_dmabuf_default_pgsz(umem, iova);
    886	else
    887		page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size,
    888						     0, iova);
    889	if (WARN_ON(!page_size))
    890		return ERR_PTR(-EINVAL);
    891	ent = mr_cache_ent_from_order(
    892		dev, order_base_2(ib_umem_num_dma_blocks(umem, page_size)));
    893	/*
    894	 * Matches access in alloc_cache_mr(). If the MR can't come from the
    895	 * cache then synchronously create an uncached one.
    896	 */
    897	if (!ent || ent->limit == 0 ||
    898	    !mlx5r_umr_can_reconfig(dev, 0, access_flags)) {
    899		mutex_lock(&dev->slow_path_mutex);
    900		mr = reg_create(pd, umem, iova, access_flags, page_size, false);
    901		mutex_unlock(&dev->slow_path_mutex);
    902		return mr;
    903	}
    904
    905	mr = mlx5_mr_cache_alloc(dev, ent, access_flags);
    906	if (IS_ERR(mr))
    907		return mr;
    908
    909	mr->ibmr.pd = pd;
    910	mr->umem = umem;
    911	mr->page_shift = order_base_2(page_size);
    912	set_mr_fields(dev, mr, umem->length, access_flags, iova);
    913
    914	return mr;
    915}
    916
     917/*
     918 * Create an MR directly with a firmware command (not via UMR). If populate is
     919 * set, the page list is written at creation time; otherwise the mkey is created free (disabled) and enabled later via UMR.
     920 */
    921static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
    922				     u64 iova, int access_flags,
    923				     unsigned int page_size, bool populate)
    924{
    925	struct mlx5_ib_dev *dev = to_mdev(pd->device);
    926	struct mlx5_ib_mr *mr;
    927	__be64 *pas;
    928	void *mkc;
    929	int inlen;
    930	u32 *in;
    931	int err;
    932	bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));
    933
    934	if (!page_size)
    935		return ERR_PTR(-EINVAL);
    936	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
    937	if (!mr)
    938		return ERR_PTR(-ENOMEM);
    939
    940	mr->ibmr.pd = pd;
    941	mr->access_flags = access_flags;
    942	mr->page_shift = order_base_2(page_size);
    943
    944	inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
    945	if (populate)
    946		inlen += sizeof(*pas) *
    947			 roundup(ib_umem_num_dma_blocks(umem, page_size), 2);
    948	in = kvzalloc(inlen, GFP_KERNEL);
    949	if (!in) {
    950		err = -ENOMEM;
    951		goto err_1;
    952	}
    953	pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
    954	if (populate) {
    955		if (WARN_ON(access_flags & IB_ACCESS_ON_DEMAND)) {
    956			err = -EINVAL;
    957			goto err_2;
    958		}
    959		mlx5_ib_populate_pas(umem, 1UL << mr->page_shift, pas,
    960				     pg_cap ? MLX5_IB_MTT_PRESENT : 0);
    961	}
    962
    963	/* The pg_access bit allows setting the access flags
    964	 * in the page list submitted with the command. */
    965	MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap));
    966
    967	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
    968	set_mkc_access_pd_addr_fields(mkc, access_flags, iova,
    969				      populate ? pd : dev->umrc.pd);
    970	MLX5_SET(mkc, mkc, free, !populate);
    971	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
    972	MLX5_SET(mkc, mkc, umr_en, 1);
    973
    974	MLX5_SET64(mkc, mkc, len, umem->length);
    975	MLX5_SET(mkc, mkc, bsf_octword_size, 0);
    976	MLX5_SET(mkc, mkc, translations_octword_size,
    977		 get_octo_len(iova, umem->length, mr->page_shift));
    978	MLX5_SET(mkc, mkc, log_page_size, mr->page_shift);
    979	if (populate) {
    980		MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
    981			 get_octo_len(iova, umem->length, mr->page_shift));
    982	}
    983
    984	err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
    985	if (err) {
    986		mlx5_ib_warn(dev, "create mkey failed\n");
    987		goto err_2;
    988	}
    989	mr->mmkey.type = MLX5_MKEY_MR;
    990	mr->umem = umem;
    991	set_mr_fields(dev, mr, umem->length, access_flags, iova);
    992	kvfree(in);
    993
    994	mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key);
    995
    996	return mr;
    997
    998err_2:
    999	kvfree(in);
   1000err_1:
   1001	kfree(mr);
   1002	return ERR_PTR(err);
   1003}
   1004
   1005static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr,
   1006				       u64 length, int acc, int mode)
   1007{
   1008	struct mlx5_ib_dev *dev = to_mdev(pd->device);
   1009	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
   1010	struct mlx5_ib_mr *mr;
   1011	void *mkc;
   1012	u32 *in;
   1013	int err;
   1014
   1015	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
   1016	if (!mr)
   1017		return ERR_PTR(-ENOMEM);
   1018
   1019	in = kzalloc(inlen, GFP_KERNEL);
   1020	if (!in) {
   1021		err = -ENOMEM;
   1022		goto err_free;
   1023	}
   1024
   1025	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
   1026
   1027	MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3);
   1028	MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7);
   1029	MLX5_SET64(mkc, mkc, len, length);
   1030	set_mkc_access_pd_addr_fields(mkc, acc, start_addr, pd);
   1031
   1032	err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
   1033	if (err)
   1034		goto err_in;
   1035
   1036	kfree(in);
   1037
   1038	set_mr_fields(dev, mr, length, acc, start_addr);
   1039
   1040	return &mr->ibmr;
   1041
   1042err_in:
   1043	kfree(in);
   1044
   1045err_free:
   1046	kfree(mr);
   1047
   1048	return ERR_PTR(err);
   1049}
   1050
   1051int mlx5_ib_advise_mr(struct ib_pd *pd,
   1052		      enum ib_uverbs_advise_mr_advice advice,
   1053		      u32 flags,
   1054		      struct ib_sge *sg_list,
   1055		      u32 num_sge,
   1056		      struct uverbs_attr_bundle *attrs)
   1057{
   1058	if (advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH &&
   1059	    advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE &&
   1060	    advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT)
   1061		return -EOPNOTSUPP;
   1062
   1063	return mlx5_ib_advise_mr_prefetch(pd, advice, flags,
   1064					 sg_list, num_sge);
   1065}
   1066
   1067struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
   1068				struct ib_dm_mr_attr *attr,
   1069				struct uverbs_attr_bundle *attrs)
   1070{
   1071	struct mlx5_ib_dm *mdm = to_mdm(dm);
   1072	struct mlx5_core_dev *dev = to_mdev(dm->device)->mdev;
   1073	u64 start_addr = mdm->dev_addr + attr->offset;
   1074	int mode;
   1075
   1076	switch (mdm->type) {
   1077	case MLX5_IB_UAPI_DM_TYPE_MEMIC:
   1078		if (attr->access_flags & ~MLX5_IB_DM_MEMIC_ALLOWED_ACCESS)
   1079			return ERR_PTR(-EINVAL);
   1080
   1081		mode = MLX5_MKC_ACCESS_MODE_MEMIC;
   1082		start_addr -= pci_resource_start(dev->pdev, 0);
   1083		break;
   1084	case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
   1085	case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
   1086		if (attr->access_flags & ~MLX5_IB_DM_SW_ICM_ALLOWED_ACCESS)
   1087			return ERR_PTR(-EINVAL);
   1088
   1089		mode = MLX5_MKC_ACCESS_MODE_SW_ICM;
   1090		break;
   1091	default:
   1092		return ERR_PTR(-EINVAL);
   1093	}
   1094
   1095	return mlx5_ib_get_dm_mr(pd, start_addr, attr->length,
   1096				 attr->access_flags, mode);
   1097}
   1098
   1099static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem,
   1100				    u64 iova, int access_flags)
   1101{
   1102	struct mlx5_ib_dev *dev = to_mdev(pd->device);
   1103	struct mlx5_ib_mr *mr = NULL;
   1104	bool xlt_with_umr;
   1105	int err;
   1106
   1107	xlt_with_umr = mlx5r_umr_can_load_pas(dev, umem->length);
   1108	if (xlt_with_umr) {
   1109		mr = alloc_cacheable_mr(pd, umem, iova, access_flags);
   1110	} else {
   1111		unsigned int page_size = mlx5_umem_find_best_pgsz(
   1112			umem, mkc, log_page_size, 0, iova);
   1113
   1114		mutex_lock(&dev->slow_path_mutex);
   1115		mr = reg_create(pd, umem, iova, access_flags, page_size, true);
   1116		mutex_unlock(&dev->slow_path_mutex);
   1117	}
   1118	if (IS_ERR(mr)) {
   1119		ib_umem_release(umem);
   1120		return ERR_CAST(mr);
   1121	}
   1122
   1123	mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);
   1124
   1125	atomic_add(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages);
   1126
   1127	if (xlt_with_umr) {
   1128		/*
   1129		 * If the MR was created with reg_create then it will be
   1130		 * configured properly but left disabled. It is safe to go ahead
   1131		 * and configure it again via UMR while enabling it.
   1132		 */
   1133		err = mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE);
   1134		if (err) {
   1135			mlx5_ib_dereg_mr(&mr->ibmr, NULL);
   1136			return ERR_PTR(err);
   1137		}
   1138	}
   1139	return &mr->ibmr;
   1140}
   1141
   1142static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length,
   1143					u64 iova, int access_flags,
   1144					struct ib_udata *udata)
   1145{
   1146	struct mlx5_ib_dev *dev = to_mdev(pd->device);
   1147	struct ib_umem_odp *odp;
   1148	struct mlx5_ib_mr *mr;
   1149	int err;
   1150
   1151	if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
   1152		return ERR_PTR(-EOPNOTSUPP);
   1153
   1154	err = mlx5r_odp_create_eq(dev, &dev->odp_pf_eq);
   1155	if (err)
   1156		return ERR_PTR(err);
   1157	if (!start && length == U64_MAX) {
   1158		if (iova != 0)
   1159			return ERR_PTR(-EINVAL);
   1160		if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
   1161			return ERR_PTR(-EINVAL);
   1162
   1163		mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags);
   1164		if (IS_ERR(mr))
   1165			return ERR_CAST(mr);
   1166		return &mr->ibmr;
   1167	}
   1168
   1169	/* ODP requires xlt update via umr to work. */
   1170	if (!mlx5r_umr_can_load_pas(dev, length))
   1171		return ERR_PTR(-EINVAL);
   1172
   1173	odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags,
   1174			      &mlx5_mn_ops);
   1175	if (IS_ERR(odp))
   1176		return ERR_CAST(odp);
   1177
   1178	mr = alloc_cacheable_mr(pd, &odp->umem, iova, access_flags);
   1179	if (IS_ERR(mr)) {
   1180		ib_umem_release(&odp->umem);
   1181		return ERR_CAST(mr);
   1182	}
   1183	xa_init(&mr->implicit_children);
   1184
   1185	odp->private = mr;
   1186	err = mlx5r_store_odp_mkey(dev, &mr->mmkey);
   1187	if (err)
   1188		goto err_dereg_mr;
   1189
   1190	err = mlx5_ib_init_odp_mr(mr);
   1191	if (err)
   1192		goto err_dereg_mr;
   1193	return &mr->ibmr;
   1194
   1195err_dereg_mr:
   1196	mlx5_ib_dereg_mr(&mr->ibmr, NULL);
   1197	return ERR_PTR(err);
   1198}
   1199
   1200struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
   1201				  u64 iova, int access_flags,
   1202				  struct ib_udata *udata)
   1203{
   1204	struct mlx5_ib_dev *dev = to_mdev(pd->device);
   1205	struct ib_umem *umem;
   1206
   1207	if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
   1208		return ERR_PTR(-EOPNOTSUPP);
   1209
   1210	mlx5_ib_dbg(dev, "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n",
   1211		    start, iova, length, access_flags);
   1212
   1213	if (access_flags & IB_ACCESS_ON_DEMAND)
   1214		return create_user_odp_mr(pd, start, length, iova, access_flags,
   1215					  udata);
   1216	umem = ib_umem_get(&dev->ib_dev, start, length, access_flags);
   1217	if (IS_ERR(umem))
   1218		return ERR_CAST(umem);
   1219	return create_real_mr(pd, umem, iova, access_flags);
   1220}
   1221
   1222static void mlx5_ib_dmabuf_invalidate_cb(struct dma_buf_attachment *attach)
   1223{
   1224	struct ib_umem_dmabuf *umem_dmabuf = attach->importer_priv;
   1225	struct mlx5_ib_mr *mr = umem_dmabuf->private;
   1226
   1227	dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv);
   1228
   1229	if (!umem_dmabuf->sgt)
   1230		return;
   1231
   1232	mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP);
   1233	ib_umem_dmabuf_unmap_pages(umem_dmabuf);
   1234}
   1235
   1236static struct dma_buf_attach_ops mlx5_ib_dmabuf_attach_ops = {
   1237	.allow_peer2peer = 1,
   1238	.move_notify = mlx5_ib_dmabuf_invalidate_cb,
   1239};
   1240
   1241struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset,
   1242					 u64 length, u64 virt_addr,
   1243					 int fd, int access_flags,
   1244					 struct ib_udata *udata)
   1245{
   1246	struct mlx5_ib_dev *dev = to_mdev(pd->device);
   1247	struct mlx5_ib_mr *mr = NULL;
   1248	struct ib_umem_dmabuf *umem_dmabuf;
   1249	int err;
   1250
   1251	if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) ||
   1252	    !IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
   1253		return ERR_PTR(-EOPNOTSUPP);
   1254
   1255	mlx5_ib_dbg(dev,
   1256		    "offset 0x%llx, virt_addr 0x%llx, length 0x%llx, fd %d, access_flags 0x%x\n",
   1257		    offset, virt_addr, length, fd, access_flags);
   1258
   1259	/* dmabuf requires xlt update via umr to work. */
   1260	if (!mlx5r_umr_can_load_pas(dev, length))
   1261		return ERR_PTR(-EINVAL);
   1262
   1263	umem_dmabuf = ib_umem_dmabuf_get(&dev->ib_dev, offset, length, fd,
   1264					 access_flags,
   1265					 &mlx5_ib_dmabuf_attach_ops);
   1266	if (IS_ERR(umem_dmabuf)) {
   1267		mlx5_ib_dbg(dev, "umem_dmabuf get failed (%ld)\n",
   1268			    PTR_ERR(umem_dmabuf));
   1269		return ERR_CAST(umem_dmabuf);
   1270	}
   1271
   1272	mr = alloc_cacheable_mr(pd, &umem_dmabuf->umem, virt_addr,
   1273				access_flags);
   1274	if (IS_ERR(mr)) {
   1275		ib_umem_release(&umem_dmabuf->umem);
   1276		return ERR_CAST(mr);
   1277	}
   1278
   1279	mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);
   1280
   1281	atomic_add(ib_umem_num_pages(mr->umem), &dev->mdev->priv.reg_pages);
   1282	umem_dmabuf->private = mr;
   1283	err = mlx5r_store_odp_mkey(dev, &mr->mmkey);
   1284	if (err)
   1285		goto err_dereg_mr;
   1286
   1287	err = mlx5_ib_init_dmabuf_mr(mr);
   1288	if (err)
   1289		goto err_dereg_mr;
   1290	return &mr->ibmr;
   1291
   1292err_dereg_mr:
   1293	mlx5_ib_dereg_mr(&mr->ibmr, NULL);
   1294	return ERR_PTR(err);
   1295}
   1296
   1297/*
    1298 * True if the change in access flags can be done via UMR; only some access
   1299 * flags can be updated.
   1300 */
   1301static bool can_use_umr_rereg_access(struct mlx5_ib_dev *dev,
   1302				     unsigned int current_access_flags,
   1303				     unsigned int target_access_flags)
   1304{
   1305	unsigned int diffs = current_access_flags ^ target_access_flags;
   1306
   1307	if (diffs & ~(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE |
   1308		      IB_ACCESS_REMOTE_READ | IB_ACCESS_RELAXED_ORDERING))
   1309		return false;
   1310	return mlx5r_umr_can_reconfig(dev, current_access_flags,
   1311				      target_access_flags);
   1312}
   1313
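        /*
         * A new umem may be loaded into an existing mkey only if the mkey came
         * from the cache (so its translation capacity is known), UMR can load
         * the PAS, and the new umem fits that capacity at the chosen page size.
         */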
   1314static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr,
   1315				  struct ib_umem *new_umem,
   1316				  int new_access_flags, u64 iova,
   1317				  unsigned long *page_size)
   1318{
   1319	struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
   1320
   1321	/* We only track the allocated sizes of MRs from the cache */
   1322	if (!mr->cache_ent)
   1323		return false;
   1324	if (!mlx5r_umr_can_load_pas(dev, new_umem->length))
   1325		return false;
   1326
   1327	*page_size =
   1328		mlx5_umem_find_best_pgsz(new_umem, mkc, log_page_size, 0, iova);
   1329	if (WARN_ON(!*page_size))
   1330		return false;
   1331	return (1ULL << mr->cache_ent->order) >=
   1332	       ib_umem_num_dma_blocks(new_umem, *page_size);
   1333}
   1334
   1335static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd,
   1336			 int access_flags, int flags, struct ib_umem *new_umem,
   1337			 u64 iova, unsigned long page_size)
   1338{
   1339	struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
   1340	int upd_flags = MLX5_IB_UPD_XLT_ADDR | MLX5_IB_UPD_XLT_ENABLE;
   1341	struct ib_umem *old_umem = mr->umem;
   1342	int err;
   1343
   1344	/*
    1345	 * To keep everything simple, the MR is revoked before we start to mess
    1346	 * with it. This ensures the change is atomic relative to any use of the
   1347	 * MR.
   1348	 */
   1349	err = mlx5r_umr_revoke_mr(mr);
   1350	if (err)
   1351		return err;
   1352
   1353	if (flags & IB_MR_REREG_PD) {
   1354		mr->ibmr.pd = pd;
   1355		upd_flags |= MLX5_IB_UPD_XLT_PD;
   1356	}
   1357	if (flags & IB_MR_REREG_ACCESS) {
   1358		mr->access_flags = access_flags;
   1359		upd_flags |= MLX5_IB_UPD_XLT_ACCESS;
   1360	}
   1361
    1362	mr->ibmr.iova = iova;
    1363	mr->ibmr.length = new_umem->length;
   1365	mr->page_shift = order_base_2(page_size);
   1366	mr->umem = new_umem;
   1367	err = mlx5r_umr_update_mr_pas(mr, upd_flags);
   1368	if (err) {
   1369		/*
    1370		 * The MR is revoked at this point, so it is safe to free
    1371		 * new_umem.
   1372		 */
   1373		mr->umem = old_umem;
   1374		return err;
   1375	}
   1376
   1377	atomic_sub(ib_umem_num_pages(old_umem), &dev->mdev->priv.reg_pages);
   1378	ib_umem_release(old_umem);
   1379	atomic_add(ib_umem_num_pages(new_umem), &dev->mdev->priv.reg_pages);
   1380	return 0;
   1381}
   1382
   1383struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
   1384				    u64 length, u64 iova, int new_access_flags,
   1385				    struct ib_pd *new_pd,
   1386				    struct ib_udata *udata)
   1387{
   1388	struct mlx5_ib_dev *dev = to_mdev(ib_mr->device);
   1389	struct mlx5_ib_mr *mr = to_mmr(ib_mr);
   1390	int err;
   1391
   1392	if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
   1393		return ERR_PTR(-EOPNOTSUPP);
   1394
   1395	mlx5_ib_dbg(
   1396		dev,
   1397		"start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n",
   1398		start, iova, length, new_access_flags);
   1399
   1400	if (flags & ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS))
   1401		return ERR_PTR(-EOPNOTSUPP);
   1402
   1403	if (!(flags & IB_MR_REREG_ACCESS))
   1404		new_access_flags = mr->access_flags;
   1405	if (!(flags & IB_MR_REREG_PD))
   1406		new_pd = ib_mr->pd;
   1407
   1408	if (!(flags & IB_MR_REREG_TRANS)) {
   1409		struct ib_umem *umem;
   1410
   1411		/* Fast path for PD/access change */
   1412		if (can_use_umr_rereg_access(dev, mr->access_flags,
   1413					     new_access_flags)) {
   1414			err = mlx5r_umr_rereg_pd_access(mr, new_pd,
   1415							new_access_flags);
   1416			if (err)
   1417				return ERR_PTR(err);
   1418			return NULL;
   1419		}
    1420		/* DM or ODP MRs don't have a normal umem, so we can't reuse it */
   1421		if (!mr->umem || is_odp_mr(mr) || is_dmabuf_mr(mr))
   1422			goto recreate;
   1423
   1424		/*
    1425		 * Only one active MR can refer to a umem at one time; revoke
   1426		 * the old MR before assigning the umem to the new one.
   1427		 */
   1428		err = mlx5r_umr_revoke_mr(mr);
   1429		if (err)
   1430			return ERR_PTR(err);
   1431		umem = mr->umem;
   1432		mr->umem = NULL;
   1433		atomic_sub(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages);
   1434
   1435		return create_real_mr(new_pd, umem, mr->ibmr.iova,
   1436				      new_access_flags);
   1437	}
   1438
   1439	/*
    1440	 * DM doesn't have a PAS list, so we can't reuse it. ODP/dmabuf does,
    1441	 * but the logic around releasing the umem is different.
   1442	 */
   1443	if (!mr->umem || is_odp_mr(mr) || is_dmabuf_mr(mr))
   1444		goto recreate;
   1445
   1446	if (!(new_access_flags & IB_ACCESS_ON_DEMAND) &&
   1447	    can_use_umr_rereg_access(dev, mr->access_flags, new_access_flags)) {
   1448		struct ib_umem *new_umem;
   1449		unsigned long page_size;
   1450
   1451		new_umem = ib_umem_get(&dev->ib_dev, start, length,
   1452				       new_access_flags);
   1453		if (IS_ERR(new_umem))
   1454			return ERR_CAST(new_umem);
   1455
   1456		/* Fast path for PAS change */
   1457		if (can_use_umr_rereg_pas(mr, new_umem, new_access_flags, iova,
   1458					  &page_size)) {
   1459			err = umr_rereg_pas(mr, new_pd, new_access_flags, flags,
   1460					    new_umem, iova, page_size);
   1461			if (err) {
   1462				ib_umem_release(new_umem);
   1463				return ERR_PTR(err);
   1464			}
   1465			return NULL;
   1466		}
   1467		return create_real_mr(new_pd, new_umem, iova, new_access_flags);
   1468	}
   1469
   1470	/*
    1471	 * Everything else has no state we can preserve; just create a new MR
    1472	 * from scratch.
   1473	 */
   1474recreate:
   1475	return mlx5_ib_reg_user_mr(new_pd, start, length, iova,
   1476				   new_access_flags, udata);
   1477}
   1478
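        /*
         * Allocate and DMA-map the kernel-resident descriptor buffer used by
         * kernel MR types. The allocation is padded so the descriptors can be
         * aligned to MLX5_UMR_ALIGN.
         */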
   1479static int
   1480mlx5_alloc_priv_descs(struct ib_device *device,
   1481		      struct mlx5_ib_mr *mr,
   1482		      int ndescs,
   1483		      int desc_size)
   1484{
   1485	struct mlx5_ib_dev *dev = to_mdev(device);
   1486	struct device *ddev = &dev->mdev->pdev->dev;
   1487	int size = ndescs * desc_size;
   1488	int add_size;
   1489	int ret;
   1490
   1491	add_size = max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0);
   1492
   1493	mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL);
   1494	if (!mr->descs_alloc)
   1495		return -ENOMEM;
   1496
   1497	mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN);
   1498
   1499	mr->desc_map = dma_map_single(ddev, mr->descs, size, DMA_TO_DEVICE);
   1500	if (dma_mapping_error(ddev, mr->desc_map)) {
   1501		ret = -ENOMEM;
   1502		goto err;
   1503	}
   1504
   1505	return 0;
   1506err:
   1507	kfree(mr->descs_alloc);
   1508
   1509	return ret;
   1510}
   1511
   1512static void
   1513mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
   1514{
   1515	if (!mr->umem && mr->descs) {
   1516		struct ib_device *device = mr->ibmr.device;
   1517		int size = mr->max_descs * mr->desc_size;
   1518		struct mlx5_ib_dev *dev = to_mdev(device);
   1519
   1520		dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size,
   1521				 DMA_TO_DEVICE);
   1522		kfree(mr->descs_alloc);
   1523		mr->descs = NULL;
   1524	}
   1525}
   1526
   1527int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
   1528{
   1529	struct mlx5_ib_mr *mr = to_mmr(ibmr);
   1530	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
   1531	int rc;
   1532
   1533	/*
    1534	 * Any async use of the MR must hold the refcount; once the refcount
    1535	 * goes to zero, no other thread (such as ODP page faults, prefetch, or
    1536	 * any UMR activity) can touch the mkey, so it is safe to destroy it.
   1537	 */
   1538	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) &&
   1539	    refcount_read(&mr->mmkey.usecount) != 0 &&
   1540	    xa_erase(&mr_to_mdev(mr)->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)))
   1541		mlx5r_deref_wait_odp_mkey(&mr->mmkey);
   1542
   1543	if (ibmr->type == IB_MR_TYPE_INTEGRITY) {
   1544		xa_cmpxchg(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key),
   1545			   mr->sig, NULL, GFP_KERNEL);
   1546
   1547		if (mr->mtt_mr) {
   1548			rc = mlx5_ib_dereg_mr(&mr->mtt_mr->ibmr, NULL);
   1549			if (rc)
   1550				return rc;
   1551			mr->mtt_mr = NULL;
   1552		}
   1553		if (mr->klm_mr) {
   1554			rc = mlx5_ib_dereg_mr(&mr->klm_mr->ibmr, NULL);
   1555			if (rc)
   1556				return rc;
   1557			mr->klm_mr = NULL;
   1558		}
   1559
   1560		if (mlx5_core_destroy_psv(dev->mdev,
   1561					  mr->sig->psv_memory.psv_idx))
   1562			mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
   1563				     mr->sig->psv_memory.psv_idx);
   1564		if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_wire.psv_idx))
   1565			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
   1566				     mr->sig->psv_wire.psv_idx);
   1567		kfree(mr->sig);
   1568		mr->sig = NULL;
   1569	}
   1570
   1571	/* Stop DMA */
   1572	if (mr->cache_ent) {
   1573		if (mlx5r_umr_revoke_mr(mr)) {
   1574			spin_lock_irq(&mr->cache_ent->lock);
   1575			mr->cache_ent->total_mrs--;
   1576			spin_unlock_irq(&mr->cache_ent->lock);
   1577			mr->cache_ent = NULL;
   1578		}
   1579	}
   1580	if (!mr->cache_ent) {
   1581		rc = destroy_mkey(to_mdev(mr->ibmr.device), mr);
   1582		if (rc)
   1583			return rc;
   1584	}
   1585
   1586	if (mr->umem) {
   1587		bool is_odp = is_odp_mr(mr);
   1588
   1589		if (!is_odp)
   1590			atomic_sub(ib_umem_num_pages(mr->umem),
   1591				   &dev->mdev->priv.reg_pages);
   1592		ib_umem_release(mr->umem);
   1593		if (is_odp)
   1594			mlx5_ib_free_odp_mr(mr);
   1595	}
   1596
   1597	if (mr->cache_ent) {
   1598		mlx5_mr_cache_free(dev, mr);
   1599	} else {
   1600		mlx5_free_priv_descs(mr);
   1601		kfree(mr);
   1602	}
   1603	return 0;
   1604}
   1605
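        /*
         * Build the mkey context for a free (unpopulated) UMR-enabled kernel
         * mkey sized for ndescs descriptors.
         */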
   1606static void mlx5_set_umr_free_mkey(struct ib_pd *pd, u32 *in, int ndescs,
   1607				   int access_mode, int page_shift)
   1608{
   1609	void *mkc;
   1610
   1611	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
   1612
   1613	/* This is only used from the kernel, so setting the PD is OK. */
   1614	set_mkc_access_pd_addr_fields(mkc, IB_ACCESS_RELAXED_ORDERING, 0, pd);
   1615	MLX5_SET(mkc, mkc, free, 1);
   1616	MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
   1617	MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3);
   1618	MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7);
   1619	MLX5_SET(mkc, mkc, umr_en, 1);
   1620	MLX5_SET(mkc, mkc, log_page_size, page_shift);
   1621}
   1622
   1623static int _mlx5_alloc_mkey_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
   1624				  int ndescs, int desc_size, int page_shift,
   1625				  int access_mode, u32 *in, int inlen)
   1626{
   1627	struct mlx5_ib_dev *dev = to_mdev(pd->device);
   1628	int err;
   1629
   1630	mr->access_mode = access_mode;
   1631	mr->desc_size = desc_size;
   1632	mr->max_descs = ndescs;
   1633
   1634	err = mlx5_alloc_priv_descs(pd->device, mr, ndescs, desc_size);
   1635	if (err)
   1636		return err;
   1637
   1638	mlx5_set_umr_free_mkey(pd, in, ndescs, access_mode, page_shift);
   1639
   1640	err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
   1641	if (err)
   1642		goto err_free_descs;
   1643
   1644	mr->mmkey.type = MLX5_MKEY_MR;
   1645	mr->ibmr.lkey = mr->mmkey.key;
   1646	mr->ibmr.rkey = mr->mmkey.key;
   1647
   1648	return 0;
   1649
   1650err_free_descs:
   1651	mlx5_free_priv_descs(mr);
   1652	return err;
   1653}
   1654
   1655static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd,
   1656				u32 max_num_sg, u32 max_num_meta_sg,
   1657				int desc_size, int access_mode)
   1658{
   1659	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
   1660	int ndescs = ALIGN(max_num_sg + max_num_meta_sg, 4);
   1661	int page_shift = 0;
   1662	struct mlx5_ib_mr *mr;
   1663	u32 *in;
   1664	int err;
   1665
   1666	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
   1667	if (!mr)
   1668		return ERR_PTR(-ENOMEM);
   1669
   1670	mr->ibmr.pd = pd;
   1671	mr->ibmr.device = pd->device;
   1672
   1673	in = kzalloc(inlen, GFP_KERNEL);
   1674	if (!in) {
   1675		err = -ENOMEM;
   1676		goto err_free;
   1677	}
   1678
   1679	if (access_mode == MLX5_MKC_ACCESS_MODE_MTT)
   1680		page_shift = PAGE_SHIFT;
   1681
   1682	err = _mlx5_alloc_mkey_descs(pd, mr, ndescs, desc_size, page_shift,
   1683				     access_mode, in, inlen);
   1684	if (err)
   1685		goto err_free_in;
   1686
   1687	mr->umem = NULL;
   1688	kfree(in);
   1689
   1690	return mr;
   1691
   1692err_free_in:
   1693	kfree(in);
   1694err_free:
   1695	kfree(mr);
   1696	return ERR_PTR(err);
   1697}
   1698
   1699static int mlx5_alloc_mem_reg_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
   1700				    int ndescs, u32 *in, int inlen)
   1701{
   1702	return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_mtt),
   1703				      PAGE_SHIFT, MLX5_MKC_ACCESS_MODE_MTT, in,
   1704				      inlen);
   1705}
   1706
   1707static int mlx5_alloc_sg_gaps_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
   1708				    int ndescs, u32 *in, int inlen)
   1709{
   1710	return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_klm),
   1711				      0, MLX5_MKC_ACCESS_MODE_KLMS, in, inlen);
   1712}
   1713
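        /*
         * Integrity MRs need two PSVs (memory and wire), internal KLM and MTT
         * MRs for the data/metadata layouts, and a top-level KLM mkey with BSF
         * enabled for signature offload.
         */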
   1714static int mlx5_alloc_integrity_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
   1715				      int max_num_sg, int max_num_meta_sg,
   1716				      u32 *in, int inlen)
   1717{
   1718	struct mlx5_ib_dev *dev = to_mdev(pd->device);
   1719	u32 psv_index[2];
   1720	void *mkc;
   1721	int err;
   1722
   1723	mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
   1724	if (!mr->sig)
   1725		return -ENOMEM;
   1726
   1727	/* create mem & wire PSVs */
   1728	err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn, 2, psv_index);
   1729	if (err)
   1730		goto err_free_sig;
   1731
   1732	mr->sig->psv_memory.psv_idx = psv_index[0];
   1733	mr->sig->psv_wire.psv_idx = psv_index[1];
   1734
   1735	mr->sig->sig_status_checked = true;
   1736	mr->sig->sig_err_exists = false;
    1737	/* Arm SIGERR on the next UMR */
   1738	++mr->sig->sigerr_count;
   1739	mr->klm_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg,
   1740					 sizeof(struct mlx5_klm),
   1741					 MLX5_MKC_ACCESS_MODE_KLMS);
   1742	if (IS_ERR(mr->klm_mr)) {
   1743		err = PTR_ERR(mr->klm_mr);
   1744		goto err_destroy_psv;
   1745	}
   1746	mr->mtt_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg,
   1747					 sizeof(struct mlx5_mtt),
   1748					 MLX5_MKC_ACCESS_MODE_MTT);
   1749	if (IS_ERR(mr->mtt_mr)) {
   1750		err = PTR_ERR(mr->mtt_mr);
   1751		goto err_free_klm_mr;
   1752	}
   1753
    1754	/* Set BSF descriptors for the mkey */
   1755	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
   1756	MLX5_SET(mkc, mkc, bsf_en, 1);
   1757	MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE);
   1758
   1759	err = _mlx5_alloc_mkey_descs(pd, mr, 4, sizeof(struct mlx5_klm), 0,
   1760				     MLX5_MKC_ACCESS_MODE_KLMS, in, inlen);
   1761	if (err)
   1762		goto err_free_mtt_mr;
   1763
   1764	err = xa_err(xa_store(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key),
   1765			      mr->sig, GFP_KERNEL));
   1766	if (err)
   1767		goto err_free_descs;
   1768	return 0;
   1769
   1770err_free_descs:
   1771	destroy_mkey(dev, mr);
   1772	mlx5_free_priv_descs(mr);
   1773err_free_mtt_mr:
   1774	mlx5_ib_dereg_mr(&mr->mtt_mr->ibmr, NULL);
   1775	mr->mtt_mr = NULL;
   1776err_free_klm_mr:
   1777	mlx5_ib_dereg_mr(&mr->klm_mr->ibmr, NULL);
   1778	mr->klm_mr = NULL;
   1779err_destroy_psv:
   1780	if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_memory.psv_idx))
   1781		mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
   1782			     mr->sig->psv_memory.psv_idx);
   1783	if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_wire.psv_idx))
   1784		mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
   1785			     mr->sig->psv_wire.psv_idx);
   1786err_free_sig:
   1787	kfree(mr->sig);
   1788
   1789	return err;
   1790}
   1791
   1792static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd,
   1793					enum ib_mr_type mr_type, u32 max_num_sg,
   1794					u32 max_num_meta_sg)
   1795{
   1796	struct mlx5_ib_dev *dev = to_mdev(pd->device);
   1797	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
   1798	int ndescs = ALIGN(max_num_sg, 4);
   1799	struct mlx5_ib_mr *mr;
   1800	u32 *in;
   1801	int err;
   1802
   1803	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
   1804	if (!mr)
   1805		return ERR_PTR(-ENOMEM);
   1806
   1807	in = kzalloc(inlen, GFP_KERNEL);
   1808	if (!in) {
   1809		err = -ENOMEM;
   1810		goto err_free;
   1811	}
   1812
   1813	mr->ibmr.device = pd->device;
   1814	mr->umem = NULL;
   1815
   1816	switch (mr_type) {
   1817	case IB_MR_TYPE_MEM_REG:
   1818		err = mlx5_alloc_mem_reg_descs(pd, mr, ndescs, in, inlen);
   1819		break;
   1820	case IB_MR_TYPE_SG_GAPS:
   1821		err = mlx5_alloc_sg_gaps_descs(pd, mr, ndescs, in, inlen);
   1822		break;
   1823	case IB_MR_TYPE_INTEGRITY:
   1824		err = mlx5_alloc_integrity_descs(pd, mr, max_num_sg,
   1825						 max_num_meta_sg, in, inlen);
   1826		break;
   1827	default:
   1828		mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type);
   1829		err = -EINVAL;
   1830	}
   1831
   1832	if (err)
   1833		goto err_free_in;
   1834
   1835	kfree(in);
   1836
   1837	return &mr->ibmr;
   1838
   1839err_free_in:
   1840	kfree(in);
   1841err_free:
   1842	kfree(mr);
   1843	return ERR_PTR(err);
   1844}
   1845
   1846struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
   1847			       u32 max_num_sg)
   1848{
   1849	return __mlx5_ib_alloc_mr(pd, mr_type, max_num_sg, 0);
   1850}
   1851
   1852struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd,
   1853					 u32 max_num_sg, u32 max_num_meta_sg)
   1854{
   1855	return __mlx5_ib_alloc_mr(pd, IB_MR_TYPE_INTEGRITY, max_num_sg,
   1856				  max_num_meta_sg);
   1857}
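/*
 * These two entry points are wired into the mlx5_ib device ops table and are
 * reached through the core ib_alloc_mr() / ib_alloc_mr_integrity() verbs.
 * Hedged usage sketch (not from this file):
 *
 *	mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 16);
 *	if (IS_ERR(mr))
 *		return PTR_ERR(mr);
 */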
   1858
   1859int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
   1860{
   1861	struct mlx5_ib_dev *dev = to_mdev(ibmw->device);
   1862	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
   1863	struct mlx5_ib_mw *mw = to_mmw(ibmw);
   1864	unsigned int ndescs;
   1865	u32 *in = NULL;
   1866	void *mkc;
   1867	int err;
   1868	struct mlx5_ib_alloc_mw req = {};
   1869	struct {
   1870		__u32	comp_mask;
   1871		__u32	response_length;
   1872	} resp = {};
   1873
   1874	err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
   1875	if (err)
   1876		return err;
   1877
   1878	if (req.comp_mask || req.reserved1 || req.reserved2)
   1879		return -EOPNOTSUPP;
   1880
   1881	if (udata->inlen > sizeof(req) &&
   1882	    !ib_is_udata_cleared(udata, sizeof(req),
   1883				 udata->inlen - sizeof(req)))
   1884		return -EOPNOTSUPP;
   1885
   1886	ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4);
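	/*
	 * The descriptor count is padded to a multiple of 4, e.g. num_klms == 5
	 * becomes 8, and num_klms == 0 still reserves roundup(1, 4) == 4.
	 */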
   1887
   1888	in = kzalloc(inlen, GFP_KERNEL);
   1889	if (!in) {
   1890		err = -ENOMEM;
   1891		goto free;
   1892	}
   1893
   1894	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
   1895
   1896	MLX5_SET(mkc, mkc, free, 1);
   1897	MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
   1898	MLX5_SET(mkc, mkc, pd, to_mpd(ibmw->pd)->pdn);
   1899	MLX5_SET(mkc, mkc, umr_en, 1);
   1900	MLX5_SET(mkc, mkc, lr, 1);
   1901	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS);
   1902	MLX5_SET(mkc, mkc, en_rinval, !!((ibmw->type == IB_MW_TYPE_2)));
   1903	MLX5_SET(mkc, mkc, qpn, 0xffffff);
   1904
   1905	err = mlx5_ib_create_mkey(dev, &mw->mmkey, in, inlen);
   1906	if (err)
   1907		goto free;
   1908
   1909	mw->mmkey.type = MLX5_MKEY_MW;
   1910	ibmw->rkey = mw->mmkey.key;
   1911	mw->mmkey.ndescs = ndescs;
   1912
   1913	resp.response_length =
   1914		min(offsetofend(typeof(resp), response_length), udata->outlen);
   1915	if (resp.response_length) {
   1916		err = ib_copy_to_udata(udata, &resp, resp.response_length);
   1917		if (err)
   1918			goto free_mkey;
   1919	}
   1920
   1921	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
   1922		err = mlx5r_store_odp_mkey(dev, &mw->mmkey);
   1923		if (err)
   1924			goto free_mkey;
   1925	}
   1926
   1927	kfree(in);
   1928	return 0;
   1929
   1930free_mkey:
   1931	mlx5_core_destroy_mkey(dev->mdev, mw->mmkey.key);
   1932free:
   1933	kfree(in);
   1934	return err;
   1935}
   1936
   1937int mlx5_ib_dealloc_mw(struct ib_mw *mw)
   1938{
   1939	struct mlx5_ib_dev *dev = to_mdev(mw->device);
   1940	struct mlx5_ib_mw *mmw = to_mmw(mw);
   1941
   1942	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) &&
   1943	    xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key)))
   1944		/*
   1945		 * pagefault_single_data_segment() may be accessing mmw
   1946		 * if the user bound an ODP MR to this MW.
   1947		 */
   1948		mlx5r_deref_wait_odp_mkey(&mmw->mmkey);
   1949
   1950	return mlx5_core_destroy_mkey(dev->mdev, mmw->mmkey.key);
   1951}
   1952
   1953int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
   1954			    struct ib_mr_status *mr_status)
   1955{
   1956	struct mlx5_ib_mr *mmr = to_mmr(ibmr);
   1957	int ret = 0;
   1958
   1959	if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
   1960		pr_err("Invalid status check mask\n");
   1961		ret = -EINVAL;
   1962		goto done;
   1963	}
   1964
   1965	mr_status->fail_status = 0;
   1966	if (check_mask & IB_MR_CHECK_SIG_STATUS) {
   1967		if (!mmr->sig) {
   1968			ret = -EINVAL;
   1969			pr_err("signature status check requested on a non-signature enabled MR\n");
   1970			goto done;
   1971		}
   1972
   1973		mmr->sig->sig_status_checked = true;
   1974		if (!mmr->sig->sig_err_exists)
   1975			goto done;
   1976
   1977		if (ibmr->lkey == mmr->sig->err_item.key)
   1978			memcpy(&mr_status->sig_err, &mmr->sig->err_item,
   1979			       sizeof(mr_status->sig_err));
   1980		else {
   1981			mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
   1982			mr_status->sig_err.sig_err_offset = 0;
   1983			mr_status->sig_err.key = mmr->sig->err_item.key;
   1984		}
   1985
   1986		mmr->sig->sig_err_exists = false;
   1987		mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;
   1988	}
   1989
   1990done:
   1991	return ret;
   1992}
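/*
 * Hedged usage sketch (not from this file): a signature-offload ULP would
 * typically poll for errors through the core wrapper, roughly:
 *
 *	struct ib_mr_status mr_status;
 *
 *	if (!ib_check_mr_status(mr, IB_MR_CHECK_SIG_STATUS, &mr_status) &&
 *	    (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS))
 *		pr_err("sig error type %d at offset %llu\n",
 *		       mr_status.sig_err.err_type,
 *		       mr_status.sig_err.sig_err_offset);
 */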
   1993
   1994static int
   1995mlx5_ib_map_pa_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
   1996			int data_sg_nents, unsigned int *data_sg_offset,
   1997			struct scatterlist *meta_sg, int meta_sg_nents,
   1998			unsigned int *meta_sg_offset)
   1999{
   2000	struct mlx5_ib_mr *mr = to_mmr(ibmr);
   2001	unsigned int sg_offset = 0;
   2002	int n = 0;
   2003
   2004	mr->meta_length = 0;
   2005	if (data_sg_nents == 1) {
   2006		n++;
   2007		mr->mmkey.ndescs = 1;
   2008		if (data_sg_offset)
   2009			sg_offset = *data_sg_offset;
   2010		mr->data_length = sg_dma_len(data_sg) - sg_offset;
   2011		mr->data_iova = sg_dma_address(data_sg) + sg_offset;
   2012		if (meta_sg_nents == 1) {
   2013			n++;
   2014			mr->meta_ndescs = 1;
   2015			if (meta_sg_offset)
   2016				sg_offset = *meta_sg_offset;
   2017			else
   2018				sg_offset = 0;
   2019			mr->meta_length = sg_dma_len(meta_sg) - sg_offset;
   2020			mr->pi_iova = sg_dma_address(meta_sg) + sg_offset;
   2021		}
   2022		ibmr->length = mr->data_length + mr->meta_length;
   2023	}
   2024
   2025	return n;
   2026}
   2027
   2028static int
   2029mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
   2030		   struct scatterlist *sgl,
   2031		   unsigned short sg_nents,
   2032		   unsigned int *sg_offset_p,
   2033		   struct scatterlist *meta_sgl,
   2034		   unsigned short meta_sg_nents,
   2035		   unsigned int *meta_sg_offset_p)
   2036{
   2037	struct scatterlist *sg = sgl;
   2038	struct mlx5_klm *klms = mr->descs;
   2039	unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
   2040	u32 lkey = mr->ibmr.pd->local_dma_lkey;
   2041	int i, j = 0;
   2042
   2043	mr->ibmr.iova = sg_dma_address(sg) + sg_offset;
   2044	mr->ibmr.length = 0;
   2045
   2046	for_each_sg(sgl, sg, sg_nents, i) {
   2047		if (unlikely(i >= mr->max_descs))
   2048			break;
   2049		klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset);
   2050		klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset);
   2051		klms[i].key = cpu_to_be32(lkey);
   2052		mr->ibmr.length += sg_dma_len(sg) - sg_offset;
   2053
   2054		sg_offset = 0;
   2055	}
   2056
   2057	if (sg_offset_p)
   2058		*sg_offset_p = sg_offset;
   2059
   2060	mr->mmkey.ndescs = i;
   2061	mr->data_length = mr->ibmr.length;
   2062
   2063	if (meta_sg_nents) {
   2064		sg = meta_sgl;
   2065		sg_offset = meta_sg_offset_p ? *meta_sg_offset_p : 0;
   2066		for_each_sg(meta_sgl, sg, meta_sg_nents, j) {
   2067			if (unlikely(i + j >= mr->max_descs))
   2068				break;
   2069			klms[i + j].va = cpu_to_be64(sg_dma_address(sg) +
   2070						     sg_offset);
   2071			klms[i + j].bcount = cpu_to_be32(sg_dma_len(sg) -
   2072							 sg_offset);
   2073			klms[i + j].key = cpu_to_be32(lkey);
   2074			mr->ibmr.length += sg_dma_len(sg) - sg_offset;
   2075
   2076			sg_offset = 0;
   2077		}
   2078		if (meta_sg_offset_p)
   2079			*meta_sg_offset_p = sg_offset;
   2080
   2081		mr->meta_ndescs = j;
   2082		mr->meta_length = mr->ibmr.length - mr->data_length;
   2083	}
   2084
   2085	return i + j;
   2086}
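/*
 * Illustrative example (addresses and lengths are made up): mapping a
 * two-entry data SG list of 0x1000 and 0x200 bytes with no offset and no
 * metadata produces two KLMs,
 *
 *	klms[0] = { .va = dma0, .bcount = 0x1000, .key = local_dma_lkey }
 *	klms[1] = { .va = dma1, .bcount = 0x200,  .key = local_dma_lkey }
 *
 * with mr->ibmr.iova = dma0, mr->ibmr.length = 0x1200, mmkey.ndescs = 2 and
 * a return value of 2.
 */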
   2087
   2088static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
   2089{
   2090	struct mlx5_ib_mr *mr = to_mmr(ibmr);
   2091	__be64 *descs;
   2092
   2093	if (unlikely(mr->mmkey.ndescs == mr->max_descs))
   2094		return -ENOMEM;
   2095
   2096	descs = mr->descs;
   2097	descs[mr->mmkey.ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
   2098
   2099	return 0;
   2100}
   2101
   2102static int mlx5_set_page_pi(struct ib_mr *ibmr, u64 addr)
   2103{
   2104	struct mlx5_ib_mr *mr = to_mmr(ibmr);
   2105	__be64 *descs;
   2106
   2107	if (unlikely(mr->mmkey.ndescs + mr->meta_ndescs == mr->max_descs))
   2108		return -ENOMEM;
   2109
   2110	descs = mr->descs;
   2111	descs[mr->mmkey.ndescs + mr->meta_ndescs++] =
   2112		cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
   2113
   2114	return 0;
   2115}
   2116
   2117static int
   2118mlx5_ib_map_mtt_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
   2119			 int data_sg_nents, unsigned int *data_sg_offset,
   2120			 struct scatterlist *meta_sg, int meta_sg_nents,
   2121			 unsigned int *meta_sg_offset)
   2122{
   2123	struct mlx5_ib_mr *mr = to_mmr(ibmr);
   2124	struct mlx5_ib_mr *pi_mr = mr->mtt_mr;
   2125	int n;
   2126
   2127	pi_mr->mmkey.ndescs = 0;
   2128	pi_mr->meta_ndescs = 0;
   2129	pi_mr->meta_length = 0;
   2130
   2131	ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map,
   2132				   pi_mr->desc_size * pi_mr->max_descs,
   2133				   DMA_TO_DEVICE);
   2134
   2135	pi_mr->ibmr.page_size = ibmr->page_size;
   2136	n = ib_sg_to_pages(&pi_mr->ibmr, data_sg, data_sg_nents, data_sg_offset,
   2137			   mlx5_set_page);
   2138	if (n != data_sg_nents)
   2139		return n;
   2140
   2141	pi_mr->data_iova = pi_mr->ibmr.iova;
   2142	pi_mr->data_length = pi_mr->ibmr.length;
   2143	pi_mr->ibmr.length = pi_mr->data_length;
   2144	ibmr->length = pi_mr->data_length;
   2145
   2146	if (meta_sg_nents) {
   2147		u64 page_mask = ~((u64)ibmr->page_size - 1);
   2148		u64 iova = pi_mr->data_iova;
   2149
   2150		n += ib_sg_to_pages(&pi_mr->ibmr, meta_sg, meta_sg_nents,
   2151				    meta_sg_offset, mlx5_set_page_pi);
   2152
   2153		pi_mr->meta_length = pi_mr->ibmr.length;
   2154		/*
   2155		 * PI address for the HW is the offset of the metadata address
   2156		 * relative to the first data page address.
    2157		 * It equals the first data page address + the size of the data pages +
    2158		 * the metadata offset within the first metadata page
   2159		 */
   2160		pi_mr->pi_iova = (iova & page_mask) +
   2161				 pi_mr->mmkey.ndescs * ibmr->page_size +
   2162				 (pi_mr->ibmr.iova & ~page_mask);
   2163		/*
    2164		 * In order to use one MTT MR for both data and metadata, we also
    2165		 * register the gaps between the end of the data and the start of
    2166		 * the metadata (the sig MR will verify that the HW accesses the
    2167		 * right addresses). This mapping is safe because we use an
    2168		 * internal mkey for the registration.
   2169		 */
   2170		pi_mr->ibmr.length = pi_mr->pi_iova + pi_mr->meta_length - iova;
   2171		pi_mr->ibmr.iova = iova;
   2172		ibmr->length += pi_mr->meta_length;
   2173	}
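	/*
	 * Worked example with assumed numbers: ibmr->page_size = 0x1000,
	 * data_iova = 0x10000, two data pages mapped (mmkey.ndescs == 2) and
	 * metadata whose in-page offset (pi_mr->ibmr.iova & ~page_mask) is 0xc0:
	 *
	 *	pi_iova            = 0x10000 + 2 * 0x1000 + 0xc0 = 0x120c0
	 *	pi_mr->ibmr.length = 0x120c0 + meta_length - 0x10000
	 *
	 * i.e. the region covers the data pages, the gap up to the metadata
	 * and the metadata itself.
	 */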
   2174
   2175	ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map,
   2176				      pi_mr->desc_size * pi_mr->max_descs,
   2177				      DMA_TO_DEVICE);
   2178
   2179	return n;
   2180}
   2181
   2182static int
   2183mlx5_ib_map_klm_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
   2184			 int data_sg_nents, unsigned int *data_sg_offset,
   2185			 struct scatterlist *meta_sg, int meta_sg_nents,
   2186			 unsigned int *meta_sg_offset)
   2187{
   2188	struct mlx5_ib_mr *mr = to_mmr(ibmr);
   2189	struct mlx5_ib_mr *pi_mr = mr->klm_mr;
   2190	int n;
   2191
   2192	pi_mr->mmkey.ndescs = 0;
   2193	pi_mr->meta_ndescs = 0;
   2194	pi_mr->meta_length = 0;
   2195
   2196	ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map,
   2197				   pi_mr->desc_size * pi_mr->max_descs,
   2198				   DMA_TO_DEVICE);
   2199
   2200	n = mlx5_ib_sg_to_klms(pi_mr, data_sg, data_sg_nents, data_sg_offset,
   2201			       meta_sg, meta_sg_nents, meta_sg_offset);
   2202
   2203	ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map,
   2204				      pi_mr->desc_size * pi_mr->max_descs,
   2205				      DMA_TO_DEVICE);
   2206
    2207	/* This is a zero-based memory region */
   2208	pi_mr->data_iova = 0;
   2209	pi_mr->ibmr.iova = 0;
   2210	pi_mr->pi_iova = pi_mr->data_length;
   2211	ibmr->length = pi_mr->ibmr.length;
   2212
   2213	return n;
   2214}
   2215
   2216int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
   2217			 int data_sg_nents, unsigned int *data_sg_offset,
   2218			 struct scatterlist *meta_sg, int meta_sg_nents,
   2219			 unsigned int *meta_sg_offset)
   2220{
   2221	struct mlx5_ib_mr *mr = to_mmr(ibmr);
   2222	struct mlx5_ib_mr *pi_mr = NULL;
   2223	int n;
   2224
   2225	WARN_ON(ibmr->type != IB_MR_TYPE_INTEGRITY);
   2226
   2227	mr->mmkey.ndescs = 0;
   2228	mr->data_length = 0;
   2229	mr->data_iova = 0;
   2230	mr->meta_ndescs = 0;
   2231	mr->pi_iova = 0;
   2232	/*
    2233	 * As a performance optimization, if possible, there is no need to
    2234	 * perform a UMR operation to register the data/metadata buffers.
    2235	 * First try to map the sg lists to PA descriptors with local_dma_lkey.
    2236	 * Fall back to UMR only in case of a failure.
   2237	 */
   2238	n = mlx5_ib_map_pa_mr_sg_pi(ibmr, data_sg, data_sg_nents,
   2239				    data_sg_offset, meta_sg, meta_sg_nents,
   2240				    meta_sg_offset);
   2241	if (n == data_sg_nents + meta_sg_nents)
   2242		goto out;
   2243	/*
   2244	 * As a performance optimization, if possible, there is no need to map
   2245	 * the sg lists to KLM descriptors. First try to map the sg lists to MTT
    2246	 * descriptors and fall back to KLM only in case of a failure.
    2247	 * It's more efficient for the HW to work with MTT descriptors
    2248	 * (especially under high load).
   2249	 * Use KLM (indirect access) only if it's mandatory.
   2250	 */
   2251	pi_mr = mr->mtt_mr;
   2252	n = mlx5_ib_map_mtt_mr_sg_pi(ibmr, data_sg, data_sg_nents,
   2253				     data_sg_offset, meta_sg, meta_sg_nents,
   2254				     meta_sg_offset);
   2255	if (n == data_sg_nents + meta_sg_nents)
   2256		goto out;
   2257
   2258	pi_mr = mr->klm_mr;
   2259	n = mlx5_ib_map_klm_mr_sg_pi(ibmr, data_sg, data_sg_nents,
   2260				     data_sg_offset, meta_sg, meta_sg_nents,
   2261				     meta_sg_offset);
   2262	if (unlikely(n != data_sg_nents + meta_sg_nents))
   2263		return -ENOMEM;
   2264
   2265out:
    2266	/* This is a zero-based memory region */
   2267	ibmr->iova = 0;
   2268	mr->pi_mr = pi_mr;
   2269	if (pi_mr)
   2270		ibmr->sig_attrs->meta_length = pi_mr->meta_length;
   2271	else
   2272		ibmr->sig_attrs->meta_length = mr->meta_length;
   2273
   2274	return 0;
   2275}
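/*
 * Hedged usage sketch (not from this file): an integrity ULP reaches this
 * handler through the core ib_map_mr_sg_pi() helper and then posts an
 * IB_WR_REG_MR_INTEGRITY work request referencing ibmr, roughly:
 *
 *	n = ib_map_mr_sg_pi(ibmr, data_sg, data_sg_nents, NULL,
 *			    meta_sg, meta_sg_nents, NULL, SZ_4K);
 *	if (unlikely(n != data_sg_nents + meta_sg_nents))
 *		return -ENOMEM;
 */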
   2276
   2277int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
   2278		      unsigned int *sg_offset)
   2279{
   2280	struct mlx5_ib_mr *mr = to_mmr(ibmr);
   2281	int n;
   2282
   2283	mr->mmkey.ndescs = 0;
   2284
   2285	ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map,
   2286				   mr->desc_size * mr->max_descs,
   2287				   DMA_TO_DEVICE);
   2288
   2289	if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
   2290		n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset, NULL, 0,
   2291				       NULL);
   2292	else
   2293		n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
   2294				mlx5_set_page);
   2295
   2296	ib_dma_sync_single_for_device(ibmr->device, mr->desc_map,
   2297				      mr->desc_size * mr->max_descs,
   2298				      DMA_TO_DEVICE);
   2299
   2300	return n;
   2301}
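/*
 * Hedged usage sketch (not from this file): kernel ULPs reach this handler
 * through the core ib_map_mr_sg() wrapper after DMA-mapping the SG list,
 * then post an IB_WR_REG_MR work request, roughly:
 *
 *	n = ib_map_mr_sg(mr, sgl, sg_nents, NULL, PAGE_SIZE);
 *	if (unlikely(n != sg_nents))
 *		return n < 0 ? n : -EINVAL;
 */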