cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

umr.c (18811B)


      1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
      2/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */
      3
      4#include <rdma/ib_umem_odp.h>
      5#include "mlx5_ib.h"
      6#include "umr.h"
      7#include "wr.h"
      8
/*
 * Last-resort XLT buffer. mlx5r_umr_alloc_xlt() hands it out when every page
 * allocation attempt has failed. Nothing in this file allocates it, so it is
 * presumably set up elsewhere in the driver (TODO confirm).
 *
 * We can't use an array for xlt_emergency_page because dma_map_single doesn't
 * work on kernel modules memory
 */
void *xlt_emergency_page;
/*
 * Taken when the emergency page is handed out and released when that page is
 * passed back to mlx5r_umr_free_xlt().
 */
static DEFINE_MUTEX(xlt_emergency_page_mutex);
     15
     16static __be64 get_umr_enable_mr_mask(void)
     17{
     18	u64 result;
     19
     20	result = MLX5_MKEY_MASK_KEY |
     21		 MLX5_MKEY_MASK_FREE;
     22
     23	return cpu_to_be64(result);
     24}
     25
     26static __be64 get_umr_disable_mr_mask(void)
     27{
     28	u64 result;
     29
     30	result = MLX5_MKEY_MASK_FREE;
     31
     32	return cpu_to_be64(result);
     33}
     34
     35static __be64 get_umr_update_translation_mask(void)
     36{
     37	u64 result;
     38
     39	result = MLX5_MKEY_MASK_LEN |
     40		 MLX5_MKEY_MASK_PAGE_SIZE |
     41		 MLX5_MKEY_MASK_START_ADDR;
     42
     43	return cpu_to_be64(result);
     44}
     45
     46static __be64 get_umr_update_access_mask(struct mlx5_ib_dev *dev)
     47{
     48	u64 result;
     49
     50	result = MLX5_MKEY_MASK_LR |
     51		 MLX5_MKEY_MASK_LW |
     52		 MLX5_MKEY_MASK_RR |
     53		 MLX5_MKEY_MASK_RW;
     54
     55	if (MLX5_CAP_GEN(dev->mdev, atomic))
     56		result |= MLX5_MKEY_MASK_A;
     57
     58	if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
     59		result |= MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE;
     60
     61	if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
     62		result |= MLX5_MKEY_MASK_RELAXED_ORDERING_READ;
     63
     64	return cpu_to_be64(result);
     65}
     66
     67static __be64 get_umr_update_pd_mask(void)
     68{
     69	u64 result;
     70
     71	result = MLX5_MKEY_MASK_PD;
     72
     73	return cpu_to_be64(result);
     74}
     75
     76static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask)
     77{
     78	if (mask & MLX5_MKEY_MASK_PAGE_SIZE &&
     79	    MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
     80		return -EPERM;
     81
     82	if (mask & MLX5_MKEY_MASK_A &&
     83	    MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
     84		return -EPERM;
     85
     86	if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE &&
     87	    !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
     88		return -EPERM;
     89
     90	if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_READ &&
     91	    !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
     92		return -EPERM;
     93
     94	return 0;
     95}
     96
/*
 * Number of send work requests requested for the UMR QP. The same value is
 * the initial count of the semaphore that bounds concurrent UMR posts.
 */
enum {
	MAX_UMR_WR = 128,
};
    100
    101static int mlx5r_umr_qp_rst2rts(struct mlx5_ib_dev *dev, struct ib_qp *qp)
    102{
    103	struct ib_qp_attr attr = {};
    104	int ret;
    105
    106	attr.qp_state = IB_QPS_INIT;
    107	attr.port_num = 1;
    108	ret = ib_modify_qp(qp, &attr,
    109			   IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT);
    110	if (ret) {
    111		mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
    112		return ret;
    113	}
    114
    115	memset(&attr, 0, sizeof(attr));
    116	attr.qp_state = IB_QPS_RTR;
    117
    118	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
    119	if (ret) {
    120		mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
    121		return ret;
    122	}
    123
    124	memset(&attr, 0, sizeof(attr));
    125	attr.qp_state = IB_QPS_RTS;
    126	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
    127	if (ret) {
    128		mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
    129		return ret;
    130	}
    131
    132	return 0;
    133}
    134
    135int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev)
    136{
    137	struct ib_qp_init_attr init_attr = {};
    138	struct ib_pd *pd;
    139	struct ib_cq *cq;
    140	struct ib_qp *qp;
    141	int ret;
    142
    143	pd = ib_alloc_pd(&dev->ib_dev, 0);
    144	if (IS_ERR(pd)) {
    145		mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
    146		return PTR_ERR(pd);
    147	}
    148
    149	cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
    150	if (IS_ERR(cq)) {
    151		mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
    152		ret = PTR_ERR(cq);
    153		goto destroy_pd;
    154	}
    155
    156	init_attr.send_cq = cq;
    157	init_attr.recv_cq = cq;
    158	init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
    159	init_attr.cap.max_send_wr = MAX_UMR_WR;
    160	init_attr.cap.max_send_sge = 1;
    161	init_attr.qp_type = MLX5_IB_QPT_REG_UMR;
    162	init_attr.port_num = 1;
    163	qp = ib_create_qp(pd, &init_attr);
    164	if (IS_ERR(qp)) {
    165		mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
    166		ret = PTR_ERR(qp);
    167		goto destroy_cq;
    168	}
    169
    170	ret = mlx5r_umr_qp_rst2rts(dev, qp);
    171	if (ret)
    172		goto destroy_qp;
    173
    174	dev->umrc.qp = qp;
    175	dev->umrc.cq = cq;
    176	dev->umrc.pd = pd;
    177
    178	sema_init(&dev->umrc.sem, MAX_UMR_WR);
    179
    180	return 0;
    181
    182destroy_qp:
    183	ib_destroy_qp(qp);
    184destroy_cq:
    185	ib_free_cq(cq);
    186destroy_pd:
    187	ib_dealloc_pd(pd);
    188	return ret;
    189}
    190
    191void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev)
    192{
    193	ib_destroy_qp(dev->umrc.qp);
    194	ib_free_cq(dev->umrc.cq);
    195	ib_dealloc_pd(dev->umrc.pd);
    196}
    197
/*
 * Build one UMR work request on the send queue of @ibqp and ring the
 * doorbell.
 *
 * @ibqp:      QP to post on.
 * @mkey:      mkey value, passed big-endian to mlx5r_begin_wqe().
 * @cqe:       completion entry; its pointer is carried as the wr_id.
 * @wqe:       prepared UMR WQE to copy into the send queue.
 * @with_data: when false, the last sizeof(struct mlx5_wqe_data_seg) bytes of
 *             @wqe are not copied (presumably the data segment - confirm
 *             against the struct mlx5r_umr_wqe layout).
 *
 * Returns -EIO if the device is in internal error state, the error from
 * mlx5r_begin_wqe() if it fails, and 0 otherwise.
 */
static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe,
			       struct mlx5r_umr_wqe *wqe, bool with_data)
{
	unsigned int wqe_size =
		with_data ? sizeof(struct mlx5r_umr_wqe) :
			    sizeof(struct mlx5r_umr_wqe) -
				    sizeof(struct mlx5_wqe_data_seg);
	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
	struct mlx5_core_dev *mdev = dev->mdev;
	struct mlx5_ib_qp *qp = to_mqp(ibqp);
	struct mlx5_wqe_ctrl_seg *ctrl;
	/* Lets the cqe pointer be handed over through the u64 wr_id argument. */
	union {
		struct ib_cqe *ib_cqe;
		u64 wr_id;
	} id;
	void *cur_edge, *seg;
	unsigned long flags;
	unsigned int idx;
	int size, err;

	if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR))
		return -EIO;

	/* The whole WQE is built and submitted under the send queue lock. */
	spin_lock_irqsave(&qp->sq.lock, flags);

	err = mlx5r_begin_wqe(qp, &seg, &ctrl, &idx, &size, &cur_edge, 0,
			      cpu_to_be32(mkey), false, false);
	if (WARN_ON(err))
		goto out;

	/* Tag the slot as a UMR work request. */
	qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;

	mlx5r_memcpy_send_wqe(&qp->sq, &cur_edge, &seg, &size, wqe, wqe_size);

	id.ib_cqe = cqe;
	mlx5r_finish_wqe(qp, ctrl, seg, size, cur_edge, idx, id.wr_id, 0,
			 MLX5_FENCE_MODE_NONE, MLX5_OPCODE_UMR);

	mlx5r_ring_db(qp, 1, ctrl);

out:
	spin_unlock_irqrestore(&qp->sq.lock, flags);

	return err;
}
    243
    244static void mlx5r_umr_done(struct ib_cq *cq, struct ib_wc *wc)
    245{
    246	struct mlx5_ib_umr_context *context =
    247		container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);
    248
    249	context->status = wc->status;
    250	complete(&context->done);
    251}
    252
    253static inline void mlx5r_umr_init_context(struct mlx5r_umr_context *context)
    254{
    255	context->cqe.done = mlx5r_umr_done;
    256	init_completion(&context->done);
    257}
    258
    259static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey,
    260				   struct mlx5r_umr_wqe *wqe, bool with_data)
    261{
    262	struct umr_common *umrc = &dev->umrc;
    263	struct mlx5r_umr_context umr_context;
    264	int err;
    265
    266	err = umr_check_mkey_mask(dev, be64_to_cpu(wqe->ctrl_seg.mkey_mask));
    267	if (WARN_ON(err))
    268		return err;
    269
    270	mlx5r_umr_init_context(&umr_context);
    271
    272	down(&umrc->sem);
    273	err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context.cqe, wqe,
    274				  with_data);
    275	if (err)
    276		mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err);
    277	else {
    278		wait_for_completion(&umr_context.done);
    279		if (umr_context.status != IB_WC_SUCCESS) {
    280			mlx5_ib_warn(dev, "reg umr failed (%u)\n",
    281				     umr_context.status);
    282			err = -EFAULT;
    283		}
    284	}
    285	up(&umrc->sem);
    286	return err;
    287}
    288
    289/**
    290 * mlx5r_umr_revoke_mr - Fence all DMA on the MR
    291 * @mr: The MR to fence
    292 *
    293 * Upon return the NIC will not be doing any DMA to the pages under the MR,
    294 * and any DMA in progress will be completed. Failure of this function
    295 * indicates the HW has failed catastrophically.
    296 */
    297int mlx5r_umr_revoke_mr(struct mlx5_ib_mr *mr)
    298{
    299	struct mlx5_ib_dev *dev = mr_to_mdev(mr);
    300	struct mlx5r_umr_wqe wqe = {};
    301
    302	if (dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
    303		return 0;
    304
    305	wqe.ctrl_seg.mkey_mask |= get_umr_update_pd_mask();
    306	wqe.ctrl_seg.mkey_mask |= get_umr_disable_mr_mask();
    307	wqe.ctrl_seg.flags |= MLX5_UMR_INLINE;
    308
    309	MLX5_SET(mkc, &wqe.mkey_seg, free, 1);
    310	MLX5_SET(mkc, &wqe.mkey_seg, pd, to_mpd(dev->umrc.pd)->pdn);
    311	MLX5_SET(mkc, &wqe.mkey_seg, qpn, 0xffffff);
    312	MLX5_SET(mkc, &wqe.mkey_seg, mkey_7_0,
    313		 mlx5_mkey_variant(mr->mmkey.key));
    314
    315	return mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false);
    316}
    317
    318static void mlx5r_umr_set_access_flags(struct mlx5_ib_dev *dev,
    319				       struct mlx5_mkey_seg *seg,
    320				       unsigned int access_flags)
    321{
    322	MLX5_SET(mkc, seg, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
    323	MLX5_SET(mkc, seg, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
    324	MLX5_SET(mkc, seg, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
    325	MLX5_SET(mkc, seg, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE));
    326	MLX5_SET(mkc, seg, lr, 1);
    327	MLX5_SET(mkc, seg, relaxed_ordering_write,
    328		 !!(access_flags & IB_ACCESS_RELAXED_ORDERING));
    329	MLX5_SET(mkc, seg, relaxed_ordering_read,
    330		 !!(access_flags & IB_ACCESS_RELAXED_ORDERING));
    331}
    332
    333int mlx5r_umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd,
    334			      int access_flags)
    335{
    336	struct mlx5_ib_dev *dev = mr_to_mdev(mr);
    337	struct mlx5r_umr_wqe wqe = {};
    338	int err;
    339
    340	wqe.ctrl_seg.mkey_mask = get_umr_update_access_mask(dev);
    341	wqe.ctrl_seg.mkey_mask |= get_umr_update_pd_mask();
    342	wqe.ctrl_seg.flags = MLX5_UMR_CHECK_FREE;
    343	wqe.ctrl_seg.flags |= MLX5_UMR_INLINE;
    344
    345	mlx5r_umr_set_access_flags(dev, &wqe.mkey_seg, access_flags);
    346	MLX5_SET(mkc, &wqe.mkey_seg, pd, to_mpd(pd)->pdn);
    347	MLX5_SET(mkc, &wqe.mkey_seg, qpn, 0xffffff);
    348	MLX5_SET(mkc, &wqe.mkey_seg, mkey_7_0,
    349		 mlx5_mkey_variant(mr->mmkey.key));
    350
    351	err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false);
    352	if (err)
    353		return err;
    354
    355	mr->access_flags = access_flags;
    356	return 0;
    357}
    358
/*
 * Upper bound, in bytes, on the first XLT buffer allocation attempted by
 * mlx5r_umr_alloc_xlt().
 */
#define MLX5_MAX_UMR_CHUNK                                                     \
	((1 << (MLX5_MAX_UMR_SHIFT + 4)) - MLX5_UMR_MTT_ALIGNMENT)
/* Size in bytes (64 KiB) of the second, smaller allocation attempt. */
#define MLX5_SPARE_UMR_CHUNK 0x10000
    362
/*
 * Allocate a temporary buffer to hold the per-page information to transfer to
 * HW. For efficiency this should be as large as it can be, but buffer
 * allocation failure is not allowed, so try smaller sizes.
 *
 * @nents:    in: number of entries wanted; out: number of entries the
 *            returned buffer can hold.
 * @ent_size: size in bytes of one entry.
 * @gfp_mask: base allocation flags; __GFP_ZERO and __GFP_NORETRY are added.
 *
 * Attempts, in order: the requested size capped at MLX5_MAX_UMR_CHUNK,
 * MLX5_SPARE_UMR_CHUNK, a single page, and finally the shared
 * xlt_emergency_page. The function therefore never returns NULL itself.
 *
 * When the emergency page is returned, xlt_emergency_page_mutex is left
 * locked; mlx5r_umr_free_xlt() releases it.
 */
static void *mlx5r_umr_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask)
{
	/* Number of entries that make up one MLX5_UMR_MTT_ALIGNMENT unit. */
	const size_t xlt_chunk_align = MLX5_UMR_MTT_ALIGNMENT / ent_size;
	size_t size;
	void *res = NULL;

	static_assert(PAGE_SIZE % MLX5_UMR_MTT_ALIGNMENT == 0);

	/*
	 * MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context just that the
	 * allocation can't trigger any kind of reclaim.
	 */
	might_sleep();

	gfp_mask |= __GFP_ZERO | __GFP_NORETRY;

	/*
	 * If the system already has a suitable high order page then just use
	 * that, but don't try hard to create one. This max is about 1M, so a
	 * free x86 huge page will satisfy it.
	 */
	size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align),
		     MLX5_MAX_UMR_CHUNK);
	*nents = size / ent_size;
	res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
				       get_order(size));
	if (res)
		return res;

	/* Second attempt: fall back to the smaller spare chunk size. */
	if (size > MLX5_SPARE_UMR_CHUNK) {
		size = MLX5_SPARE_UMR_CHUNK;
		*nents = size / ent_size;
		res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
					       get_order(size));
		if (res)
			return res;
	}

	/* Third attempt: a single page, this time without __GFP_NOWARN. */
	*nents = PAGE_SIZE / ent_size;
	res = (void *)__get_free_page(gfp_mask);
	if (res)
		return res;

	/*
	 * Last resort: the shared emergency page. The mutex stays held on
	 * return so that only one user has the page at a time.
	 */
	mutex_lock(&xlt_emergency_page_mutex);
	memset(xlt_emergency_page, 0, PAGE_SIZE);
	return xlt_emergency_page;
}
    415
    416static void mlx5r_umr_free_xlt(void *xlt, size_t length)
    417{
    418	if (xlt == xlt_emergency_page) {
    419		mutex_unlock(&xlt_emergency_page_mutex);
    420		return;
    421	}
    422
    423	free_pages((unsigned long)xlt, get_order(length));
    424}
    425
    426static void mlx5r_umr_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt,
    427				     struct ib_sge *sg)
    428{
    429	struct device *ddev = &dev->mdev->pdev->dev;
    430
    431	dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE);
    432	mlx5r_umr_free_xlt(xlt, sg->length);
    433}
    434
    435/*
    436 * Create an XLT buffer ready for submission.
    437 */
    438static void *mlx5r_umr_create_xlt(struct mlx5_ib_dev *dev, struct ib_sge *sg,
    439				  size_t nents, size_t ent_size,
    440				  unsigned int flags)
    441{
    442	struct device *ddev = &dev->mdev->pdev->dev;
    443	dma_addr_t dma;
    444	void *xlt;
    445
    446	xlt = mlx5r_umr_alloc_xlt(&nents, ent_size,
    447				 flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC :
    448								  GFP_KERNEL);
    449	sg->length = nents * ent_size;
    450	dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE);
    451	if (dma_mapping_error(ddev, dma)) {
    452		mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
    453		mlx5r_umr_free_xlt(xlt, sg->length);
    454		return NULL;
    455	}
    456	sg->addr = dma;
    457	sg->lkey = dev->umrc.pd->local_dma_lkey;
    458
    459	return xlt;
    460}
    461
    462static void
    463mlx5r_umr_set_update_xlt_ctrl_seg(struct mlx5_wqe_umr_ctrl_seg *ctrl_seg,
    464				  unsigned int flags, struct ib_sge *sg)
    465{
    466	if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
    467		/* fail if free */
    468		ctrl_seg->flags = MLX5_UMR_CHECK_FREE;
    469	else
    470		/* fail if not free */
    471		ctrl_seg->flags = MLX5_UMR_CHECK_NOT_FREE;
    472	ctrl_seg->xlt_octowords =
    473		cpu_to_be16(mlx5r_umr_get_xlt_octo(sg->length));
    474}
    475
    476static void mlx5r_umr_set_update_xlt_mkey_seg(struct mlx5_ib_dev *dev,
    477					      struct mlx5_mkey_seg *mkey_seg,
    478					      struct mlx5_ib_mr *mr,
    479					      unsigned int page_shift)
    480{
    481	mlx5r_umr_set_access_flags(dev, mkey_seg, mr->access_flags);
    482	MLX5_SET(mkc, mkey_seg, pd, to_mpd(mr->ibmr.pd)->pdn);
    483	MLX5_SET64(mkc, mkey_seg, start_addr, mr->ibmr.iova);
    484	MLX5_SET64(mkc, mkey_seg, len, mr->ibmr.length);
    485	MLX5_SET(mkc, mkey_seg, log_page_size, page_shift);
    486	MLX5_SET(mkc, mkey_seg, qpn, 0xffffff);
    487	MLX5_SET(mkc, mkey_seg, mkey_7_0, mlx5_mkey_variant(mr->mmkey.key));
    488}
    489
    490static void
    491mlx5r_umr_set_update_xlt_data_seg(struct mlx5_wqe_data_seg *data_seg,
    492				  struct ib_sge *sg)
    493{
    494	data_seg->byte_count = cpu_to_be32(sg->length);
    495	data_seg->lkey = cpu_to_be32(sg->lkey);
    496	data_seg->addr = cpu_to_be64(sg->addr);
    497}
    498
    499static void mlx5r_umr_update_offset(struct mlx5_wqe_umr_ctrl_seg *ctrl_seg,
    500				    u64 offset)
    501{
    502	u64 octo_offset = mlx5r_umr_get_xlt_octo(offset);
    503
    504	ctrl_seg->xlt_offset = cpu_to_be16(octo_offset & 0xffff);
    505	ctrl_seg->xlt_offset_47_16 = cpu_to_be32(octo_offset >> 16);
    506	ctrl_seg->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
    507}
    508
    509static void mlx5r_umr_final_update_xlt(struct mlx5_ib_dev *dev,
    510				       struct mlx5r_umr_wqe *wqe,
    511				       struct mlx5_ib_mr *mr, struct ib_sge *sg,
    512				       unsigned int flags)
    513{
    514	bool update_pd_access, update_translation;
    515
    516	if (flags & MLX5_IB_UPD_XLT_ENABLE)
    517		wqe->ctrl_seg.mkey_mask |= get_umr_enable_mr_mask();
    518
    519	update_pd_access = flags & MLX5_IB_UPD_XLT_ENABLE ||
    520			   flags & MLX5_IB_UPD_XLT_PD ||
    521			   flags & MLX5_IB_UPD_XLT_ACCESS;
    522
    523	if (update_pd_access) {
    524		wqe->ctrl_seg.mkey_mask |= get_umr_update_access_mask(dev);
    525		wqe->ctrl_seg.mkey_mask |= get_umr_update_pd_mask();
    526	}
    527
    528	update_translation =
    529		flags & MLX5_IB_UPD_XLT_ENABLE || flags & MLX5_IB_UPD_XLT_ADDR;
    530
    531	if (update_translation) {
    532		wqe->ctrl_seg.mkey_mask |= get_umr_update_translation_mask();
    533		if (!mr->ibmr.length)
    534			MLX5_SET(mkc, &wqe->mkey_seg, length64, 1);
    535	}
    536
    537	wqe->ctrl_seg.xlt_octowords =
    538		cpu_to_be16(mlx5r_umr_get_xlt_octo(sg->length));
    539	wqe->data_seg.byte_count = cpu_to_be32(sg->length);
    540}
    541
/*
 * Send the DMA list to the HW for a normal MR using UMR.
 * Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP
 * flag may be used.
 *
 * The MTT entries are written into one DMA-mapped buffer. Whenever the
 * buffer fills up it is posted and then reused for the next chunk at an
 * advanced XLT offset. The final chunk, which may be partial, is zero-padded
 * to MLX5_UMR_MTT_ALIGNMENT and posted with the final mkey mask.
 *
 * Returns 0 on success, -EINVAL for an ODP umem, -ENOMEM if the XLT buffer
 * could not be created, or the error from a failed UMR post.
 */
int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
{
	struct mlx5_ib_dev *dev = mr_to_mdev(mr);
	struct device *ddev = &dev->mdev->pdev->dev;
	struct mlx5r_umr_wqe wqe = {};
	struct ib_block_iter biter;
	struct mlx5_mtt *cur_mtt;
	size_t orig_sg_length;
	struct mlx5_mtt *mtt;
	size_t final_size;
	struct ib_sge sg;
	u64 offset = 0;
	int err = 0;

	if (WARN_ON(mr->umem->is_odp))
		return -EINVAL;

	mtt = mlx5r_umr_create_xlt(
		dev, &sg, ib_umem_num_dma_blocks(mr->umem, 1 << mr->page_shift),
		sizeof(*mtt), flags);
	if (!mtt)
		return -ENOMEM;

	/* sg.length is shrunk for the last chunk; keep the mapped size. */
	orig_sg_length = sg.length;

	mlx5r_umr_set_update_xlt_ctrl_seg(&wqe.ctrl_seg, flags, &sg);
	mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe.mkey_seg, mr,
					  mr->page_shift);
	mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg);

	cur_mtt = mtt;
	rdma_for_each_block(mr->umem->sgt_append.sgt.sgl, &biter,
			    mr->umem->sgt_append.sgt.nents,
			    BIT(mr->page_shift)) {
		/* Buffer full: flush it to HW and start refilling from the top. */
		if (cur_mtt == (void *)mtt + sg.length) {
			dma_sync_single_for_device(ddev, sg.addr, sg.length,
						   DMA_TO_DEVICE);

			err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe,
						       true);
			if (err)
				goto err;
			dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
						DMA_TO_DEVICE);
			offset += sg.length;
			mlx5r_umr_update_offset(&wqe.ctrl_seg, offset);

			cur_mtt = mtt;
		}

		cur_mtt->ptag =
			cpu_to_be64(rdma_block_iter_dma_address(&biter) |
				    MLX5_IB_MTT_PRESENT);

		/* A zap request on a dmabuf MR writes empty entries instead. */
		if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP))
			cur_mtt->ptag = 0;

		cur_mtt++;
	}

	/* Pad the tail of the last chunk with zeroes up to the alignment. */
	final_size = (void *)cur_mtt - (void *)mtt;
	sg.length = ALIGN(final_size, MLX5_UMR_MTT_ALIGNMENT);
	memset(cur_mtt, 0, sg.length - final_size);
	mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags);

	dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE);
	err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, true);

err:
	/* Unmap and free with the length that was originally mapped. */
	sg.length = orig_sg_length;
	mlx5r_umr_unmap_free_xlt(dev, mtt, &sg);
	return err;
}
    620
    621static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
    622{
    623	return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled);
    624}
    625
/*
 * Update a range of translation entries of an ODP MR through UMR.
 *
 * @mr:         MR to update; its umem must be ODP.
 * @idx:        index of the first entry to update.
 * @npages:     number of entries to update.
 * @page_shift: log page size written to the mkey segment.
 * @flags:      MLX5_IB_UPD_XLT_* flags; INDIRECT selects KLM-sized
 *              descriptors instead of MTT-sized ones.
 *
 * The range is widened to MLX5_UMR_MTT_ALIGNMENT boundaries and sent in
 * chunks no larger than the XLT buffer that could be allocated. The last
 * chunk carries the final mkey mask.
 *
 * Returns 0 on success, -EPERM if INDIRECT is requested but not usable on
 * this device, -EINVAL for a non-ODP umem, -ENOMEM if the XLT buffer could
 * not be created, or the error from a failed UMR post.
 */
int mlx5r_umr_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
			 int page_shift, int flags)
{
	int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
			       ? sizeof(struct mlx5_klm)
			       : sizeof(struct mlx5_mtt);
	/* Number of descriptors in one MLX5_UMR_MTT_ALIGNMENT unit. */
	const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
	struct mlx5_ib_dev *dev = mr_to_mdev(mr);
	struct device *ddev = &dev->mdev->pdev->dev;
	const int page_mask = page_align - 1;
	struct mlx5r_umr_wqe wqe = {};
	size_t pages_mapped = 0;
	size_t pages_to_map = 0;
	size_t size_to_map = 0;
	size_t orig_sg_length;
	size_t pages_iter;
	struct ib_sge sg;
	int err = 0;
	void *xlt;

	if ((flags & MLX5_IB_UPD_XLT_INDIRECT) &&
	    !umr_can_use_indirect_mkey(dev))
		return -EPERM;

	if (WARN_ON(!mr->umem->is_odp))
		return -EINVAL;

	/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
	 * so we need to align the offset and length accordingly
	 */
	if (idx & page_mask) {
		npages += idx & page_mask;
		idx &= ~page_mask;
	}
	pages_to_map = ALIGN(npages, page_align);

	xlt = mlx5r_umr_create_xlt(dev, &sg, npages, desc_size, flags);
	if (!xlt)
		return -ENOMEM;

	/* Entries per chunk, as dictated by the buffer actually obtained. */
	pages_iter = sg.length / desc_size;
	/* sg.length is shrunk inside the loop; keep the mapped size. */
	orig_sg_length = sg.length;

	/* For direct (MTT) updates, do not run past the end of the umem. */
	if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) {
		struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
		size_t max_pages = ib_umem_odp_num_pages(odp) - idx;

		pages_to_map = min_t(size_t, pages_to_map, max_pages);
	}

	mlx5r_umr_set_update_xlt_ctrl_seg(&wqe.ctrl_seg, flags, &sg);
	mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe.mkey_seg, mr, page_shift);
	mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg);

	for (pages_mapped = 0;
	     pages_mapped < pages_to_map && !err;
	     pages_mapped += pages_iter, idx += pages_iter) {
		npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
		size_to_map = npages * desc_size;
		/* Hand the buffer to the CPU, fill it, hand it back to HW. */
		dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
					DMA_TO_DEVICE);
		mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
		dma_sync_single_for_device(ddev, sg.addr, sg.length,
					   DMA_TO_DEVICE);
		sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT);

		/* Last chunk: add the final mkey mask and sizes. */
		if (pages_mapped + pages_iter >= pages_to_map)
			mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags);
		mlx5r_umr_update_offset(&wqe.ctrl_seg, idx * desc_size);
		err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, true);
	}
	/* Unmap and free with the length that was originally mapped. */
	sg.length = orig_sg_length;
	mlx5r_umr_unmap_free_xlt(dev, xlt, &sg);
	return err;
}
    700}