cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

hns_roce_mr.c (25585B)


      1/*
      2 * Copyright (c) 2016 Hisilicon Limited.
      3 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
      4 *
      5 * This software is available to you under a choice of one of two
      6 * licenses.  You may choose to be licensed under the terms of the GNU
      7 * General Public License (GPL) Version 2, available from the file
      8 * COPYING in the main directory of this source tree, or the
      9 * OpenIB.org BSD license below:
     10 *
     11 *     Redistribution and use in source and binary forms, with or
     12 *     without modification, are permitted provided that the following
     13 *     conditions are met:
     14 *
     15 *      - Redistributions of source code must retain the above
     16 *        copyright notice, this list of conditions and the following
     17 *        disclaimer.
     18 *
     19 *      - Redistributions in binary form must reproduce the above
     20 *        copyright notice, this list of conditions and the following
     21 *        disclaimer in the documentation and/or other materials
     22 *        provided with the distribution.
     23 *
     24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
     27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
     28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
     29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
     30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     31 * SOFTWARE.
     32 */
     33
     34#include <linux/vmalloc.h>
     35#include <rdma/ib_umem.h>
     36#include "hns_roce_device.h"
     37#include "hns_roce_cmd.h"
     38#include "hns_roce_hem.h"
     39
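        /*
         * The MR key is the MTPT index rotated left by 8 bits (the index ends
         * up in the key's upper 24 bits); key_to_hw_index() below is the
         * inverse rotation.
         */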
     40static u32 hw_index_to_key(int ind)
     41{
     42	return ((u32)ind >> 24) | ((u32)ind << 8);
     43}
     44
     45unsigned long key_to_hw_index(u32 key)
     46{
     47	return (key << 24) | (key >> 8);
     48}
     49
     50static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
     51{
     52	struct hns_roce_ida *mtpt_ida = &hr_dev->mr_table.mtpt_ida;
     53	struct ib_device *ibdev = &hr_dev->ib_dev;
     54	int err;
     55	int id;
     56
     57	/* Allocate a key for mr from mr_table */
     58	id = ida_alloc_range(&mtpt_ida->ida, mtpt_ida->min, mtpt_ida->max,
     59			     GFP_KERNEL);
     60	if (id < 0) {
     61		ibdev_err(ibdev, "failed to alloc id for MR key, id(%d)\n", id);
     62		return -ENOMEM;
     63	}
     64
     65	mr->key = hw_index_to_key(id); /* MR key */
     66
     67	err = hns_roce_table_get(hr_dev, &hr_dev->mr_table.mtpt_table,
     68				 (unsigned long)id);
     69	if (err) {
     70		ibdev_err(ibdev, "failed to alloc mtpt, ret = %d.\n", err);
     71		goto err_free_bitmap;
     72	}
     73
     74	return 0;
     75err_free_bitmap:
     76	ida_free(&mtpt_ida->ida, id);
     77	return err;
     78}
     79
     80static void free_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
     81{
     82	unsigned long obj = key_to_hw_index(mr->key);
     83
     84	hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table, obj);
     85	ida_free(&hr_dev->mr_table.mtpt_ida.ida, (int)obj);
     86}
     87
     88static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr,
     89			struct ib_udata *udata, u64 start)
     90{
     91	struct ib_device *ibdev = &hr_dev->ib_dev;
     92	bool is_fast = mr->type == MR_TYPE_FRMR;
     93	struct hns_roce_buf_attr buf_attr = {};
     94	int err;
     95
     96	mr->pbl_hop_num = is_fast ? 1 : hr_dev->caps.pbl_hop_num;
     97	buf_attr.page_shift = is_fast ? PAGE_SHIFT :
     98			      hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT;
     99	buf_attr.region[0].size = mr->size;
    100	buf_attr.region[0].hopnum = mr->pbl_hop_num;
    101	buf_attr.region_count = 1;
    102	buf_attr.user_access = mr->access;
     103	/* fast MR's buffer is allocated before mapping, not at creation */
    104	buf_attr.mtt_only = is_fast;
    105
    106	err = hns_roce_mtr_create(hr_dev, &mr->pbl_mtr, &buf_attr,
    107				  hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT,
    108				  udata, start);
    109	if (err)
    110		ibdev_err(ibdev, "failed to alloc pbl mtr, ret = %d.\n", err);
    111	else
    112		mr->npages = mr->pbl_mtr.hem_cfg.buf_pg_count;
    113
    114	return err;
    115}
    116
    117static void free_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
    118{
    119	hns_roce_mtr_destroy(hr_dev, &mr->pbl_mtr);
    120}
    121
    122static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
    123{
    124	struct ib_device *ibdev = &hr_dev->ib_dev;
    125	int ret;
    126
    127	if (mr->enabled) {
    128		ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_MPT,
    129					      key_to_hw_index(mr->key) &
    130					      (hr_dev->caps.num_mtpts - 1));
    131		if (ret)
    132			ibdev_warn(ibdev, "failed to destroy mpt, ret = %d.\n",
    133				   ret);
    134	}
    135
    136	free_mr_pbl(hr_dev, mr);
    137	free_mr_key(hr_dev, mr);
    138}
    139
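        /*
         * Program the hardware MTPT entry for this MR: the MTPT context is
         * written into a command mailbox and issued to the device with the
         * CREATE_MPT mailbox command.
         */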
    140static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
    141			      struct hns_roce_mr *mr)
    142{
    143	unsigned long mtpt_idx = key_to_hw_index(mr->key);
    144	struct hns_roce_cmd_mailbox *mailbox;
    145	struct device *dev = hr_dev->dev;
    146	int ret;
    147
    148	/* Allocate mailbox memory */
    149	mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
    150	if (IS_ERR(mailbox))
    151		return PTR_ERR(mailbox);
    152
    153	if (mr->type != MR_TYPE_FRMR)
    154		ret = hr_dev->hw->write_mtpt(hr_dev, mailbox->buf, mr);
    155	else
    156		ret = hr_dev->hw->frmr_write_mtpt(hr_dev, mailbox->buf, mr);
    157	if (ret) {
    158		dev_err(dev, "failed to write mtpt, ret = %d.\n", ret);
    159		goto err_page;
    160	}
    161
    162	ret = hns_roce_create_hw_ctx(hr_dev, mailbox, HNS_ROCE_CMD_CREATE_MPT,
    163				     mtpt_idx & (hr_dev->caps.num_mtpts - 1));
    164	if (ret) {
    165		dev_err(dev, "failed to create mpt, ret = %d.\n", ret);
    166		goto err_page;
    167	}
    168
    169	mr->enabled = 1;
    170
    171err_page:
    172	hns_roce_free_cmd_mailbox(hr_dev, mailbox);
    173
    174	return ret;
    175}
    176
    177void hns_roce_init_mr_table(struct hns_roce_dev *hr_dev)
    178{
    179	struct hns_roce_ida *mtpt_ida = &hr_dev->mr_table.mtpt_ida;
    180
    181	ida_init(&mtpt_ida->ida);
    182	mtpt_ida->max = hr_dev->caps.num_mtpts - 1;
    183	mtpt_ida->min = hr_dev->caps.reserved_mrws;
    184}
    185
    186struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc)
    187{
    188	struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
    189	struct hns_roce_mr *mr;
    190	int ret;
    191
    192	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
    193	if (mr == NULL)
    194		return  ERR_PTR(-ENOMEM);
    195
    196	mr->type = MR_TYPE_DMA;
    197	mr->pd = to_hr_pd(pd)->pdn;
    198	mr->access = acc;
    199
    200	/* Allocate memory region key */
    201	hns_roce_hem_list_init(&mr->pbl_mtr.hem_list);
    202	ret = alloc_mr_key(hr_dev, mr);
    203	if (ret)
    204		goto err_free;
    205
    206	ret = hns_roce_mr_enable(hr_dev, mr);
    207	if (ret)
    208		goto err_mr;
    209
    210	mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
    211
    212	return &mr->ibmr;
    213err_mr:
    214	free_mr_key(hr_dev, mr);
    215
    216err_free:
    217	kfree(mr);
    218	return ERR_PTR(ret);
    219}
    220
    221struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
    222				   u64 virt_addr, int access_flags,
    223				   struct ib_udata *udata)
    224{
    225	struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
    226	struct hns_roce_mr *mr;
    227	int ret;
    228
    229	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
    230	if (!mr)
    231		return ERR_PTR(-ENOMEM);
    232
    233	mr->iova = virt_addr;
    234	mr->size = length;
    235	mr->pd = to_hr_pd(pd)->pdn;
    236	mr->access = access_flags;
    237	mr->type = MR_TYPE_MR;
    238
    239	ret = alloc_mr_key(hr_dev, mr);
    240	if (ret)
    241		goto err_alloc_mr;
    242
    243	ret = alloc_mr_pbl(hr_dev, mr, udata, start);
    244	if (ret)
    245		goto err_alloc_key;
    246
    247	ret = hns_roce_mr_enable(hr_dev, mr);
    248	if (ret)
    249		goto err_alloc_pbl;
    250
    251	mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
    252	mr->ibmr.length = length;
    253
    254	return &mr->ibmr;
    255
    256err_alloc_pbl:
    257	free_mr_pbl(hr_dev, mr);
    258err_alloc_key:
    259	free_mr_key(hr_dev, mr);
    260err_alloc_mr:
    261	kfree(mr);
    262	return ERR_PTR(ret);
    263}
    264
    265struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start,
    266				     u64 length, u64 virt_addr,
    267				     int mr_access_flags, struct ib_pd *pd,
    268				     struct ib_udata *udata)
    269{
    270	struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
    271	struct ib_device *ib_dev = &hr_dev->ib_dev;
    272	struct hns_roce_mr *mr = to_hr_mr(ibmr);
    273	struct hns_roce_cmd_mailbox *mailbox;
    274	unsigned long mtpt_idx;
    275	int ret;
    276
    277	if (!mr->enabled)
    278		return ERR_PTR(-EINVAL);
    279
    280	mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
    281	if (IS_ERR(mailbox))
    282		return ERR_CAST(mailbox);
    283
    284	mtpt_idx = key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1);
    285
    286	ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, HNS_ROCE_CMD_QUERY_MPT,
    287				mtpt_idx);
    288	if (ret)
    289		goto free_cmd_mbox;
    290
    291	ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_MPT,
    292				      mtpt_idx);
    293	if (ret)
    294		ibdev_warn(ib_dev, "failed to destroy MPT, ret = %d.\n", ret);
    295
    296	mr->enabled = 0;
    297	mr->iova = virt_addr;
    298	mr->size = length;
    299
    300	if (flags & IB_MR_REREG_PD)
    301		mr->pd = to_hr_pd(pd)->pdn;
    302
    303	if (flags & IB_MR_REREG_ACCESS)
    304		mr->access = mr_access_flags;
    305
    306	if (flags & IB_MR_REREG_TRANS) {
    307		free_mr_pbl(hr_dev, mr);
    308		ret = alloc_mr_pbl(hr_dev, mr, udata, start);
    309		if (ret) {
    310			ibdev_err(ib_dev, "failed to alloc mr PBL, ret = %d.\n",
    311				  ret);
    312			goto free_cmd_mbox;
    313		}
    314	}
    315
    316	ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, mailbox->buf);
    317	if (ret) {
    318		ibdev_err(ib_dev, "failed to write mtpt, ret = %d.\n", ret);
    319		goto free_cmd_mbox;
    320	}
    321
    322	ret = hns_roce_create_hw_ctx(hr_dev, mailbox, HNS_ROCE_CMD_CREATE_MPT,
    323				     mtpt_idx);
    324	if (ret) {
    325		ibdev_err(ib_dev, "failed to create MPT, ret = %d.\n", ret);
    326		goto free_cmd_mbox;
    327	}
    328
    329	mr->enabled = 1;
    330
    331free_cmd_mbox:
    332	hns_roce_free_cmd_mailbox(hr_dev, mailbox);
    333
    334	if (ret)
    335		return ERR_PTR(ret);
    336	return NULL;
    337}
    338
    339int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
    340{
    341	struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
    342	struct hns_roce_mr *mr = to_hr_mr(ibmr);
    343
    344	if (hr_dev->hw->dereg_mr)
    345		hr_dev->hw->dereg_mr(hr_dev);
    346
    347	hns_roce_mr_free(hr_dev, mr);
    348	kfree(mr);
    349
    350	return 0;
    351}
    352
    353struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
    354				u32 max_num_sg)
    355{
    356	struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
    357	struct device *dev = hr_dev->dev;
    358	struct hns_roce_mr *mr;
    359	int ret;
    360
    361	if (mr_type != IB_MR_TYPE_MEM_REG)
    362		return ERR_PTR(-EINVAL);
    363
    364	if (max_num_sg > HNS_ROCE_FRMR_MAX_PA) {
    365		dev_err(dev, "max_num_sg larger than %d\n",
    366			HNS_ROCE_FRMR_MAX_PA);
    367		return ERR_PTR(-EINVAL);
    368	}
    369
    370	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
    371	if (!mr)
    372		return ERR_PTR(-ENOMEM);
    373
    374	mr->type = MR_TYPE_FRMR;
    375	mr->pd = to_hr_pd(pd)->pdn;
    376	mr->size = max_num_sg * (1 << PAGE_SHIFT);
    377
    378	/* Allocate memory region key */
    379	ret = alloc_mr_key(hr_dev, mr);
    380	if (ret)
    381		goto err_free;
    382
    383	ret = alloc_mr_pbl(hr_dev, mr, NULL, 0);
    384	if (ret)
    385		goto err_key;
    386
    387	ret = hns_roce_mr_enable(hr_dev, mr);
    388	if (ret)
    389		goto err_pbl;
    390
    391	mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
    392	mr->ibmr.length = mr->size;
    393
    394	return &mr->ibmr;
    395
     396err_pbl:
     397	free_mr_pbl(hr_dev, mr);
     398err_key:
     399	free_mr_key(hr_dev, mr);
    400err_free:
    401	kfree(mr);
    402	return ERR_PTR(ret);
    403}
    404
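        /*
         * Callback passed to ib_sg_to_pages() by hns_roce_map_mr_sg(): records
         * each page's DMA address in mr->page_list, up to the PBL's page
         * capacity (buf_pg_count).
         */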
    405static int hns_roce_set_page(struct ib_mr *ibmr, u64 addr)
    406{
    407	struct hns_roce_mr *mr = to_hr_mr(ibmr);
    408
    409	if (likely(mr->npages < mr->pbl_mtr.hem_cfg.buf_pg_count)) {
    410		mr->page_list[mr->npages++] = addr;
    411		return 0;
    412	}
    413
    414	return -ENOBUFS;
    415}
    416
    417int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
    418		       unsigned int *sg_offset)
    419{
    420	struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
    421	struct ib_device *ibdev = &hr_dev->ib_dev;
    422	struct hns_roce_mr *mr = to_hr_mr(ibmr);
    423	struct hns_roce_mtr *mtr = &mr->pbl_mtr;
    424	int ret = 0;
    425
    426	mr->npages = 0;
    427	mr->page_list = kvcalloc(mr->pbl_mtr.hem_cfg.buf_pg_count,
    428				 sizeof(dma_addr_t), GFP_KERNEL);
    429	if (!mr->page_list)
    430		return ret;
    431
    432	ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page);
    433	if (ret < 1) {
    434		ibdev_err(ibdev, "failed to store sg pages %u %u, cnt = %d.\n",
    435			  mr->npages, mr->pbl_mtr.hem_cfg.buf_pg_count, ret);
    436		goto err_page_list;
    437	}
    438
    439	mtr->hem_cfg.region[0].offset = 0;
    440	mtr->hem_cfg.region[0].count = mr->npages;
    441	mtr->hem_cfg.region[0].hopnum = mr->pbl_hop_num;
    442	mtr->hem_cfg.region_count = 1;
    443	ret = hns_roce_mtr_map(hr_dev, mtr, mr->page_list, mr->npages);
    444	if (ret) {
    445		ibdev_err(ibdev, "failed to map sg mtr, ret = %d.\n", ret);
    446		ret = 0;
    447	} else {
    448		mr->pbl_mtr.hem_cfg.buf_pg_shift = (u32)ilog2(ibmr->page_size);
    449		ret = mr->npages;
    450	}
    451
    452err_page_list:
    453	kvfree(mr->page_list);
    454	mr->page_list = NULL;
    455
    456	return ret;
    457}
    458
    459static void hns_roce_mw_free(struct hns_roce_dev *hr_dev,
    460			     struct hns_roce_mw *mw)
    461{
    462	struct device *dev = hr_dev->dev;
    463	int ret;
    464
    465	if (mw->enabled) {
    466		ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_MPT,
    467					      key_to_hw_index(mw->rkey) &
    468					      (hr_dev->caps.num_mtpts - 1));
    469		if (ret)
    470			dev_warn(dev, "MW DESTROY_MPT failed (%d)\n", ret);
    471
    472		hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table,
    473				   key_to_hw_index(mw->rkey));
    474	}
    475
    476	ida_free(&hr_dev->mr_table.mtpt_ida.ida,
    477		 (int)key_to_hw_index(mw->rkey));
    478}
    479
    480static int hns_roce_mw_enable(struct hns_roce_dev *hr_dev,
    481			      struct hns_roce_mw *mw)
    482{
    483	struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
    484	struct hns_roce_cmd_mailbox *mailbox;
    485	struct device *dev = hr_dev->dev;
    486	unsigned long mtpt_idx = key_to_hw_index(mw->rkey);
    487	int ret;
    488
    489	/* prepare HEM entry memory */
    490	ret = hns_roce_table_get(hr_dev, &mr_table->mtpt_table, mtpt_idx);
    491	if (ret)
    492		return ret;
    493
    494	mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
    495	if (IS_ERR(mailbox)) {
    496		ret = PTR_ERR(mailbox);
    497		goto err_table;
    498	}
    499
    500	ret = hr_dev->hw->mw_write_mtpt(mailbox->buf, mw);
    501	if (ret) {
    502		dev_err(dev, "MW write mtpt fail!\n");
    503		goto err_page;
    504	}
    505
    506	ret = hns_roce_create_hw_ctx(hr_dev, mailbox, HNS_ROCE_CMD_CREATE_MPT,
    507				     mtpt_idx & (hr_dev->caps.num_mtpts - 1));
    508	if (ret) {
    509		dev_err(dev, "MW CREATE_MPT failed (%d)\n", ret);
    510		goto err_page;
    511	}
    512
    513	mw->enabled = 1;
    514
    515	hns_roce_free_cmd_mailbox(hr_dev, mailbox);
    516
    517	return 0;
    518
    519err_page:
    520	hns_roce_free_cmd_mailbox(hr_dev, mailbox);
    521
    522err_table:
    523	hns_roce_table_put(hr_dev, &mr_table->mtpt_table, mtpt_idx);
    524
    525	return ret;
    526}
    527
    528int hns_roce_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
    529{
    530	struct hns_roce_dev *hr_dev = to_hr_dev(ibmw->device);
    531	struct hns_roce_ida *mtpt_ida = &hr_dev->mr_table.mtpt_ida;
    532	struct ib_device *ibdev = &hr_dev->ib_dev;
    533	struct hns_roce_mw *mw = to_hr_mw(ibmw);
    534	int ret;
    535	int id;
    536
    537	/* Allocate a key for mw from mr_table */
    538	id = ida_alloc_range(&mtpt_ida->ida, mtpt_ida->min, mtpt_ida->max,
    539			     GFP_KERNEL);
    540	if (id < 0) {
    541		ibdev_err(ibdev, "failed to alloc id for MW key, id(%d)\n", id);
    542		return -ENOMEM;
    543	}
    544
    545	mw->rkey = hw_index_to_key(id);
    546
    547	ibmw->rkey = mw->rkey;
    548	mw->pdn = to_hr_pd(ibmw->pd)->pdn;
    549	mw->pbl_hop_num = hr_dev->caps.pbl_hop_num;
    550	mw->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
    551	mw->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
    552
    553	ret = hns_roce_mw_enable(hr_dev, mw);
    554	if (ret)
    555		goto err_mw;
    556
    557	return 0;
    558
    559err_mw:
    560	hns_roce_mw_free(hr_dev, mw);
    561	return ret;
    562}
    563
    564int hns_roce_dealloc_mw(struct ib_mw *ibmw)
    565{
    566	struct hns_roce_dev *hr_dev = to_hr_dev(ibmw->device);
    567	struct hns_roce_mw *mw = to_hr_mw(ibmw);
    568
    569	hns_roce_mw_free(hr_dev, mw);
    570	return 0;
    571}
    572
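        /*
         * Write up to max_count page addresses of one buffer region into the
         * MTT entries returned by hns_roce_hem_list_find_mtt(); returns the
         * number of pages actually written.
         */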
    573static int mtr_map_region(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
    574			  struct hns_roce_buf_region *region, dma_addr_t *pages,
    575			  int max_count)
    576{
    577	int count, npage;
    578	int offset, end;
    579	__le64 *mtts;
    580	u64 addr;
    581	int i;
    582
    583	offset = region->offset;
    584	end = offset + region->count;
    585	npage = 0;
    586	while (offset < end && npage < max_count) {
    587		count = 0;
    588		mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
    589						  offset, &count, NULL);
    590		if (!mtts)
    591			return -ENOBUFS;
    592
    593		for (i = 0; i < count && npage < max_count; i++) {
    594			addr = pages[npage];
    595
    596			mtts[i] = cpu_to_le64(addr);
    597			npage++;
    598		}
    599		offset += count;
    600	}
    601
    602	return npage;
    603}
    604
    605static inline bool mtr_has_mtt(struct hns_roce_buf_attr *attr)
    606{
    607	int i;
    608
    609	for (i = 0; i < attr->region_count; i++)
    610		if (attr->region[i].hopnum != HNS_ROCE_HOP_NUM_0 &&
    611		    attr->region[i].hopnum > 0)
    612			return true;
    613
     614	/* Because the mtr has only one root base address, a hopnum of 0 means
     615	 * the root base address equals the first buffer address, so the whole
     616	 * buffer must lie in one contiguous range accessed in direct mode.
     617	 */
    618	return false;
    619}
    620
    621static inline size_t mtr_bufs_size(struct hns_roce_buf_attr *attr)
    622{
    623	size_t size = 0;
    624	int i;
    625
    626	for (i = 0; i < attr->region_count; i++)
    627		size += attr->region[i].size;
    628
    629	return size;
    630}
    631
     632/*
     633 * Check that the given pages lie in a contiguous address space.
     634 * Returns 0 on success, or the index of the first non-contiguous page.
     635 */
    636static inline int mtr_check_direct_pages(dma_addr_t *pages, int page_count,
    637					 unsigned int page_shift)
    638{
    639	size_t page_size = 1 << page_shift;
    640	int i;
    641
    642	for (i = 1; i < page_count; i++)
    643		if (pages[i] - pages[i - 1] != page_size)
    644			return i;
    645
    646	return 0;
    647}
    648
    649static void mtr_free_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
    650{
    651	/* release user buffers */
    652	if (mtr->umem) {
    653		ib_umem_release(mtr->umem);
    654		mtr->umem = NULL;
    655	}
    656
    657	/* release kernel buffers */
    658	if (mtr->kmem) {
    659		hns_roce_buf_free(hr_dev, mtr->kmem);
    660		mtr->kmem = NULL;
    661	}
    662}
    663
    664static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
    665			  struct hns_roce_buf_attr *buf_attr,
    666			  struct ib_udata *udata, unsigned long user_addr)
    667{
    668	struct ib_device *ibdev = &hr_dev->ib_dev;
    669	size_t total_size;
    670
    671	total_size = mtr_bufs_size(buf_attr);
    672
    673	if (udata) {
    674		mtr->kmem = NULL;
    675		mtr->umem = ib_umem_get(ibdev, user_addr, total_size,
    676					buf_attr->user_access);
    677		if (IS_ERR_OR_NULL(mtr->umem)) {
    678			ibdev_err(ibdev, "failed to get umem, ret = %ld.\n",
    679				  PTR_ERR(mtr->umem));
    680			return -ENOMEM;
    681		}
    682	} else {
    683		mtr->umem = NULL;
    684		mtr->kmem = hns_roce_buf_alloc(hr_dev, total_size,
    685					       buf_attr->page_shift,
    686					       mtr->hem_cfg.is_direct ?
    687					       HNS_ROCE_BUF_DIRECT : 0);
    688		if (IS_ERR(mtr->kmem)) {
    689			ibdev_err(ibdev, "failed to alloc kmem, ret = %ld.\n",
    690				  PTR_ERR(mtr->kmem));
    691			return PTR_ERR(mtr->kmem);
    692		}
    693	}
    694
    695	return 0;
    696}
    697
    698static int mtr_map_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
    699			int page_count, unsigned int page_shift)
    700{
    701	struct ib_device *ibdev = &hr_dev->ib_dev;
    702	dma_addr_t *pages;
    703	int npage;
    704	int ret;
    705
    706	/* alloc a tmp array to store buffer's dma address */
    707	pages = kvcalloc(page_count, sizeof(dma_addr_t), GFP_KERNEL);
    708	if (!pages)
    709		return -ENOMEM;
    710
    711	if (mtr->umem)
    712		npage = hns_roce_get_umem_bufs(hr_dev, pages, page_count,
    713					       mtr->umem, page_shift);
    714	else
    715		npage = hns_roce_get_kmem_bufs(hr_dev, pages, page_count,
    716					       mtr->kmem, page_shift);
    717
    718	if (npage != page_count) {
    719		ibdev_err(ibdev, "failed to get mtr page %d != %d.\n", npage,
    720			  page_count);
    721		ret = -ENOBUFS;
    722		goto err_alloc_list;
    723	}
    724
    725	if (mtr->hem_cfg.is_direct && npage > 1) {
    726		ret = mtr_check_direct_pages(pages, npage, page_shift);
    727		if (ret) {
    728			ibdev_err(ibdev, "failed to check %s page: %d / %d.\n",
    729				  mtr->umem ? "umtr" : "kmtr", ret, npage);
    730			ret = -ENOBUFS;
    731			goto err_alloc_list;
    732		}
    733	}
    734
    735	ret = hns_roce_mtr_map(hr_dev, mtr, pages, page_count);
    736	if (ret)
    737		ibdev_err(ibdev, "failed to map mtr pages, ret = %d.\n", ret);
    738
    739err_alloc_list:
    740	kvfree(pages);
    741
    742	return ret;
    743}
    744
    745int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
    746		     dma_addr_t *pages, unsigned int page_cnt)
    747{
    748	struct ib_device *ibdev = &hr_dev->ib_dev;
    749	struct hns_roce_buf_region *r;
    750	unsigned int i, mapped_cnt;
    751	int ret = 0;
    752
     753	/*
     754	 * When hopnum is 0, use only the first page address as the root ba,
     755	 * because the addresses of all pages are consecutive in this case.
     756	 */
    757	if (mtr->hem_cfg.is_direct) {
    758		mtr->hem_cfg.root_ba = pages[0];
    759		return 0;
    760	}
    761
    762	for (i = 0, mapped_cnt = 0; i < mtr->hem_cfg.region_count &&
    763	     mapped_cnt < page_cnt; i++) {
    764		r = &mtr->hem_cfg.region[i];
    765		/* if hopnum is 0, no need to map pages in this region */
    766		if (!r->hopnum) {
    767			mapped_cnt += r->count;
    768			continue;
    769		}
    770
    771		if (r->offset + r->count > page_cnt) {
    772			ret = -EINVAL;
    773			ibdev_err(ibdev,
    774				  "failed to check mtr%u count %u + %u > %u.\n",
    775				  i, r->offset, r->count, page_cnt);
    776			return ret;
    777		}
    778
    779		ret = mtr_map_region(hr_dev, mtr, r, &pages[r->offset],
    780				     page_cnt - mapped_cnt);
    781		if (ret < 0) {
    782			ibdev_err(ibdev,
    783				  "failed to map mtr%u offset %u, ret = %d.\n",
    784				  i, r->offset, ret);
    785			return ret;
    786		}
    787		mapped_cnt += ret;
    788		ret = 0;
    789	}
    790
    791	if (mapped_cnt < page_cnt) {
    792		ret = -ENOBUFS;
    793		ibdev_err(ibdev, "failed to map mtr pages count: %u < %u.\n",
    794			  mapped_cnt, page_cnt);
    795	}
    796
    797	return ret;
    798}
    799
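        /*
         * Collect up to mtt_max page addresses, starting at byte offset
         * @offset, into mtt_buf. In direct mode there are no MTT entries, so
         * per-region addresses derived from root_ba are returned instead.
         * Returns the number of addresses written; *base_addr, if given, is
         * set to the root base address.
         */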
    800int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
    801		      u32 offset, u64 *mtt_buf, int mtt_max, u64 *base_addr)
    802{
    803	struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
    804	int mtt_count, left;
    805	u32 start_index;
    806	int total = 0;
    807	__le64 *mtts;
    808	u32 npage;
    809	u64 addr;
    810
    811	if (!mtt_buf || mtt_max < 1)
    812		goto done;
    813
    814	/* no mtt memory in direct mode, so just return the buffer address */
    815	if (cfg->is_direct) {
    816		start_index = offset >> HNS_HW_PAGE_SHIFT;
    817		for (mtt_count = 0; mtt_count < cfg->region_count &&
    818		     total < mtt_max; mtt_count++) {
    819			npage = cfg->region[mtt_count].offset;
    820			if (npage < start_index)
    821				continue;
    822
    823			addr = cfg->root_ba + (npage << HNS_HW_PAGE_SHIFT);
    824			mtt_buf[total] = addr;
    825
    826			total++;
    827		}
    828
    829		goto done;
    830	}
    831
    832	start_index = offset >> cfg->buf_pg_shift;
    833	left = mtt_max;
    834	while (left > 0) {
    835		mtt_count = 0;
    836		mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
    837						  start_index + total,
    838						  &mtt_count, NULL);
    839		if (!mtts || !mtt_count)
    840			goto done;
    841
    842		npage = min(mtt_count, left);
    843		left -= npage;
    844		for (mtt_count = 0; mtt_count < npage; mtt_count++)
    845			mtt_buf[total++] = le64_to_cpu(mtts[mtt_count]);
    846	}
    847
    848done:
    849	if (base_addr)
    850		*base_addr = cfg->root_ba;
    851
    852	return total;
    853}
    854
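        /*
         * Convert the caller's buffer attributes into page-granular regions in
         * the HEM config: choose direct (0-hop) or multi-hop addressing, fold
         * the start-address misalignment into the first region, and return the
         * total page count.
         */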
    855static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev,
    856			    struct hns_roce_buf_attr *attr,
    857			    struct hns_roce_hem_cfg *cfg,
    858			    unsigned int *buf_page_shift, u64 unalinged_size)
    859{
    860	struct hns_roce_buf_region *r;
    861	u64 first_region_padding;
    862	int page_cnt, region_cnt;
    863	unsigned int page_shift;
    864	size_t buf_size;
    865
    866	/* If mtt is disabled, all pages must be within a continuous range */
    867	cfg->is_direct = !mtr_has_mtt(attr);
    868	buf_size = mtr_bufs_size(attr);
    869	if (cfg->is_direct) {
     870		/* When the HEM buffer uses 0-level addressing, the page size
     871		 * is equal to the whole buffer size. The buffer is still split
     872		 * into small units only to check whether the adjacent units
     873		 * sit in one contiguous address range; the unit size is fixed
     874		 * to 4K by the hns ROCEE's requirement.
     875		 */
    876		page_shift = HNS_HW_PAGE_SHIFT;
    877
    878		/* The ROCEE requires the page size to be 4K * 2 ^ N. */
    879		cfg->buf_pg_count = 1;
    880		cfg->buf_pg_shift = HNS_HW_PAGE_SHIFT +
    881			order_base_2(DIV_ROUND_UP(buf_size, HNS_HW_PAGE_SIZE));
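        		/* e.g. a 24KB buffer: DIV_ROUND_UP gives 6 hw pages,
        		 * order_base_2(6) = 3, so buf_pg_shift = 12 + 3 = 15 and
        		 * the whole buffer is treated as a single 32KB "page".
        		 */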
    882		first_region_padding = 0;
    883	} else {
    884		page_shift = attr->page_shift;
    885		cfg->buf_pg_count = DIV_ROUND_UP(buf_size + unalinged_size,
    886						 1 << page_shift);
    887		cfg->buf_pg_shift = page_shift;
    888		first_region_padding = unalinged_size;
    889	}
    890
     891	/* Convert each region's buffer size to a page index and page count;
     892	 * the buffer's start offset is folded into the first region only.
     893	 */
    894	for (page_cnt = 0, region_cnt = 0; region_cnt < attr->region_count &&
    895	     region_cnt < ARRAY_SIZE(cfg->region); region_cnt++) {
    896		r = &cfg->region[region_cnt];
    897		r->offset = page_cnt;
    898		buf_size = hr_hw_page_align(attr->region[region_cnt].size +
    899					    first_region_padding);
    900		r->count = DIV_ROUND_UP(buf_size, 1 << page_shift);
    901		first_region_padding = 0;
    902		page_cnt += r->count;
    903		r->hopnum = to_hr_hem_hopnum(attr->region[region_cnt].hopnum,
    904					     r->count);
    905	}
    906
    907	cfg->region_count = region_cnt;
    908	*buf_page_shift = page_shift;
    909
    910	return page_cnt;
    911}
    912
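        /*
         * Request the multi-hop base-address tables from the HEM list when
         * indirect addressing is used; in direct mode no table is needed and
         * the root ba is set later from the first buffer page (see
         * hns_roce_mtr_map()).
         */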
    913static int mtr_alloc_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
    914			 unsigned int ba_page_shift)
    915{
    916	struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
    917	int ret;
    918
    919	hns_roce_hem_list_init(&mtr->hem_list);
    920	if (!cfg->is_direct) {
    921		ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list,
    922						cfg->region, cfg->region_count,
    923						ba_page_shift);
    924		if (ret)
    925			return ret;
    926		cfg->root_ba = mtr->hem_list.root_ba;
    927		cfg->ba_pg_shift = ba_page_shift;
    928	} else {
    929		cfg->ba_pg_shift = cfg->buf_pg_shift;
    930	}
    931
    932	return 0;
    933}
    934
    935static void mtr_free_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
    936{
    937	hns_roce_hem_list_release(hr_dev, &mtr->hem_list);
    938}
    939
    940/**
    941 * hns_roce_mtr_create - Create hns memory translate region.
    942 *
    943 * @hr_dev: RoCE device struct pointer
    944 * @mtr: memory translate region
    945 * @buf_attr: buffer attribute for creating mtr
    946 * @ba_page_shift: page shift for multi-hop base address table
    947 * @udata: user space context, if it's NULL, means kernel space
    948 * @user_addr: userspace virtual address to start at
    949 */
    950int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
    951			struct hns_roce_buf_attr *buf_attr,
    952			unsigned int ba_page_shift, struct ib_udata *udata,
    953			unsigned long user_addr)
    954{
    955	struct ib_device *ibdev = &hr_dev->ib_dev;
    956	unsigned int buf_page_shift = 0;
    957	int buf_page_cnt;
    958	int ret;
    959
    960	buf_page_cnt = mtr_init_buf_cfg(hr_dev, buf_attr, &mtr->hem_cfg,
    961					&buf_page_shift,
    962					udata ? user_addr & ~PAGE_MASK : 0);
    963	if (buf_page_cnt < 1 || buf_page_shift < HNS_HW_PAGE_SHIFT) {
    964		ibdev_err(ibdev, "failed to init mtr cfg, count %d shift %u.\n",
    965			  buf_page_cnt, buf_page_shift);
    966		return -EINVAL;
    967	}
    968
    969	ret = mtr_alloc_mtt(hr_dev, mtr, ba_page_shift);
    970	if (ret) {
    971		ibdev_err(ibdev, "failed to alloc mtr mtt, ret = %d.\n", ret);
    972		return ret;
    973	}
    974
     975	/* The caller provides its own buffer list and will invoke
     976	 * hns_roce_mtr_map() later to finish the MTT configuration.
     977	 */
    978	if (buf_attr->mtt_only) {
    979		mtr->umem = NULL;
    980		mtr->kmem = NULL;
    981		return 0;
    982	}
    983
    984	ret = mtr_alloc_bufs(hr_dev, mtr, buf_attr, udata, user_addr);
    985	if (ret) {
    986		ibdev_err(ibdev, "failed to alloc mtr bufs, ret = %d.\n", ret);
    987		goto err_alloc_mtt;
    988	}
    989
    990	/* Write buffer's dma address to MTT */
    991	ret = mtr_map_bufs(hr_dev, mtr, buf_page_cnt, buf_page_shift);
    992	if (ret)
    993		ibdev_err(ibdev, "failed to map mtr bufs, ret = %d.\n", ret);
    994	else
    995		return 0;
    996
    997	mtr_free_bufs(hr_dev, mtr);
    998err_alloc_mtt:
    999	mtr_free_mtt(hr_dev, mtr);
   1000	return ret;
   1001}
   1002
   1003void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
   1004{
   1005	/* release multi-hop addressing resource */
   1006	hns_roce_hem_list_release(hr_dev, &mtr->hem_list);
   1007
   1008	/* free buffers */
   1009	mtr_free_bufs(hr_dev, mtr);
   1010}