cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

hinic_hw_qp.c (25425B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * Huawei HiNIC PCI Express Linux driver
      4 * Copyright(c) 2017 Huawei Technologies Co., Ltd
      5 */
      6
      7#include <linux/kernel.h>
      8#include <linux/types.h>
      9#include <linux/pci.h>
     10#include <linux/device.h>
     11#include <linux/dma-mapping.h>
     12#include <linux/vmalloc.h>
     13#include <linux/errno.h>
     14#include <linux/sizes.h>
     15#include <linux/atomic.h>
     16#include <linux/skbuff.h>
     17#include <linux/io.h>
     18#include <asm/barrier.h>
     19#include <asm/byteorder.h>
     20
     21#include "hinic_common.h"
     22#include "hinic_hw_if.h"
     23#include "hinic_hw_wqe.h"
     24#include "hinic_hw_wq.h"
     25#include "hinic_hw_qp_ctxt.h"
     26#include "hinic_hw_qp.h"
     27#include "hinic_hw_io.h"
     28
     29#define SQ_DB_OFF               SZ_2K
     30
     31/* The number of cache lines to prefetch until the threshold state */
     32#define WQ_PREFETCH_MAX         2
     33/* The number of cache lines to prefetch after the threshold state */
     34#define WQ_PREFETCH_MIN         1
     35/* Threshold state */
     36#define WQ_PREFETCH_THRESHOLD   256
     37
     38/* sizes of the SQ/RQ ctxt */
     39#define Q_CTXT_SIZE             48
     40#define CTXT_RSVD               240
     41
     42#define SQ_CTXT_OFFSET(max_sqs, max_rqs, q_id)  \
     43		(((max_rqs) + (max_sqs)) * CTXT_RSVD + (q_id) * Q_CTXT_SIZE)
     44
     45#define RQ_CTXT_OFFSET(max_sqs, max_rqs, q_id)  \
     46		(((max_rqs) + (max_sqs)) * CTXT_RSVD + \
     47		 (max_sqs + (q_id)) * Q_CTXT_SIZE)
     48
     49#define SIZE_16BYTES(size)              (ALIGN(size, 16) >> 4)
     50#define SIZE_8BYTES(size)               (ALIGN(size, 8) >> 3)
     51#define SECT_SIZE_FROM_8BYTES(size)     ((size) << 3)
     52
     53#define SQ_DB_PI_HI_SHIFT       8
     54#define SQ_DB_PI_HI(prod_idx)   ((prod_idx) >> SQ_DB_PI_HI_SHIFT)
     55
     56#define SQ_DB_PI_LOW_MASK       0xFF
     57#define SQ_DB_PI_LOW(prod_idx)  ((prod_idx) & SQ_DB_PI_LOW_MASK)
     58
     59#define SQ_DB_ADDR(sq, pi)      ((u64 *)((sq)->db_base) + SQ_DB_PI_LOW(pi))
     60
     61#define SQ_MASKED_IDX(sq, idx)  ((idx) & (sq)->wq->mask)
     62#define RQ_MASKED_IDX(rq, idx)  ((idx) & (rq)->wq->mask)
     63
     64enum sq_wqe_type {
     65	SQ_NORMAL_WQE = 0,
     66};
     67
     68enum rq_completion_fmt {
     69	RQ_COMPLETE_SGE = 1
     70};
     71
     72void hinic_qp_prepare_header(struct hinic_qp_ctxt_header *qp_ctxt_hdr,
     73			     enum hinic_qp_ctxt_type ctxt_type,
     74			     u16 num_queues, u16 max_queues)
     75{
     76	u16 max_sqs = max_queues;
     77	u16 max_rqs = max_queues;
     78
     79	qp_ctxt_hdr->num_queues = num_queues;
     80	qp_ctxt_hdr->queue_type = ctxt_type;
     81
     82	if (ctxt_type == HINIC_QP_CTXT_TYPE_SQ)
     83		qp_ctxt_hdr->addr_offset = SQ_CTXT_OFFSET(max_sqs, max_rqs, 0);
     84	else
     85		qp_ctxt_hdr->addr_offset = RQ_CTXT_OFFSET(max_sqs, max_rqs, 0);
     86
     87	qp_ctxt_hdr->addr_offset = SIZE_16BYTES(qp_ctxt_hdr->addr_offset);
     88
     89	hinic_cpu_to_be32(qp_ctxt_hdr, sizeof(*qp_ctxt_hdr));
     90}
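
/*
 * Worked example of the context offset math above (hypothetical sizing:
 * max_queues = 16, so max_sqs = max_rqs = 16):
 *
 *   SQ_CTXT_OFFSET(16, 16, 0) = (16 + 16) * 240 + 0 * 48  = 7680 bytes
 *   RQ_CTXT_OFFSET(16, 16, 0) = (16 + 16) * 240 + 16 * 48 = 8448 bytes
 *
 * hinic_qp_prepare_header() then converts the offset to 16-byte units:
 * SIZE_16BYTES(7680) = 480 and SIZE_16BYTES(8448) = 528.
 */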
     91
     92void hinic_sq_prepare_ctxt(struct hinic_sq_ctxt *sq_ctxt,
     93			   struct hinic_sq *sq, u16 global_qid)
     94{
     95	u32 wq_page_pfn_hi, wq_page_pfn_lo, wq_block_pfn_hi, wq_block_pfn_lo;
     96	u64 wq_page_addr, wq_page_pfn, wq_block_pfn;
     97	u16 pi_start, ci_start;
     98	struct hinic_wq *wq;
     99
    100	wq = sq->wq;
    101	ci_start = atomic_read(&wq->cons_idx);
    102	pi_start = atomic_read(&wq->prod_idx);
    103
    104	/* Read the first page paddr from the WQ page paddr ptrs */
    105	wq_page_addr = be64_to_cpu(*wq->block_vaddr);
    106
    107	wq_page_pfn = HINIC_WQ_PAGE_PFN(wq_page_addr);
    108	wq_page_pfn_hi = upper_32_bits(wq_page_pfn);
    109	wq_page_pfn_lo = lower_32_bits(wq_page_pfn);
    110
    111	/* If only one page, use 0-level CLA */
    112	if (wq->num_q_pages == 1)
    113		wq_block_pfn = HINIC_WQ_BLOCK_PFN(wq_page_addr);
    114	else
    115		wq_block_pfn = HINIC_WQ_BLOCK_PFN(wq->block_paddr);
    116
    117	wq_block_pfn_hi = upper_32_bits(wq_block_pfn);
    118	wq_block_pfn_lo = lower_32_bits(wq_block_pfn);
    119
    120	sq_ctxt->ceq_attr = HINIC_SQ_CTXT_CEQ_ATTR_SET(global_qid,
    121						       GLOBAL_SQ_ID) |
    122			    HINIC_SQ_CTXT_CEQ_ATTR_SET(0, EN);
    123
    124	sq_ctxt->ci_wrapped = HINIC_SQ_CTXT_CI_SET(ci_start, IDX) |
    125			      HINIC_SQ_CTXT_CI_SET(1, WRAPPED);
    126
    127	sq_ctxt->wq_hi_pfn_pi =
    128			HINIC_SQ_CTXT_WQ_PAGE_SET(wq_page_pfn_hi, HI_PFN) |
    129			HINIC_SQ_CTXT_WQ_PAGE_SET(pi_start, PI);
    130
    131	sq_ctxt->wq_lo_pfn = wq_page_pfn_lo;
    132
    133	sq_ctxt->pref_cache =
    134		HINIC_SQ_CTXT_PREF_SET(WQ_PREFETCH_MIN, CACHE_MIN) |
    135		HINIC_SQ_CTXT_PREF_SET(WQ_PREFETCH_MAX, CACHE_MAX) |
    136		HINIC_SQ_CTXT_PREF_SET(WQ_PREFETCH_THRESHOLD, CACHE_THRESHOLD);
    137
    138	sq_ctxt->pref_wrapped = 1;
    139
    140	sq_ctxt->pref_wq_hi_pfn_ci =
    141		HINIC_SQ_CTXT_PREF_SET(ci_start, CI) |
    142		HINIC_SQ_CTXT_PREF_SET(wq_page_pfn_hi, WQ_HI_PFN);
    143
    144	sq_ctxt->pref_wq_lo_pfn = wq_page_pfn_lo;
    145
    146	sq_ctxt->wq_block_hi_pfn =
    147		HINIC_SQ_CTXT_WQ_BLOCK_SET(wq_block_pfn_hi, HI_PFN);
    148
    149	sq_ctxt->wq_block_lo_pfn = wq_block_pfn_lo;
    150
    151	hinic_cpu_to_be32(sq_ctxt, sizeof(*sq_ctxt));
    152}
    153
    154void hinic_rq_prepare_ctxt(struct hinic_rq_ctxt *rq_ctxt,
    155			   struct hinic_rq *rq, u16 global_qid)
    156{
    157	u32 wq_page_pfn_hi, wq_page_pfn_lo, wq_block_pfn_hi, wq_block_pfn_lo;
    158	u64 wq_page_addr, wq_page_pfn, wq_block_pfn;
    159	u16 pi_start, ci_start;
    160	struct hinic_wq *wq;
    161
    162	wq = rq->wq;
    163	ci_start = atomic_read(&wq->cons_idx);
    164	pi_start = atomic_read(&wq->prod_idx);
    165
    166	/* Read the first page paddr from the WQ page paddr ptrs */
    167	wq_page_addr = be64_to_cpu(*wq->block_vaddr);
    168
    169	wq_page_pfn = HINIC_WQ_PAGE_PFN(wq_page_addr);
    170	wq_page_pfn_hi = upper_32_bits(wq_page_pfn);
    171	wq_page_pfn_lo = lower_32_bits(wq_page_pfn);
    172
    173	wq_block_pfn = HINIC_WQ_BLOCK_PFN(wq->block_paddr);
    174	wq_block_pfn_hi = upper_32_bits(wq_block_pfn);
    175	wq_block_pfn_lo = lower_32_bits(wq_block_pfn);
    176
    177	rq_ctxt->ceq_attr = HINIC_RQ_CTXT_CEQ_ATTR_SET(0, EN) |
    178			    HINIC_RQ_CTXT_CEQ_ATTR_SET(1, WRAPPED);
    179
    180	rq_ctxt->pi_intr_attr = HINIC_RQ_CTXT_PI_SET(pi_start, IDX) |
    181				HINIC_RQ_CTXT_PI_SET(rq->msix_entry, INTR);
    182
    183	rq_ctxt->wq_hi_pfn_ci = HINIC_RQ_CTXT_WQ_PAGE_SET(wq_page_pfn_hi,
    184							  HI_PFN) |
    185				HINIC_RQ_CTXT_WQ_PAGE_SET(ci_start, CI);
    186
    187	rq_ctxt->wq_lo_pfn = wq_page_pfn_lo;
    188
    189	rq_ctxt->pref_cache =
    190		HINIC_RQ_CTXT_PREF_SET(WQ_PREFETCH_MIN, CACHE_MIN) |
    191		HINIC_RQ_CTXT_PREF_SET(WQ_PREFETCH_MAX, CACHE_MAX) |
    192		HINIC_RQ_CTXT_PREF_SET(WQ_PREFETCH_THRESHOLD, CACHE_THRESHOLD);
    193
    194	rq_ctxt->pref_wrapped = 1;
    195
    196	rq_ctxt->pref_wq_hi_pfn_ci =
    197		HINIC_RQ_CTXT_PREF_SET(wq_page_pfn_hi, WQ_HI_PFN) |
    198		HINIC_RQ_CTXT_PREF_SET(ci_start, CI);
    199
    200	rq_ctxt->pref_wq_lo_pfn = wq_page_pfn_lo;
    201
    202	rq_ctxt->pi_paddr_hi = upper_32_bits(rq->pi_dma_addr);
    203	rq_ctxt->pi_paddr_lo = lower_32_bits(rq->pi_dma_addr);
    204
    205	rq_ctxt->wq_block_hi_pfn =
    206		HINIC_RQ_CTXT_WQ_BLOCK_SET(wq_block_pfn_hi, HI_PFN);
    207
    208	rq_ctxt->wq_block_lo_pfn = wq_block_pfn_lo;
    209
    210	hinic_cpu_to_be32(rq_ctxt, sizeof(*rq_ctxt));
    211}
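
/*
 * The pi_paddr_hi/lo fields programmed above carry the DMA address of
 * rq->pi_virt_addr, allocated in hinic_init_rq(); hinic_rq_update()
 * writes each new producer index there in big-endian form so the
 * hardware can fetch it from host memory.
 */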
    212
    213/**
    214 * alloc_sq_skb_arr - allocate sq array for saved skb
    215 * @sq: HW Send Queue
    216 *
    217 * Return 0 - Success, negative - Failure
    218 **/
    219static int alloc_sq_skb_arr(struct hinic_sq *sq)
    220{
    221	struct hinic_wq *wq = sq->wq;
    222	size_t skb_arr_size;
    223
    224	skb_arr_size = wq->q_depth * sizeof(*sq->saved_skb);
    225	sq->saved_skb = vzalloc(skb_arr_size);
    226	if (!sq->saved_skb)
    227		return -ENOMEM;
    228
    229	return 0;
    230}
    231
    232/**
    233 * free_sq_skb_arr - free sq array for saved skb
    234 * @sq: HW Send Queue
    235 **/
    236static void free_sq_skb_arr(struct hinic_sq *sq)
    237{
    238	vfree(sq->saved_skb);
    239}
    240
    241/**
    242 * alloc_rq_skb_arr - allocate rq array for saved skb
    243 * @rq: HW Receive Queue
    244 *
    245 * Return 0 - Success, negative - Failure
    246 **/
    247static int alloc_rq_skb_arr(struct hinic_rq *rq)
    248{
    249	struct hinic_wq *wq = rq->wq;
    250	size_t skb_arr_size;
    251
    252	skb_arr_size = wq->q_depth * sizeof(*rq->saved_skb);
    253	rq->saved_skb = vzalloc(skb_arr_size);
    254	if (!rq->saved_skb)
    255		return -ENOMEM;
    256
    257	return 0;
    258}
    259
    260/**
    261 * free_rq_skb_arr - free rq array for saved skb
    262 * @rq: HW Receive Queue
    263 **/
    264static void free_rq_skb_arr(struct hinic_rq *rq)
    265{
    266	vfree(rq->saved_skb);
    267}
    268
    269/**
    270 * hinic_init_sq - Initialize HW Send Queue
    271 * @sq: HW Send Queue
    272 * @hwif: HW Interface for accessing HW
    273 * @wq: Work Queue for the data of the SQ
    274 * @entry: msix entry for sq
    275 * @ci_addr: address for reading the current HW consumer index
    276 * @ci_dma_addr: dma address for reading the current HW consumer index
    277 * @db_base: doorbell base address
    278 *
    279 * Return 0 - Success, negative - Failure
    280 **/
    281int hinic_init_sq(struct hinic_sq *sq, struct hinic_hwif *hwif,
    282		  struct hinic_wq *wq, struct msix_entry *entry,
    283		  void *ci_addr, dma_addr_t ci_dma_addr,
    284		  void __iomem *db_base)
    285{
    286	sq->hwif = hwif;
    287
    288	sq->wq = wq;
    289
    290	sq->irq = entry->vector;
    291	sq->msix_entry = entry->entry;
    292
    293	sq->hw_ci_addr = ci_addr;
    294	sq->hw_ci_dma_addr = ci_dma_addr;
    295
    296	sq->db_base = db_base + SQ_DB_OFF;
    297
    298	return alloc_sq_skb_arr(sq);
    299}
    300
    301/**
    302 * hinic_clean_sq - Clean HW Send Queue's Resources
    303 * @sq: Send Queue
    304 **/
    305void hinic_clean_sq(struct hinic_sq *sq)
    306{
    307	free_sq_skb_arr(sq);
    308}
    309
    310/**
    311 * alloc_rq_cqe - allocate rq completion queue elements
    312 * @rq: HW Receive Queue
    313 *
    314 * Return 0 - Success, negative - Failure
    315 **/
    316static int alloc_rq_cqe(struct hinic_rq *rq)
    317{
    318	struct hinic_hwif *hwif = rq->hwif;
    319	struct pci_dev *pdev = hwif->pdev;
    320	size_t cqe_dma_size, cqe_size;
    321	struct hinic_wq *wq = rq->wq;
    322	int j, i;
    323
    324	cqe_size = wq->q_depth * sizeof(*rq->cqe);
    325	rq->cqe = vzalloc(cqe_size);
    326	if (!rq->cqe)
    327		return -ENOMEM;
    328
    329	cqe_dma_size = wq->q_depth * sizeof(*rq->cqe_dma);
    330	rq->cqe_dma = vzalloc(cqe_dma_size);
    331	if (!rq->cqe_dma)
    332		goto err_cqe_dma_arr_alloc;
    333
    334	for (i = 0; i < wq->q_depth; i++) {
    335		rq->cqe[i] = dma_alloc_coherent(&pdev->dev,
    336						sizeof(*rq->cqe[i]),
    337						&rq->cqe_dma[i], GFP_KERNEL);
    338		if (!rq->cqe[i])
    339			goto err_cqe_alloc;
    340	}
    341
    342	return 0;
    343
    344err_cqe_alloc:
    345	for (j = 0; j < i; j++)
    346		dma_free_coherent(&pdev->dev, sizeof(*rq->cqe[j]), rq->cqe[j],
    347				  rq->cqe_dma[j]);
    348
    349	vfree(rq->cqe_dma);
    350
    351err_cqe_dma_arr_alloc:
    352	vfree(rq->cqe);
    353	return -ENOMEM;
    354}
    355
    356/**
    357 * free_rq_cqe - free rq completion queue elements
    358 * @rq: HW Receive Queue
    359 **/
    360static void free_rq_cqe(struct hinic_rq *rq)
    361{
    362	struct hinic_hwif *hwif = rq->hwif;
    363	struct pci_dev *pdev = hwif->pdev;
    364	struct hinic_wq *wq = rq->wq;
    365	int i;
    366
    367	for (i = 0; i < wq->q_depth; i++)
    368		dma_free_coherent(&pdev->dev, sizeof(*rq->cqe[i]), rq->cqe[i],
    369				  rq->cqe_dma[i]);
    370
    371	vfree(rq->cqe_dma);
    372	vfree(rq->cqe);
    373}
    374
    375/**
    376 * hinic_init_rq - Initialize HW Receive Queue
    377 * @rq: HW Receive Queue
    378 * @hwif: HW Interface for accessing HW
    379 * @wq: Work Queue for the data of the RQ
    380 * @entry: msix entry for rq
    381 *
    382 * Return 0 - Success, negative - Failure
    383 **/
    384int hinic_init_rq(struct hinic_rq *rq, struct hinic_hwif *hwif,
    385		  struct hinic_wq *wq, struct msix_entry *entry)
    386{
    387	struct pci_dev *pdev = hwif->pdev;
    388	size_t pi_size;
    389	int err;
    390
    391	rq->hwif = hwif;
    392
    393	rq->wq = wq;
    394
    395	rq->irq = entry->vector;
    396	rq->msix_entry = entry->entry;
    397
    398	rq->buf_sz = HINIC_RX_BUF_SZ;
    399
    400	err = alloc_rq_skb_arr(rq);
    401	if (err) {
    402		dev_err(&pdev->dev, "Failed to allocate rq priv data\n");
    403		return err;
    404	}
    405
    406	err = alloc_rq_cqe(rq);
    407	if (err) {
    408		dev_err(&pdev->dev, "Failed to allocate rq cqe\n");
    409		goto err_alloc_rq_cqe;
    410	}
    411
    412	/* HW requirements: must be at least 32 bits */
    413	pi_size = ALIGN(sizeof(*rq->pi_virt_addr), sizeof(u32));
    414	rq->pi_virt_addr = dma_alloc_coherent(&pdev->dev, pi_size,
    415					      &rq->pi_dma_addr, GFP_KERNEL);
    416	if (!rq->pi_virt_addr) {
    417		err = -ENOMEM;
    418		goto err_pi_virt;
    419	}
    420
    421	return 0;
    422
    423err_pi_virt:
    424	free_rq_cqe(rq);
    425
    426err_alloc_rq_cqe:
    427	free_rq_skb_arr(rq);
    428	return err;
    429}
    430
    431/**
    432 * hinic_clean_rq - Clean HW Receive Queue's Resources
    433 * @rq: HW Receive Queue
    434 **/
    435void hinic_clean_rq(struct hinic_rq *rq)
    436{
    437	struct hinic_hwif *hwif = rq->hwif;
    438	struct pci_dev *pdev = hwif->pdev;
    439	size_t pi_size;
    440
    441	pi_size = ALIGN(sizeof(*rq->pi_virt_addr), sizeof(u32));
    442	dma_free_coherent(&pdev->dev, pi_size, rq->pi_virt_addr,
    443			  rq->pi_dma_addr);
    444
    445	free_rq_cqe(rq);
    446	free_rq_skb_arr(rq);
    447}
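
/*
 * Minimal usage sketch (hypothetical helper; the surrounding driver
 * objects are assumed to already exist): hinic_init_sq()/hinic_init_rq()
 * pair with hinic_clean_sq()/hinic_clean_rq(), and a failed RQ init is
 * unwound by cleaning the already-initialized SQ.
 */
static int example_init_qp(struct hinic_qp *qp, struct hinic_hwif *hwif,
			   struct hinic_wq *sq_wq, struct hinic_wq *rq_wq,
			   struct msix_entry *sq_entry,
			   struct msix_entry *rq_entry,
			   void *ci_addr, dma_addr_t ci_dma_addr,
			   void __iomem *db_base)
{
	int err;

	err = hinic_init_sq(&qp->sq, hwif, sq_wq, sq_entry,
			    ci_addr, ci_dma_addr, db_base);
	if (err)
		return err;

	err = hinic_init_rq(&qp->rq, hwif, rq_wq, rq_entry);
	if (err) {
		hinic_clean_sq(&qp->sq);	/* undo the SQ init */
		return err;
	}

	return 0;
}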
    448
    449/**
    450 * hinic_get_sq_free_wqebbs - return number of free wqebbs for use
    451 * @sq: send queue
    452 *
    453 * Return number of free wqebbs
    454 **/
    455int hinic_get_sq_free_wqebbs(struct hinic_sq *sq)
    456{
    457	struct hinic_wq *wq = sq->wq;
    458
    459	return atomic_read(&wq->delta) - 1;
    460}
    461
    462/**
    463 * hinic_get_rq_free_wqebbs - return number of free wqebbs for use
    464 * @rq: recv queue
    465 *
    466 * Return number of free wqebbs
    467 **/
    468int hinic_get_rq_free_wqebbs(struct hinic_rq *rq)
    469{
    470	struct hinic_wq *wq = rq->wq;
    471
    472	return atomic_read(&wq->delta) - 1;
    473}
    474
    475static void sq_prepare_ctrl(struct hinic_sq_ctrl *ctrl, u16 prod_idx,
    476			    int nr_descs)
    477{
    478	u32 ctrl_size, task_size, bufdesc_size;
    479
    480	ctrl_size = SIZE_8BYTES(sizeof(struct hinic_sq_ctrl));
    481	task_size = SIZE_8BYTES(sizeof(struct hinic_sq_task));
    482	bufdesc_size = nr_descs * sizeof(struct hinic_sq_bufdesc);
    483	bufdesc_size = SIZE_8BYTES(bufdesc_size);
    484
    485	ctrl->ctrl_info = HINIC_SQ_CTRL_SET(bufdesc_size, BUFDESC_SECT_LEN) |
    486			  HINIC_SQ_CTRL_SET(task_size, TASKSECT_LEN)        |
    487			  HINIC_SQ_CTRL_SET(SQ_NORMAL_WQE, DATA_FORMAT)     |
    488			  HINIC_SQ_CTRL_SET(ctrl_size, LEN);
    489
    490	ctrl->queue_info = HINIC_SQ_CTRL_SET(HINIC_MSS_DEFAULT,
    491					     QUEUE_INFO_MSS) |
    492			   HINIC_SQ_CTRL_SET(1, QUEUE_INFO_UC);
    493}
    494
    495static void sq_prepare_task(struct hinic_sq_task *task)
    496{
    497	task->pkt_info0 = 0;
    498	task->pkt_info1 = 0;
    499	task->pkt_info2 = 0;
    500
    501	task->ufo_v6_identify = 0;
    502
    503	task->pkt_info4 = HINIC_SQ_TASK_INFO4_SET(HINIC_L2TYPE_ETH, L2TYPE);
    504
    505	task->zero_pad = 0;
    506}
    507
    508void hinic_task_set_l2hdr(struct hinic_sq_task *task, u32 len)
    509{
    510	task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(len, L2HDR_LEN);
    511}
    512
    513void hinic_task_set_outter_l3(struct hinic_sq_task *task,
    514			      enum hinic_l3_offload_type l3_type,
    515			      u32 network_len)
    516{
    517	task->pkt_info2 |= HINIC_SQ_TASK_INFO2_SET(l3_type, OUTER_L3TYPE) |
    518			   HINIC_SQ_TASK_INFO2_SET(network_len, OUTER_L3LEN);
    519}
    520
    521void hinic_task_set_inner_l3(struct hinic_sq_task *task,
    522			     enum hinic_l3_offload_type l3_type,
    523			     u32 network_len)
    524{
    525	task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(l3_type, INNER_L3TYPE);
    526	task->pkt_info1 |= HINIC_SQ_TASK_INFO1_SET(network_len, INNER_L3LEN);
    527}
    528
    529void hinic_task_set_tunnel_l4(struct hinic_sq_task *task,
    530			      enum hinic_l4_tunnel_type l4_type,
    531			      u32 tunnel_len)
    532{
    533	task->pkt_info2 |= HINIC_SQ_TASK_INFO2_SET(l4_type, TUNNEL_L4TYPE) |
    534			   HINIC_SQ_TASK_INFO2_SET(tunnel_len, TUNNEL_L4LEN);
    535}
    536
    537void hinic_set_cs_inner_l4(struct hinic_sq_task *task, u32 *queue_info,
    538			   enum hinic_l4_offload_type l4_offload,
    539			   u32 l4_len, u32 offset)
    540{
    541	u32 tcp_udp_cs = 0, sctp = 0;
    542	u32 mss = HINIC_MSS_DEFAULT;
    543
    544	if (l4_offload == TCP_OFFLOAD_ENABLE ||
    545	    l4_offload == UDP_OFFLOAD_ENABLE)
    546		tcp_udp_cs = 1;
    547	else if (l4_offload == SCTP_OFFLOAD_ENABLE)
    548		sctp = 1;
    549
    550	task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(l4_offload, L4_OFFLOAD);
    551	task->pkt_info1 |= HINIC_SQ_TASK_INFO1_SET(l4_len, INNER_L4LEN);
    552
    553	*queue_info |= HINIC_SQ_CTRL_SET(offset, QUEUE_INFO_PLDOFF) |
    554		       HINIC_SQ_CTRL_SET(tcp_udp_cs, QUEUE_INFO_TCPUDP_CS) |
    555		       HINIC_SQ_CTRL_SET(sctp, QUEUE_INFO_SCTP);
    556
    557	*queue_info = HINIC_SQ_CTRL_CLEAR(*queue_info, QUEUE_INFO_MSS);
    558	*queue_info |= HINIC_SQ_CTRL_SET(mss, QUEUE_INFO_MSS);
    559}
    560
    561void hinic_set_tso_inner_l4(struct hinic_sq_task *task, u32 *queue_info,
    562			    enum hinic_l4_offload_type l4_offload,
    563			    u32 l4_len, u32 offset, u32 ip_ident, u32 mss)
    564{
    565	u32 tso = 0, ufo = 0;
    566
    567	if (l4_offload == TCP_OFFLOAD_ENABLE)
    568		tso = 1;
    569	else if (l4_offload == UDP_OFFLOAD_ENABLE)
    570		ufo = 1;
    571
    572	task->ufo_v6_identify = ip_ident;
    573
    574	task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(l4_offload, L4_OFFLOAD);
    575	task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(tso || ufo, TSO_FLAG);
    576	task->pkt_info1 |= HINIC_SQ_TASK_INFO1_SET(l4_len, INNER_L4LEN);
    577
    578	*queue_info |= HINIC_SQ_CTRL_SET(offset, QUEUE_INFO_PLDOFF) |
    579		       HINIC_SQ_CTRL_SET(tso, QUEUE_INFO_TSO) |
    580		       HINIC_SQ_CTRL_SET(ufo, QUEUE_INFO_UFO) |
    581		       HINIC_SQ_CTRL_SET(!!l4_offload, QUEUE_INFO_TCPUDP_CS);
    582
    583	/* set MSS value */
    584	*queue_info = HINIC_SQ_CTRL_CLEAR(*queue_info, QUEUE_INFO_MSS);
    585	*queue_info |= HINIC_SQ_CTRL_SET(mss, QUEUE_INFO_MSS);
    586}
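
/*
 * Illustrative call (values are hypothetical; the real driver derives
 * them from the skb in hinic_tx.c): request TSO for an inner TCP header
 * of 20 bytes with an MSS of 1460, passing through the payload offset
 * and IP identification supplied by the caller.
 */
static void example_enable_tso(struct hinic_sq_task *task, u32 *queue_info,
			       u32 payload_offset, u32 ip_ident)
{
	hinic_set_tso_inner_l4(task, queue_info, TCP_OFFLOAD_ENABLE,
			       20, payload_offset, ip_ident, 1460);
}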
    587
    588/**
    589 * hinic_sq_prepare_wqe - prepare wqe before inserting it into the queue
    590 * @sq: send queue
    591 * @prod_idx: pi value
    592 * @sq_wqe: wqe to prepare
    593 * @sges: sges for use by the wqe for send for buf addresses
    594 * @nr_sges: number of sges
    595 **/
    596void hinic_sq_prepare_wqe(struct hinic_sq *sq, u16 prod_idx,
    597			  struct hinic_sq_wqe *sq_wqe, struct hinic_sge *sges,
    598			  int nr_sges)
    599{
    600	int i;
    601
    602	sq_prepare_ctrl(&sq_wqe->ctrl, prod_idx, nr_sges);
    603
    604	sq_prepare_task(&sq_wqe->task);
    605
    606	for (i = 0; i < nr_sges; i++)
    607		sq_wqe->buf_descs[i].sge = sges[i];
    608}
    609
    610/**
    611 * sq_prepare_db - prepare doorbell to write
    612 * @sq: send queue
    613 * @prod_idx: pi value for the doorbell
    614 * @cos: cos of the doorbell
    615 *
    616 * Return db value
    617 **/
    618static u32 sq_prepare_db(struct hinic_sq *sq, u16 prod_idx, unsigned int cos)
    619{
    620	struct hinic_qp *qp = container_of(sq, struct hinic_qp, sq);
    621	u8 hi_prod_idx = SQ_DB_PI_HI(SQ_MASKED_IDX(sq, prod_idx));
    622
    623	/* Data should be written to HW in Big Endian Format */
    624	return cpu_to_be32(HINIC_SQ_DB_INFO_SET(hi_prod_idx, PI_HI)     |
    625			   HINIC_SQ_DB_INFO_SET(HINIC_DB_SQ_TYPE, TYPE) |
    626			   HINIC_SQ_DB_INFO_SET(HINIC_DATA_PATH, PATH)  |
    627			   HINIC_SQ_DB_INFO_SET(cos, COS)               |
    628			   HINIC_SQ_DB_INFO_SET(qp->q_id, QID));
    629}
    630
    631/**
    632 * hinic_sq_write_db - write doorbell
    633 * @sq: send queue
    634 * @prod_idx: pi value for the doorbell
    635 * @wqe_size: wqe size
    636 * @cos: cos of the wqe
    637 **/
    638void hinic_sq_write_db(struct hinic_sq *sq, u16 prod_idx, unsigned int wqe_size,
    639		       unsigned int cos)
    640{
    641	struct hinic_wq *wq = sq->wq;
    642
    643	/* increment prod_idx to the next */
    644	prod_idx += ALIGN(wqe_size, wq->wqebb_size) / wq->wqebb_size;
    645	prod_idx = SQ_MASKED_IDX(sq, prod_idx);
    646
    647	wmb();  /* Write all before the doorbell */
    648
    649	writel(sq_prepare_db(sq, prod_idx, cos), SQ_DB_ADDR(sq, prod_idx));
    650}
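
/*
 * Worked example for the doorbell write above (hypothetical geometry:
 * wqebb_size = 64 bytes, wq->mask = 1023): a 192-byte WQE placed at
 * prod_idx 1022 spans ALIGN(192, 64) / 64 = 3 WQEBBs, so prod_idx is
 * advanced to 1025 and masked to 1. SQ_DB_PI_LOW(1) = 1 selects the
 * second 8-byte doorbell slot, i.e. the value is written at
 * sq->db_base + 8 (db_base itself already sits SQ_DB_OFF = 2 KB into
 * the doorbell area, see hinic_init_sq()), while SQ_DB_PI_HI(1) = 0
 * ends up in the PI_HI field of the doorbell word built by
 * sq_prepare_db().
 */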
    651
    652/**
    653 * hinic_sq_get_wqe - get wqe ptr in the current pi and update the pi
    654 * @sq: sq to get wqe from
    655 * @wqe_size: wqe size
    656 * @prod_idx: returned pi
    657 *
    658 * Return wqe pointer
    659 **/
    660struct hinic_sq_wqe *hinic_sq_get_wqe(struct hinic_sq *sq,
    661				      unsigned int wqe_size, u16 *prod_idx)
    662{
    663	struct hinic_hw_wqe *hw_wqe = hinic_get_wqe(sq->wq, wqe_size,
    664						    prod_idx);
    665
    666	if (IS_ERR(hw_wqe))
    667		return NULL;
    668
    669	return &hw_wqe->sq_wqe;
    670}
    671
    672/**
    673 * hinic_sq_return_wqe - return the wqe to the sq
    674 * @sq: send queue
    675 * @wqe_size: the size of the wqe
    676 **/
    677void hinic_sq_return_wqe(struct hinic_sq *sq, unsigned int wqe_size)
    678{
    679	hinic_return_wqe(sq->wq, wqe_size);
    680}
    681
    682/**
    683 * hinic_sq_write_wqe - write the wqe to the sq
    684 * @sq: send queue
    685 * @prod_idx: pi of the wqe
    686 * @sq_wqe: the wqe to write
    687 * @skb: skb to save
    688 * @wqe_size: the size of the wqe
    689 **/
    690void hinic_sq_write_wqe(struct hinic_sq *sq, u16 prod_idx,
    691			struct hinic_sq_wqe *sq_wqe,
    692			struct sk_buff *skb, unsigned int wqe_size)
    693{
    694	struct hinic_hw_wqe *hw_wqe = (struct hinic_hw_wqe *)sq_wqe;
    695
    696	sq->saved_skb[prod_idx] = skb;
    697
    698	/* The data in the HW should be in Big Endian Format */
    699	hinic_cpu_to_be32(sq_wqe, wqe_size);
    700
    701	hinic_write_wqe(sq->wq, hw_wqe, wqe_size);
    702}
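
/*
 * Putting the SQ helpers together (illustrative sketch with a
 * hypothetical helper name; sge setup and wqe_size calculation are
 * assumed to be done by the caller): reserve a WQE, fill it, copy it
 * into the work queue and ring the doorbell.
 */
static int example_post_tx_skb(struct hinic_sq *sq, struct sk_buff *skb,
			       struct hinic_sge *sges, int nr_sges,
			       unsigned int wqe_size, unsigned int cos)
{
	struct hinic_sq_wqe *sq_wqe;
	u16 prod_idx;

	sq_wqe = hinic_sq_get_wqe(sq, wqe_size, &prod_idx);
	if (!sq_wqe)
		return -EBUSY;	/* no free WQEBBs */

	hinic_sq_prepare_wqe(sq, prod_idx, sq_wqe, sges, nr_sges);
	hinic_sq_write_wqe(sq, prod_idx, sq_wqe, skb, wqe_size);
	hinic_sq_write_db(sq, prod_idx, wqe_size, cos);

	return 0;
}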
    703
    704/**
    705 * hinic_sq_read_wqebb - read wqe ptr at the current ci and update the ci; the
    706 * wqe has only one wqebb
    707 * @sq: send queue
    708 * @skb: return skb that was saved
    709 * @wqe_size: the wqe size ptr
    710 * @cons_idx: consumer index of the wqe
    711 *
    712 * Return wqe in ci position
    713 **/
    714struct hinic_sq_wqe *hinic_sq_read_wqebb(struct hinic_sq *sq,
    715					 struct sk_buff **skb,
    716					 unsigned int *wqe_size, u16 *cons_idx)
    717{
    718	struct hinic_hw_wqe *hw_wqe;
    719	struct hinic_sq_wqe *sq_wqe;
    720	struct hinic_sq_ctrl *ctrl;
    721	unsigned int buf_sect_len;
    722	u32 ctrl_info;
    723
    724	/* read the ctrl section for getting wqe size */
    725	hw_wqe = hinic_read_wqe(sq->wq, sizeof(*ctrl), cons_idx);
    726	if (IS_ERR(hw_wqe))
    727		return NULL;
    728
    729	*skb = sq->saved_skb[*cons_idx];
    730
    731	sq_wqe = &hw_wqe->sq_wqe;
    732	ctrl = &sq_wqe->ctrl;
    733	ctrl_info = be32_to_cpu(ctrl->ctrl_info);
    734	buf_sect_len = HINIC_SQ_CTRL_GET(ctrl_info, BUFDESC_SECT_LEN);
    735
    736	*wqe_size = sizeof(*ctrl) + sizeof(sq_wqe->task);
    737	*wqe_size += SECT_SIZE_FROM_8BYTES(buf_sect_len);
    738	*wqe_size = ALIGN(*wqe_size, sq->wq->wqebb_size);
    739
    740	return &hw_wqe->sq_wqe;
    741}
    742
    743/**
    744 * hinic_sq_read_wqe - read wqe ptr in the current ci and update the ci
    745 * @sq: send queue
    746 * @skb: return skb that was saved
    747 * @wqe_size: the size of the wqe
    748 * @cons_idx: consumer index of the wqe
    749 *
    750 * Return wqe in ci position
    751 **/
    752struct hinic_sq_wqe *hinic_sq_read_wqe(struct hinic_sq *sq,
    753				       struct sk_buff **skb,
    754				       unsigned int wqe_size, u16 *cons_idx)
    755{
    756	struct hinic_hw_wqe *hw_wqe;
    757
    758	hw_wqe = hinic_read_wqe(sq->wq, wqe_size, cons_idx);
    759	*skb = sq->saved_skb[*cons_idx];
    760
    761	return &hw_wqe->sq_wqe;
    762}
    763
    764/**
    765 * hinic_sq_put_wqe - release the ci for new wqes
    766 * @sq: send queue
    767 * @wqe_size: the size of the wqe
    768 **/
    769void hinic_sq_put_wqe(struct hinic_sq *sq, unsigned int wqe_size)
    770{
    771	hinic_put_wqe(sq->wq, wqe_size);
    772}
    773
    774/**
    775 * hinic_sq_get_sges - get sges from the wqe
    776 * @sq_wqe: wqe to get the sges from its buffer addresses
    777 * @sges: returned sges
    778 * @nr_sges: number sges to return
    779 **/
    780void hinic_sq_get_sges(struct hinic_sq_wqe *sq_wqe, struct hinic_sge *sges,
    781		       int nr_sges)
    782{
    783	int i;
    784
    785	for (i = 0; i < nr_sges && i < HINIC_MAX_SQ_BUFDESCS; i++) {
    786		sges[i] = sq_wqe->buf_descs[i].sge;
    787		hinic_be32_to_cpu(&sges[i], sizeof(sges[i]));
    788	}
    789}
    790
    791/**
    792 * hinic_rq_get_wqe - get wqe ptr in the current pi and update the pi
    793 * @rq: rq to get wqe from
    794 * @wqe_size: wqe size
    795 * @prod_idx: returned pi
    796 *
    797 * Return wqe pointer
    798 **/
    799struct hinic_rq_wqe *hinic_rq_get_wqe(struct hinic_rq *rq,
    800				      unsigned int wqe_size, u16 *prod_idx)
    801{
    802	struct hinic_hw_wqe *hw_wqe = hinic_get_wqe(rq->wq, wqe_size,
    803						    prod_idx);
    804
    805	if (IS_ERR(hw_wqe))
    806		return NULL;
    807
    808	return &hw_wqe->rq_wqe;
    809}
    810
    811/**
    812 * hinic_rq_write_wqe - write the wqe to the rq
    813 * @rq: recv queue
    814 * @prod_idx: pi of the wqe
    815 * @rq_wqe: the wqe to write
    816 * @skb: skb to save
    817 **/
    818void hinic_rq_write_wqe(struct hinic_rq *rq, u16 prod_idx,
    819			struct hinic_rq_wqe *rq_wqe, struct sk_buff *skb)
    820{
    821	struct hinic_hw_wqe *hw_wqe = (struct hinic_hw_wqe *)rq_wqe;
    822
    823	rq->saved_skb[prod_idx] = skb;
    824
    825	/* The data in the HW should be in Big Endian Format */
    826	hinic_cpu_to_be32(rq_wqe, sizeof(*rq_wqe));
    827
    828	hinic_write_wqe(rq->wq, hw_wqe, sizeof(*rq_wqe));
    829}
    830
    831/**
    832 * hinic_rq_read_wqe - read wqe ptr in the current ci and update the ci
    833 * @rq: recv queue
    834 * @wqe_size: the size of the wqe
    835 * @skb: return saved skb
    836 * @cons_idx: consumer index of the wqe
    837 *
    838 * Return wqe in ci position
    839 **/
    840struct hinic_rq_wqe *hinic_rq_read_wqe(struct hinic_rq *rq,
    841				       unsigned int wqe_size,
    842				       struct sk_buff **skb, u16 *cons_idx)
    843{
    844	struct hinic_hw_wqe *hw_wqe;
    845	struct hinic_rq_cqe *cqe;
    846	int rx_done;
    847	u32 status;
    848
    849	hw_wqe = hinic_read_wqe(rq->wq, wqe_size, cons_idx);
    850	if (IS_ERR(hw_wqe))
    851		return NULL;
    852
    853	cqe = rq->cqe[*cons_idx];
    854
    855	status = be32_to_cpu(cqe->status);
    856
    857	rx_done = HINIC_RQ_CQE_STATUS_GET(status, RXDONE);
    858	if (!rx_done)
    859		return NULL;
    860
    861	*skb = rq->saved_skb[*cons_idx];
    862
    863	return &hw_wqe->rq_wqe;
    864}
    865
    866/**
    867 * hinic_rq_read_next_wqe - increment ci and read the wqe in ci position
    868 * @rq: recv queue
    869 * @wqe_size: the size of the wqe
    870 * @skb: return saved skb
    871 * @cons_idx: consumer index in the wq
    872 *
    873 * Return wqe in incremented ci position
    874 **/
    875struct hinic_rq_wqe *hinic_rq_read_next_wqe(struct hinic_rq *rq,
    876					    unsigned int wqe_size,
    877					    struct sk_buff **skb,
    878					    u16 *cons_idx)
    879{
    880	struct hinic_wq *wq = rq->wq;
    881	struct hinic_hw_wqe *hw_wqe;
    882	unsigned int num_wqebbs;
    883
    884	wqe_size = ALIGN(wqe_size, wq->wqebb_size);
    885	num_wqebbs = wqe_size / wq->wqebb_size;
    886
    887	*cons_idx = RQ_MASKED_IDX(rq, *cons_idx + num_wqebbs);
    888
    889	*skb = rq->saved_skb[*cons_idx];
    890
    891	hw_wqe = hinic_read_wqe_direct(wq, *cons_idx);
    892
    893	return &hw_wqe->rq_wqe;
    894}
    895
    896/**
    897 * hinic_rq_put_wqe - release the ci for new wqes
    898 * @rq: recv queue
    899 * @cons_idx: consumer index of the wqe
    900 * @wqe_size: the size of the wqe
    901 **/
    902void hinic_rq_put_wqe(struct hinic_rq *rq, u16 cons_idx,
    903		      unsigned int wqe_size)
    904{
    905	struct hinic_rq_cqe *cqe = rq->cqe[cons_idx];
    906	u32 status = be32_to_cpu(cqe->status);
    907
    908	status = HINIC_RQ_CQE_STATUS_CLEAR(status, RXDONE);
    909
    910	/* Rx WQE size is 1 WQEBB, no wq shadow */
    911	cqe->status = cpu_to_be32(status);
    912
    913	wmb();          /* clear done flag */
    914
    915	hinic_put_wqe(rq->wq, wqe_size);
    916}
    917
    918/**
    919 * hinic_rq_get_sge - get sge from the wqe
    920 * @rq: recv queue
    921 * @rq_wqe: wqe to get the sge from its buf address
    922 * @cons_idx: consumer index
    923 * @sge: returned sge
    924 **/
    925void hinic_rq_get_sge(struct hinic_rq *rq, struct hinic_rq_wqe *rq_wqe,
    926		      u16 cons_idx, struct hinic_sge *sge)
    927{
    928	struct hinic_rq_cqe *cqe = rq->cqe[cons_idx];
    929	u32 len = be32_to_cpu(cqe->len);
    930
    931	sge->hi_addr = be32_to_cpu(rq_wqe->buf_desc.hi_addr);
    932	sge->lo_addr = be32_to_cpu(rq_wqe->buf_desc.lo_addr);
    933	sge->len = HINIC_RQ_CQE_SGE_GET(len, LEN);
    934}
    935
    936/**
    937 * hinic_rq_prepare_wqe - prepare wqe before inserting it into the queue
    938 * @rq: recv queue
    939 * @prod_idx: pi value
    940 * @rq_wqe: the wqe
    941 * @sge: sge for use by the wqe for recv buf address
    942 **/
    943void hinic_rq_prepare_wqe(struct hinic_rq *rq, u16 prod_idx,
    944			  struct hinic_rq_wqe *rq_wqe, struct hinic_sge *sge)
    945{
    946	struct hinic_rq_cqe_sect *cqe_sect = &rq_wqe->cqe_sect;
    947	struct hinic_rq_bufdesc *buf_desc = &rq_wqe->buf_desc;
    948	struct hinic_rq_cqe *cqe = rq->cqe[prod_idx];
    949	struct hinic_rq_ctrl *ctrl = &rq_wqe->ctrl;
    950	dma_addr_t cqe_dma = rq->cqe_dma[prod_idx];
    951
    952	ctrl->ctrl_info =
    953		HINIC_RQ_CTRL_SET(SIZE_8BYTES(sizeof(*ctrl)), LEN) |
    954		HINIC_RQ_CTRL_SET(SIZE_8BYTES(sizeof(*cqe_sect)),
    955				  COMPLETE_LEN)                    |
    956		HINIC_RQ_CTRL_SET(SIZE_8BYTES(sizeof(*buf_desc)),
    957				  BUFDESC_SECT_LEN)                |
    958		HINIC_RQ_CTRL_SET(RQ_COMPLETE_SGE, COMPLETE_FORMAT);
    959
    960	hinic_set_sge(&cqe_sect->sge, cqe_dma, sizeof(*cqe));
    961
    962	buf_desc->hi_addr = sge->hi_addr;
    963	buf_desc->lo_addr = sge->lo_addr;
    964}
    965
    966/**
    967 * hinic_rq_update - update pi of the rq
    968 * @rq: recv queue
    969 * @prod_idx: pi value
    970 **/
    971void hinic_rq_update(struct hinic_rq *rq, u16 prod_idx)
    972{
    973	*rq->pi_virt_addr = cpu_to_be16(RQ_MASKED_IDX(rq, prod_idx + 1));
    974}