cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

pvrdma_cq.c (11243B)


/*
 * Copyright (c) 2012-2016 VMware, Inc.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of EITHER the GNU General Public License
 * version 2 as published by the Free Software Foundation or the BSD
 * 2-Clause License. This program is distributed in the hope that it
 * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License version 2 for more details at
 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program available in the file COPYING in the main
 * directory of this source tree.
 *
 * The BSD 2-Clause License
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <asm/page.h>
#include <linux/io.h>
#include <linux/wait.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/uverbs_ioctl.h>

#include "pvrdma.h"

/**
 * pvrdma_req_notify_cq - request notification for a completion queue
 * @ibcq: the completion queue
 * @notify_flags: notification flags
 *
 * @return: 0 for success.
 */
int pvrdma_req_notify_cq(struct ib_cq *ibcq,
			 enum ib_cq_notify_flags notify_flags)
{
	struct pvrdma_dev *dev = to_vdev(ibcq->device);
	struct pvrdma_cq *cq = to_vcq(ibcq);
	u32 val = cq->cq_handle;
	unsigned long flags;
	int has_data = 0;

	val |= (notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
		PVRDMA_UAR_CQ_ARM_SOL : PVRDMA_UAR_CQ_ARM;

	spin_lock_irqsave(&cq->cq_lock, flags);

	pvrdma_write_uar_cq(dev, val);

	if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) {
		unsigned int head;

		has_data = pvrdma_idx_ring_has_data(&cq->ring_state->rx,
						    cq->ibcq.cqe, &head);
		if (unlikely(has_data == PVRDMA_INVALID_IDX))
			dev_err(&dev->pdev->dev, "CQ ring state invalid\n");
	}

	spin_unlock_irqrestore(&cq->cq_lock, flags);

	return has_data;
}

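/*
 * Usage sketch (illustrative, not part of this file): a verbs consumer
 * typically arms the CQ through the core ib_req_notify_cq() wrapper and
 * asks to be told about completions that arrived before arming, e.g.
 *
 *	if (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP |
 *			     IB_CQ_REPORT_MISSED_EVENTS) > 0)
 *		poll_again(cq);
 *
 * poll_again() is a placeholder for the caller's own poll loop; a return
 * value > 0 means completions were already pending when the CQ was armed.
 */
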
/**
 * pvrdma_create_cq - create completion queue
 * @ibcq: Allocated CQ
 * @attr: completion queue attributes
 * @udata: user data
 *
 * @return: 0 on success
 */
int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
		     struct ib_udata *udata)
{
	struct ib_device *ibdev = ibcq->device;
	int entries = attr->cqe;
	struct pvrdma_dev *dev = to_vdev(ibdev);
	struct pvrdma_cq *cq = to_vcq(ibcq);
	int ret;
	int npages;
	unsigned long flags;
	union pvrdma_cmd_req req;
	union pvrdma_cmd_resp rsp;
	struct pvrdma_cmd_create_cq *cmd = &req.create_cq;
	struct pvrdma_cmd_create_cq_resp *resp = &rsp.create_cq_resp;
	struct pvrdma_create_cq_resp cq_resp = {};
	struct pvrdma_create_cq ucmd;
	struct pvrdma_ucontext *context = rdma_udata_to_drv_context(
		udata, struct pvrdma_ucontext, ibucontext);

	BUILD_BUG_ON(sizeof(struct pvrdma_cqe) != 64);

	if (attr->flags)
		return -EOPNOTSUPP;

	entries = roundup_pow_of_two(entries);
	if (entries < 1 || entries > dev->dsr->caps.max_cqe)
		return -EINVAL;

	if (!atomic_add_unless(&dev->num_cqs, 1, dev->dsr->caps.max_cq))
		return -ENOMEM;

	cq->ibcq.cqe = entries;
	cq->is_kernel = !udata;

	if (!cq->is_kernel) {
		if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
			ret = -EFAULT;
			goto err_cq;
		}

		cq->umem = ib_umem_get(ibdev, ucmd.buf_addr, ucmd.buf_size,
				       IB_ACCESS_LOCAL_WRITE);
		if (IS_ERR(cq->umem)) {
			ret = PTR_ERR(cq->umem);
			goto err_cq;
		}

		npages = ib_umem_num_dma_blocks(cq->umem, PAGE_SIZE);
	} else {
		/* One extra page for shared ring state */
		npages = 1 + (entries * sizeof(struct pvrdma_cqe) +
			      PAGE_SIZE - 1) / PAGE_SIZE;
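		/*
		 * Worked example (assuming 4 KiB pages): a 256-entry CQ
		 * needs 256 * 64 B = 4 pages of CQEs plus the ring-state
		 * page, so npages = 5.
		 */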

		/* Skip header page. */
		cq->offset = PAGE_SIZE;
	}

	if (npages < 0 || npages > PVRDMA_PAGE_DIR_MAX_PAGES) {
		dev_warn(&dev->pdev->dev,
			 "overflow pages in completion queue\n");
		ret = -EINVAL;
		goto err_umem;
	}

	ret = pvrdma_page_dir_init(dev, &cq->pdir, npages, cq->is_kernel);
	if (ret) {
		dev_warn(&dev->pdev->dev,
			 "could not allocate page directory\n");
		goto err_umem;
	}

	/* Ring state is always the first page. Set in library for user cq. */
	if (cq->is_kernel)
		cq->ring_state = cq->pdir.pages[0];
	else
		pvrdma_page_dir_insert_umem(&cq->pdir, cq->umem, 0);

	refcount_set(&cq->refcnt, 1);
	init_completion(&cq->free);
	spin_lock_init(&cq->cq_lock);

	memset(cmd, 0, sizeof(*cmd));
	cmd->hdr.cmd = PVRDMA_CMD_CREATE_CQ;
	cmd->nchunks = npages;
	cmd->ctx_handle = context ? context->ctx_handle : 0;
	cmd->cqe = entries;
	cmd->pdir_dma = cq->pdir.dir_dma;
	ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_CQ_RESP);
	if (ret < 0) {
		dev_warn(&dev->pdev->dev,
			 "could not create completion queue, error: %d\n", ret);
		goto err_page_dir;
	}

	cq->ibcq.cqe = resp->cqe;
	cq->cq_handle = resp->cq_handle;
	cq_resp.cqn = resp->cq_handle;
	spin_lock_irqsave(&dev->cq_tbl_lock, flags);
	dev->cq_tbl[cq->cq_handle % dev->dsr->caps.max_cq] = cq;
	spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);

	if (!cq->is_kernel) {
		cq->uar = &context->uar;

		/* Copy udata back. */
		if (ib_copy_to_udata(udata, &cq_resp, sizeof(cq_resp))) {
			dev_warn(&dev->pdev->dev,
				 "failed to copy back udata\n");
			pvrdma_destroy_cq(&cq->ibcq, udata);
			return -EINVAL;
		}
	}

	return 0;

err_page_dir:
	pvrdma_page_dir_cleanup(dev, &cq->pdir);
err_umem:
	ib_umem_release(cq->umem);
err_cq:
	atomic_dec(&dev->num_cqs);
	return ret;
}

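/*
 * Drop the creation reference taken in pvrdma_create_cq() and wait until any
 * remaining references (typically held by the interrupt/event paths elsewhere
 * in the driver) have been released before tearing down the CQ's memory.
 */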
static void pvrdma_free_cq(struct pvrdma_dev *dev, struct pvrdma_cq *cq)
{
	if (refcount_dec_and_test(&cq->refcnt))
		complete(&cq->free);
	wait_for_completion(&cq->free);

	ib_umem_release(cq->umem);

	pvrdma_page_dir_cleanup(dev, &cq->pdir);
}

/**
 * pvrdma_destroy_cq - destroy completion queue
 * @cq: the completion queue to destroy.
 * @udata: user data or null for kernel object
 */
int pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
{
	struct pvrdma_cq *vcq = to_vcq(cq);
	union pvrdma_cmd_req req;
	struct pvrdma_cmd_destroy_cq *cmd = &req.destroy_cq;
	struct pvrdma_dev *dev = to_vdev(cq->device);
	unsigned long flags;
	int ret;

	memset(cmd, 0, sizeof(*cmd));
	cmd->hdr.cmd = PVRDMA_CMD_DESTROY_CQ;
	cmd->cq_handle = vcq->cq_handle;

	ret = pvrdma_cmd_post(dev, &req, NULL, 0);
	if (ret < 0)
		dev_warn(&dev->pdev->dev,
			 "could not destroy completion queue, error: %d\n",
			 ret);

	/* free cq's resources */
	spin_lock_irqsave(&dev->cq_tbl_lock, flags);
	dev->cq_tbl[vcq->cq_handle] = NULL;
	spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);

	pvrdma_free_cq(dev, vcq);
	atomic_dec(&dev->num_cqs);
	return 0;
}

static inline struct pvrdma_cqe *get_cqe(struct pvrdma_cq *cq, int i)
{
	return (struct pvrdma_cqe *)pvrdma_page_dir_get_ptr(
					&cq->pdir,
					cq->offset +
					sizeof(struct pvrdma_cqe) * i);
}

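/*
 * Remove all pending CQEs that belong to @qp from a kernel CQ by compacting
 * the ring in place; completions for other QPs are preserved. The caller is
 * expected to hold cq->cq_lock.
 */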
void _pvrdma_flush_cqe(struct pvrdma_qp *qp, struct pvrdma_cq *cq)
{
	unsigned int head;
	int has_data;

	if (!cq->is_kernel)
		return;

	/* Lock held */
	has_data = pvrdma_idx_ring_has_data(&cq->ring_state->rx,
					    cq->ibcq.cqe, &head);
	if (unlikely(has_data > 0)) {
		int items;
		int curr;
		int tail = pvrdma_idx(&cq->ring_state->rx.prod_tail,
				      cq->ibcq.cqe);
		struct pvrdma_cqe *cqe;
		struct pvrdma_cqe *curr_cqe;

		items = (tail > head) ? (tail - head) :
			(cq->ibcq.cqe - head + tail);
		curr = --tail;
		while (items-- > 0) {
			if (curr < 0)
				curr = cq->ibcq.cqe - 1;
			if (tail < 0)
				tail = cq->ibcq.cqe - 1;
			curr_cqe = get_cqe(cq, curr);
			if ((curr_cqe->qp & 0xFFFF) != qp->qp_handle) {
				if (curr != tail) {
					cqe = get_cqe(cq, tail);
					*cqe = *curr_cqe;
				}
				tail--;
			} else {
				pvrdma_idx_ring_inc(
					&cq->ring_state->rx.cons_head,
					cq->ibcq.cqe);
			}
			curr--;
		}
	}
}

static int pvrdma_poll_one(struct pvrdma_cq *cq, struct pvrdma_qp **cur_qp,
			   struct ib_wc *wc)
{
	struct pvrdma_dev *dev = to_vdev(cq->ibcq.device);
	int has_data;
	unsigned int head;
	bool tried = false;
	struct pvrdma_cqe *cqe;

retry:
	has_data = pvrdma_idx_ring_has_data(&cq->ring_state->rx,
					    cq->ibcq.cqe, &head);
	if (has_data == 0) {
		if (tried)
			return -EAGAIN;

		pvrdma_write_uar_cq(dev, cq->cq_handle | PVRDMA_UAR_CQ_POLL);

		tried = true;
		goto retry;
	} else if (has_data == PVRDMA_INVALID_IDX) {
		dev_err(&dev->pdev->dev, "CQ ring state invalid\n");
		return -EAGAIN;
	}

	cqe = get_cqe(cq, head);

	/* Ensure cqe is valid. */
	rmb();
	if (dev->qp_tbl[cqe->qp & 0xffff])
		*cur_qp = (struct pvrdma_qp *)dev->qp_tbl[cqe->qp & 0xffff];
	else
		return -EAGAIN;

	wc->opcode = pvrdma_wc_opcode_to_ib(cqe->opcode);
	wc->status = pvrdma_wc_status_to_ib(cqe->status);
	wc->wr_id = cqe->wr_id;
	wc->qp = &(*cur_qp)->ibqp;
	wc->byte_len = cqe->byte_len;
	wc->ex.imm_data = cqe->imm_data;
	wc->src_qp = cqe->src_qp;
	wc->wc_flags = pvrdma_wc_flags_to_ib(cqe->wc_flags);
	wc->pkey_index = cqe->pkey_index;
	wc->slid = cqe->slid;
	wc->sl = cqe->sl;
	wc->dlid_path_bits = cqe->dlid_path_bits;
	wc->port_num = cqe->port_num;
	wc->vendor_err = cqe->vendor_err;
	wc->network_hdr_type = pvrdma_network_type_to_ib(cqe->network_hdr_type);

	/* Update shared ring state */
	pvrdma_idx_ring_inc(&cq->ring_state->rx.cons_head, cq->ibcq.cqe);

	return 0;
}

/**
 * pvrdma_poll_cq - poll for work completion queue entries
 * @ibcq: completion queue
 * @num_entries: the maximum number of entries
 * @wc: pointer to work completion array
 *
 * @return: number of polled completion entries
 */
int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	struct pvrdma_cq *cq = to_vcq(ibcq);
	struct pvrdma_qp *cur_qp = NULL;
	unsigned long flags;
	int npolled;

	if (num_entries < 1 || wc == NULL)
		return 0;

	spin_lock_irqsave(&cq->cq_lock, flags);
	for (npolled = 0; npolled < num_entries; ++npolled) {
		if (pvrdma_poll_one(cq, &cur_qp, wc + npolled))
			break;
	}

	spin_unlock_irqrestore(&cq->cq_lock, flags);

	/* Ensure we do not return errors from poll_cq */
	return npolled;
}
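
/*
 * Usage sketch (illustrative, not part of the driver): a kernel verbs
 * consumer drains completions from a CQ through the core wrapper, which
 * lands in pvrdma_poll_cq() for this device, e.g.
 *
 *	struct ib_wc wc[16];
 *	int i, n;
 *
 *	while ((n = ib_poll_cq(cq, 16, wc)) > 0)
 *		for (i = 0; i < n; i++)
 *			handle_wc(&wc[i]);
 *
 * handle_wc() stands in for the caller's own completion handler.
 */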