cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

mthca_eq.c (24882B)


/*
 * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/pci.h>
#include <linux/slab.h>

#include "mthca_dev.h"
#include "mthca_cmd.h"
#include "mthca_config_reg.h"

enum {
	MTHCA_NUM_ASYNC_EQE = 0x80,
	MTHCA_NUM_CMD_EQE   = 0x80,
	MTHCA_NUM_SPARE_EQE = 0x80,
	MTHCA_EQ_ENTRY_SIZE = 0x20
};

/*
 * Must be packed because start is 64 bits but only aligned to 32 bits.
 */
struct mthca_eq_context {
	__be32 flags;
	__be64 start;
	__be32 logsize_usrpage;
	__be32 tavor_pd;	/* reserved for Arbel */
	u8     reserved1[3];
	u8     intr;
	__be32 arbel_pd;	/* lost_count for Tavor */
	__be32 lkey;
	u32    reserved2[2];
	__be32 consumer_index;
	__be32 producer_index;
	u32    reserved3[4];
} __packed;

#define MTHCA_EQ_STATUS_OK          ( 0 << 28)
#define MTHCA_EQ_STATUS_OVERFLOW    ( 9 << 28)
#define MTHCA_EQ_STATUS_WRITE_FAIL  (10 << 28)
#define MTHCA_EQ_OWNER_SW           ( 0 << 24)
#define MTHCA_EQ_OWNER_HW           ( 1 << 24)
#define MTHCA_EQ_FLAG_TR            ( 1 << 18)
#define MTHCA_EQ_FLAG_OI            ( 1 << 17)
#define MTHCA_EQ_STATE_ARMED        ( 1 <<  8)
#define MTHCA_EQ_STATE_FIRED        ( 2 <<  8)
#define MTHCA_EQ_STATE_ALWAYS_ARMED ( 3 <<  8)
#define MTHCA_EQ_STATE_ARBEL        ( 8 <<  8)

enum {
	MTHCA_EVENT_TYPE_COMP       	    = 0x00,
	MTHCA_EVENT_TYPE_PATH_MIG   	    = 0x01,
	MTHCA_EVENT_TYPE_COMM_EST   	    = 0x02,
	MTHCA_EVENT_TYPE_SQ_DRAINED 	    = 0x03,
	MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE    = 0x13,
	MTHCA_EVENT_TYPE_SRQ_LIMIT	    = 0x14,
	MTHCA_EVENT_TYPE_CQ_ERROR   	    = 0x04,
	MTHCA_EVENT_TYPE_WQ_CATAS_ERROR     = 0x05,
	MTHCA_EVENT_TYPE_EEC_CATAS_ERROR    = 0x06,
	MTHCA_EVENT_TYPE_PATH_MIG_FAILED    = 0x07,
	MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR = 0x10,
	MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR    = 0x11,
	MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR    = 0x12,
	MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR  = 0x08,
	MTHCA_EVENT_TYPE_PORT_CHANGE        = 0x09,
	MTHCA_EVENT_TYPE_EQ_OVERFLOW        = 0x0f,
	MTHCA_EVENT_TYPE_ECC_DETECT         = 0x0e,
	MTHCA_EVENT_TYPE_CMD                = 0x0a
};

#define MTHCA_ASYNC_EVENT_MASK ((1ULL << MTHCA_EVENT_TYPE_PATH_MIG)           | \
				(1ULL << MTHCA_EVENT_TYPE_COMM_EST)           | \
				(1ULL << MTHCA_EVENT_TYPE_SQ_DRAINED)         | \
				(1ULL << MTHCA_EVENT_TYPE_CQ_ERROR)           | \
				(1ULL << MTHCA_EVENT_TYPE_WQ_CATAS_ERROR)     | \
				(1ULL << MTHCA_EVENT_TYPE_EEC_CATAS_ERROR)    | \
				(1ULL << MTHCA_EVENT_TYPE_PATH_MIG_FAILED)    | \
				(1ULL << MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \
				(1ULL << MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR)    | \
				(1ULL << MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR)  | \
				(1ULL << MTHCA_EVENT_TYPE_PORT_CHANGE)        | \
				(1ULL << MTHCA_EVENT_TYPE_ECC_DETECT))
#define MTHCA_SRQ_EVENT_MASK   ((1ULL << MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR)    | \
				(1ULL << MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE)    | \
				(1ULL << MTHCA_EVENT_TYPE_SRQ_LIMIT))
#define MTHCA_CMD_EVENT_MASK    (1ULL << MTHCA_EVENT_TYPE_CMD)

#define MTHCA_EQ_DB_INC_CI     (1 << 24)
#define MTHCA_EQ_DB_REQ_NOT    (2 << 24)
#define MTHCA_EQ_DB_DISARM_CQ  (3 << 24)
#define MTHCA_EQ_DB_SET_CI     (4 << 24)
#define MTHCA_EQ_DB_ALWAYS_ARM (5 << 24)

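/*
 * Layout of a single 32-byte EQ entry (EQE).  The type field selects
 * which member of the event union is valid, and the top bit of the
 * owner byte marks whether the entry currently belongs to hardware or
 * software.
 */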
struct mthca_eqe {
	u8 reserved1;
	u8 type;
	u8 reserved2;
	u8 subtype;
	union {
		u32 raw[6];
		struct {
			__be32 cqn;
		} __packed comp;
		struct {
			u16    reserved1;
			__be16 token;
			u32    reserved2;
			u8     reserved3[3];
			u8     status;
			__be64 out_param;
		} __packed cmd;
		struct {
			__be32 qpn;
		} __packed qp;
		struct {
			__be32 srqn;
		} __packed srq;
		struct {
			__be32 cqn;
			u32    reserved1;
			u8     reserved2[3];
			u8     syndrome;
		} __packed cq_err;
		struct {
			u32    reserved1[2];
			__be32 port;
		} __packed port_change;
	} event;
	u8 reserved3[3];
	u8 owner;
} __packed;

#define  MTHCA_EQ_ENTRY_OWNER_SW      (0 << 7)
#define  MTHCA_EQ_ENTRY_OWNER_HW      (1 << 7)

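/*
 * Event classes routed to the async EQ; SRQ events are only included
 * when the device advertises SRQ support.
 */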
static inline u64 async_mask(struct mthca_dev *dev)
{
	return dev->mthca_flags & MTHCA_FLAG_SRQ ?
		MTHCA_ASYNC_EVENT_MASK | MTHCA_SRQ_EVENT_MASK :
		MTHCA_ASYNC_EVENT_MASK;
}

static inline void tavor_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
{
	/*
	 * This barrier makes sure that all updates to ownership bits
	 * done by set_eqe_hw() hit memory before the consumer index
	 * is updated.  set_eq_ci() allows the HCA to possibly write
	 * more EQ entries, and we want to avoid the exceedingly
	 * unlikely possibility of the HCA writing an entry and then
	 * having set_eqe_hw() overwrite the owner field.
	 */
	wmb();
	mthca_write64(MTHCA_EQ_DB_SET_CI | eq->eqn, ci & (eq->nent - 1),
		      dev->kar + MTHCA_EQ_DOORBELL,
		      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
}

static inline void arbel_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
{
	/* See comment in tavor_set_eq_ci() above. */
	wmb();
	__raw_writel((__force u32) cpu_to_be32(ci),
		     dev->eq_regs.arbel.eq_set_ci_base + eq->eqn * 8);
	/* We still want ordering, just not swabbing, so add a barrier */
	mb();
}

static inline void set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
{
	if (mthca_is_memfree(dev))
		arbel_set_eq_ci(dev, eq, ci);
	else
		tavor_set_eq_ci(dev, eq, ci);
}

static inline void tavor_eq_req_not(struct mthca_dev *dev, int eqn)
{
	mthca_write64(MTHCA_EQ_DB_REQ_NOT | eqn, 0,
		      dev->kar + MTHCA_EQ_DOORBELL,
		      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
}

static inline void arbel_eq_req_not(struct mthca_dev *dev, u32 eqn_mask)
{
	writel(eqn_mask, dev->eq_regs.arbel.eq_arm);
}

static inline void disarm_cq(struct mthca_dev *dev, int eqn, int cqn)
{
	if (!mthca_is_memfree(dev)) {
		mthca_write64(MTHCA_EQ_DB_DISARM_CQ | eqn, cqn,
			      dev->kar + MTHCA_EQ_DOORBELL,
			      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
	}
}

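/*
 * EQE ring helpers: the queue is a power-of-two ring spread across
 * page_list pages, so an entry is found by masking the consumer index
 * and splitting the byte offset into a page number and page offset.
 */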
static inline struct mthca_eqe *get_eqe(struct mthca_eq *eq, u32 entry)
{
	unsigned long off = (entry & (eq->nent - 1)) * MTHCA_EQ_ENTRY_SIZE;
	return eq->page_list[off / PAGE_SIZE].buf + off % PAGE_SIZE;
}

static inline struct mthca_eqe *next_eqe_sw(struct mthca_eq *eq)
{
	struct mthca_eqe *eqe;
	eqe = get_eqe(eq, eq->cons_index);
	return (MTHCA_EQ_ENTRY_OWNER_HW & eqe->owner) ? NULL : eqe;
}

static inline void set_eqe_hw(struct mthca_eqe *eqe)
{
	eqe->owner = MTHCA_EQ_ENTRY_OWNER_HW;
}

static void port_change(struct mthca_dev *dev, int port, int active)
{
	struct ib_event record;

	mthca_dbg(dev, "Port change to %s for port %d\n",
		  active ? "active" : "down", port);

	record.device = &dev->ib_dev;
	record.event  = active ? IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
	record.element.port_num = port;

	ib_dispatch_event(&record);
}

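/*
 * Core EQ polling loop: consume entries while software owns them,
 * dispatch each event to the CQ/QP/SRQ/command handlers, hand the
 * entries back to hardware and periodically push the consumer index
 * so the HCA does not see the queue as overflowing.  Returns nonzero
 * if any EQEs were processed.
 */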
static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq)
{
	struct mthca_eqe *eqe;
	int disarm_cqn;
	int eqes_found = 0;
	int set_ci = 0;

	while ((eqe = next_eqe_sw(eq))) {
		/*
		 * Make sure we read EQ entry contents after we've
		 * checked the ownership bit.
		 */
		rmb();

		switch (eqe->type) {
		case MTHCA_EVENT_TYPE_COMP:
			disarm_cqn = be32_to_cpu(eqe->event.comp.cqn) & 0xffffff;
			disarm_cq(dev, eq->eqn, disarm_cqn);
			mthca_cq_completion(dev, disarm_cqn);
			break;

		case MTHCA_EVENT_TYPE_PATH_MIG:
			mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
				       IB_EVENT_PATH_MIG);
			break;

		case MTHCA_EVENT_TYPE_COMM_EST:
			mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
				       IB_EVENT_COMM_EST);
			break;

		case MTHCA_EVENT_TYPE_SQ_DRAINED:
			mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
				       IB_EVENT_SQ_DRAINED);
			break;

		case MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE:
			mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
				       IB_EVENT_QP_LAST_WQE_REACHED);
			break;

		case MTHCA_EVENT_TYPE_SRQ_LIMIT:
			mthca_srq_event(dev, be32_to_cpu(eqe->event.srq.srqn) & 0xffffff,
					IB_EVENT_SRQ_LIMIT_REACHED);
			break;

		case MTHCA_EVENT_TYPE_WQ_CATAS_ERROR:
			mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
				       IB_EVENT_QP_FATAL);
			break;

		case MTHCA_EVENT_TYPE_PATH_MIG_FAILED:
			mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
				       IB_EVENT_PATH_MIG_ERR);
			break;

		case MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
			mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
				       IB_EVENT_QP_REQ_ERR);
			break;

		case MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR:
			mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
				       IB_EVENT_QP_ACCESS_ERR);
			break;

		case MTHCA_EVENT_TYPE_CMD:
			mthca_cmd_event(dev,
					be16_to_cpu(eqe->event.cmd.token),
					eqe->event.cmd.status,
					be64_to_cpu(eqe->event.cmd.out_param));
			break;

		case MTHCA_EVENT_TYPE_PORT_CHANGE:
			port_change(dev,
				    (be32_to_cpu(eqe->event.port_change.port) >> 28) & 3,
				    eqe->subtype == 0x4);
			break;

		case MTHCA_EVENT_TYPE_CQ_ERROR:
			mthca_warn(dev, "CQ %s on CQN %06x\n",
				   eqe->event.cq_err.syndrome == 1 ?
				   "overrun" : "access violation",
				   be32_to_cpu(eqe->event.cq_err.cqn) & 0xffffff);
			mthca_cq_event(dev, be32_to_cpu(eqe->event.cq_err.cqn),
				       IB_EVENT_CQ_ERR);
			break;

		case MTHCA_EVENT_TYPE_EQ_OVERFLOW:
			mthca_warn(dev, "EQ overrun on EQN %d\n", eq->eqn);
			break;

		case MTHCA_EVENT_TYPE_EEC_CATAS_ERROR:
		case MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR:
		case MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR:
		case MTHCA_EVENT_TYPE_ECC_DETECT:
		default:
			mthca_warn(dev, "Unhandled event %02x(%02x) on EQ %d\n",
				   eqe->type, eqe->subtype, eq->eqn);
			break;
		}

		set_eqe_hw(eqe);
		++eq->cons_index;
		eqes_found = 1;
		++set_ci;

		/*
		 * The HCA will think the queue has overflowed if we
		 * don't tell it we've been processing events.  We
		 * create our EQs with MTHCA_NUM_SPARE_EQE extra
		 * entries, so we must update our consumer index at
		 * least that often.
		 */
		if (unlikely(set_ci >= MTHCA_NUM_SPARE_EQE)) {
			/*
			 * Conditional on hca_type is OK here because
			 * this is a rare case, not the fast path.
			 */
			set_eq_ci(dev, eq, eq->cons_index);
			set_ci = 0;
		}
	}

	/*
	 * Rely on caller to set consumer index so that we don't have
	 * to test hca_type in our interrupt handling fast path.
	 */
	return eqes_found;
}

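/*
 * Tavor interrupt handling: with a shared INTx line the ECR register
 * says which EQs fired, while each MSI-X vector is tied to a single
 * EQ and can be handled unconditionally.
 */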
static irqreturn_t mthca_tavor_interrupt(int irq, void *dev_ptr)
{
	struct mthca_dev *dev = dev_ptr;
	u32 ecr;
	int i;

	if (dev->eq_table.clr_mask)
		writel(dev->eq_table.clr_mask, dev->eq_table.clr_int);

	ecr = readl(dev->eq_regs.tavor.ecr_base + 4);
	if (!ecr)
		return IRQ_NONE;

	writel(ecr, dev->eq_regs.tavor.ecr_base +
	       MTHCA_ECR_CLR_BASE - MTHCA_ECR_BASE + 4);

	for (i = 0; i < MTHCA_NUM_EQ; ++i)
		if (ecr & dev->eq_table.eq[i].eqn_mask) {
			if (mthca_eq_int(dev, &dev->eq_table.eq[i]))
				tavor_set_eq_ci(dev, &dev->eq_table.eq[i],
						dev->eq_table.eq[i].cons_index);
			tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn);
		}

	return IRQ_HANDLED;
}

static irqreturn_t mthca_tavor_msi_x_interrupt(int irq, void *eq_ptr)
{
	struct mthca_eq  *eq  = eq_ptr;
	struct mthca_dev *dev = eq->dev;

	mthca_eq_int(dev, eq);
	tavor_set_eq_ci(dev, eq, eq->cons_index);
	tavor_eq_req_not(dev, eq->eqn);

	/* MSI-X vectors always belong to us */
	return IRQ_HANDLED;
}

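/*
 * Arbel (mem-free mode) interrupt handling: no ECR is consulted; the
 * INTx handler simply polls every EQ and reports whether any work was
 * found, and the MSI-X handler services just its own EQ.
 */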
static irqreturn_t mthca_arbel_interrupt(int irq, void *dev_ptr)
{
	struct mthca_dev *dev = dev_ptr;
	int work = 0;
	int i;

	if (dev->eq_table.clr_mask)
		writel(dev->eq_table.clr_mask, dev->eq_table.clr_int);

	for (i = 0; i < MTHCA_NUM_EQ; ++i)
		if (mthca_eq_int(dev, &dev->eq_table.eq[i])) {
			work = 1;
			arbel_set_eq_ci(dev, &dev->eq_table.eq[i],
					dev->eq_table.eq[i].cons_index);
		}

	arbel_eq_req_not(dev, dev->eq_table.arm_mask);

	return IRQ_RETVAL(work);
}

static irqreturn_t mthca_arbel_msi_x_interrupt(int irq, void *eq_ptr)
{
	struct mthca_eq  *eq  = eq_ptr;
	struct mthca_dev *dev = eq->dev;

	mthca_eq_int(dev, eq);
	arbel_set_eq_ci(dev, eq, eq->cons_index);
	arbel_eq_req_not(dev, eq->eqn_mask);

	/* MSI-X vectors always belong to us */
	return IRQ_HANDLED;
}

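/*
 * Allocate one EQ: round the requested size up to a power of two,
 * allocate coherent pages for the ring, register them as a memory
 * region, fill in the EQ context and hand the queue to the HCA with
 * SW2HW_EQ.
 */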
static int mthca_create_eq(struct mthca_dev *dev,
			   int nent,
			   u8 intr,
			   struct mthca_eq *eq)
{
	int npages;
	u64 *dma_list = NULL;
	dma_addr_t t;
	struct mthca_mailbox *mailbox;
	struct mthca_eq_context *eq_context;
	int err = -ENOMEM;
	int i;

	eq->dev  = dev;
	eq->nent = roundup_pow_of_two(max(nent, 2));
	npages = ALIGN(eq->nent * MTHCA_EQ_ENTRY_SIZE, PAGE_SIZE) / PAGE_SIZE;

	eq->page_list = kmalloc_array(npages, sizeof(*eq->page_list),
				      GFP_KERNEL);
	if (!eq->page_list)
		goto err_out;

	for (i = 0; i < npages; ++i)
		eq->page_list[i].buf = NULL;

	dma_list = kmalloc_array(npages, sizeof(*dma_list), GFP_KERNEL);
	if (!dma_list)
		goto err_out_free;

	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
	if (IS_ERR(mailbox))
		goto err_out_free;
	eq_context = mailbox->buf;

	for (i = 0; i < npages; ++i) {
		eq->page_list[i].buf = dma_alloc_coherent(&dev->pdev->dev,
							  PAGE_SIZE, &t, GFP_KERNEL);
		if (!eq->page_list[i].buf)
			goto err_out_free_pages;

		dma_list[i] = t;
		dma_unmap_addr_set(&eq->page_list[i], mapping, t);

		clear_page(eq->page_list[i].buf);
	}

	for (i = 0; i < eq->nent; ++i)
		set_eqe_hw(get_eqe(eq, i));

	eq->eqn = mthca_alloc(&dev->eq_table.alloc);
	if (eq->eqn == -1)
		goto err_out_free_pages;

	err = mthca_mr_alloc_phys(dev, dev->driver_pd.pd_num,
				  dma_list, PAGE_SHIFT, npages,
				  0, npages * PAGE_SIZE,
				  MTHCA_MPT_FLAG_LOCAL_WRITE |
				  MTHCA_MPT_FLAG_LOCAL_READ,
				  &eq->mr);
	if (err)
		goto err_out_free_eq;

	memset(eq_context, 0, sizeof *eq_context);
	eq_context->flags           = cpu_to_be32(MTHCA_EQ_STATUS_OK   |
						  MTHCA_EQ_OWNER_HW    |
						  MTHCA_EQ_STATE_ARMED |
						  MTHCA_EQ_FLAG_TR);
	if (mthca_is_memfree(dev))
		eq_context->flags  |= cpu_to_be32(MTHCA_EQ_STATE_ARBEL);

	eq_context->logsize_usrpage = cpu_to_be32((ffs(eq->nent) - 1) << 24);
	if (mthca_is_memfree(dev)) {
		eq_context->arbel_pd = cpu_to_be32(dev->driver_pd.pd_num);
	} else {
		eq_context->logsize_usrpage |= cpu_to_be32(dev->driver_uar.index);
		eq_context->tavor_pd         = cpu_to_be32(dev->driver_pd.pd_num);
	}
	eq_context->intr            = intr;
	eq_context->lkey            = cpu_to_be32(eq->mr.ibmr.lkey);

	err = mthca_SW2HW_EQ(dev, mailbox, eq->eqn);
	if (err) {
		mthca_warn(dev, "SW2HW_EQ returned %d\n", err);
		goto err_out_free_mr;
	}

	kfree(dma_list);
	mthca_free_mailbox(dev, mailbox);

	eq->eqn_mask   = swab32(1 << eq->eqn);
	eq->cons_index = 0;

	dev->eq_table.arm_mask |= eq->eqn_mask;

	mthca_dbg(dev, "Allocated EQ %d with %d entries\n",
		  eq->eqn, eq->nent);

	return err;

 err_out_free_mr:
	mthca_free_mr(dev, &eq->mr);

 err_out_free_eq:
	mthca_free(&dev->eq_table.alloc, eq->eqn);

 err_out_free_pages:
	for (i = 0; i < npages; ++i)
		if (eq->page_list[i].buf)
			dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
					  eq->page_list[i].buf,
					  dma_unmap_addr(&eq->page_list[i],
							 mapping));

	mthca_free_mailbox(dev, mailbox);

 err_out_free:
	kfree(eq->page_list);
	kfree(dma_list);

 err_out:
	return err;
}

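/*
 * Tear an EQ down again: reclaim it from hardware with HW2SW_EQ, then
 * free its memory region and the pages backing the ring.  The if (0)
 * block below is a compiled-out debugging aid that dumps the returned
 * EQ context.
 */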
static void mthca_free_eq(struct mthca_dev *dev,
			  struct mthca_eq *eq)
{
	struct mthca_mailbox *mailbox;
	int err;
	int npages = (eq->nent * MTHCA_EQ_ENTRY_SIZE + PAGE_SIZE - 1) /
		PAGE_SIZE;
	int i;

	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
	if (IS_ERR(mailbox))
		return;

	err = mthca_HW2SW_EQ(dev, mailbox, eq->eqn);
	if (err)
		mthca_warn(dev, "HW2SW_EQ returned %d\n", err);

	dev->eq_table.arm_mask &= ~eq->eqn_mask;

	if (0) {
		mthca_dbg(dev, "Dumping EQ context %02x:\n", eq->eqn);
		for (i = 0; i < sizeof (struct mthca_eq_context) / 4; ++i) {
			if (i % 4 == 0)
				printk("[%02x] ", i * 4);
			printk(" %08x", be32_to_cpup(mailbox->buf + i * 4));
			if ((i + 1) % 4 == 0)
				printk("\n");
		}
	}

	mthca_free_mr(dev, &eq->mr);
	for (i = 0; i < npages; ++i)
		dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
				  eq->page_list[i].buf,
				  dma_unmap_addr(&eq->page_list[i], mapping));

	kfree(eq->page_list);
	mthca_free_mailbox(dev, mailbox);
}

static void mthca_free_irqs(struct mthca_dev *dev)
{
	int i;

	if (dev->eq_table.have_irq)
		free_irq(dev->pdev->irq, dev);
	for (i = 0; i < MTHCA_NUM_EQ; ++i)
		if (dev->eq_table.eq[i].have_irq) {
			free_irq(dev->eq_table.eq[i].msi_x_vector,
				 dev->eq_table.eq + i);
			dev->eq_table.eq[i].have_irq = 0;
		}
}

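/* Map a register range within BAR 0 of the HCA into kernel space. */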
static int mthca_map_reg(struct mthca_dev *dev,
			 unsigned long offset, unsigned long size,
			 void __iomem **map)
{
	phys_addr_t base = pci_resource_start(dev->pdev, 0);

	*map = ioremap(base + offset, size);
	if (!*map)
		return -ENOMEM;

	return 0;
}

static int mthca_map_eq_regs(struct mthca_dev *dev)
{
	if (mthca_is_memfree(dev)) {
		/*
		 * We assume that the EQ arm and EQ set CI registers
		 * fall within the first BAR.  We can't trust the
		 * values firmware gives us, since those addresses are
		 * valid on the HCA's side of the PCI bus but not
		 * necessarily the host side.
		 */
		if (mthca_map_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
				  dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE,
				  &dev->clr_base)) {
			mthca_err(dev, "Couldn't map interrupt clear register, "
				  "aborting.\n");
			return -ENOMEM;
		}

		/*
		 * Add 4 because we limit ourselves to EQs 0 ... 31,
		 * so we only need the low word of the register.
		 */
		if (mthca_map_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) &
					dev->fw.arbel.eq_arm_base) + 4, 4,
				  &dev->eq_regs.arbel.eq_arm)) {
			mthca_err(dev, "Couldn't map EQ arm register, aborting.\n");
			iounmap(dev->clr_base);
			return -ENOMEM;
		}

		if (mthca_map_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
				  dev->fw.arbel.eq_set_ci_base,
				  MTHCA_EQ_SET_CI_SIZE,
				  &dev->eq_regs.arbel.eq_set_ci_base)) {
			mthca_err(dev, "Couldn't map EQ CI register, aborting.\n");
			iounmap(dev->eq_regs.arbel.eq_arm);
			iounmap(dev->clr_base);
			return -ENOMEM;
		}
	} else {
		if (mthca_map_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE,
				  &dev->clr_base)) {
			mthca_err(dev, "Couldn't map interrupt clear register, "
				  "aborting.\n");
			return -ENOMEM;
		}

		if (mthca_map_reg(dev, MTHCA_ECR_BASE,
				  MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE,
				  &dev->eq_regs.tavor.ecr_base)) {
			mthca_err(dev, "Couldn't map ecr register, "
				  "aborting.\n");
			iounmap(dev->clr_base);
			return -ENOMEM;
		}
	}

	return 0;
}

static void mthca_unmap_eq_regs(struct mthca_dev *dev)
{
	if (mthca_is_memfree(dev)) {
		iounmap(dev->eq_regs.arbel.eq_set_ci_base);
		iounmap(dev->eq_regs.arbel.eq_arm);
		iounmap(dev->clr_base);
	} else {
		iounmap(dev->eq_regs.tavor.ecr_base);
		iounmap(dev->clr_base);
	}
}

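/*
 * For mem-free HCAs the EQ context table lives in ICM: allocate and
 * DMA-map a single host page and tell firmware to map it at icm_virt.
 */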
int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt)
{
	int ret;

	/*
	 * We assume that mapping one page is enough for the whole EQ
	 * context table.  This is fine with all current HCAs, because
	 * we only use 32 EQs and each EQ uses 32 bytes of context
	 * memory, or 1 KB total.
	 */
	dev->eq_table.icm_virt = icm_virt;
	dev->eq_table.icm_page = alloc_page(GFP_HIGHUSER);
	if (!dev->eq_table.icm_page)
		return -ENOMEM;
	dev->eq_table.icm_dma =
		dma_map_page(&dev->pdev->dev, dev->eq_table.icm_page, 0,
			     PAGE_SIZE, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(&dev->pdev->dev, dev->eq_table.icm_dma)) {
		__free_page(dev->eq_table.icm_page);
		return -ENOMEM;
	}

	ret = mthca_MAP_ICM_page(dev, dev->eq_table.icm_dma, icm_virt);
	if (ret) {
		dma_unmap_page(&dev->pdev->dev, dev->eq_table.icm_dma,
			       PAGE_SIZE, DMA_BIDIRECTIONAL);
		__free_page(dev->eq_table.icm_page);
	}

	return ret;
}

void mthca_unmap_eq_icm(struct mthca_dev *dev)
{
	mthca_UNMAP_ICM(dev, dev->eq_table.icm_virt, 1);
	dma_unmap_page(&dev->pdev->dev, dev->eq_table.icm_dma, PAGE_SIZE,
		       DMA_BIDIRECTIONAL);
	__free_page(dev->eq_table.icm_page);
}

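/*
 * Driver-wide EQ setup: create the completion, async and command EQs,
 * request either per-EQ MSI-X vectors or one shared legacy interrupt,
 * map the async and command event classes onto their EQs, and arm
 * everything.
 */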
int mthca_init_eq_table(struct mthca_dev *dev)
{
	int err;
	u8 intr;
	int i;

	err = mthca_alloc_init(&dev->eq_table.alloc,
			       dev->limits.num_eqs,
			       dev->limits.num_eqs - 1,
			       dev->limits.reserved_eqs);
	if (err)
		return err;

	err = mthca_map_eq_regs(dev);
	if (err)
		goto err_out_free;

	if (dev->mthca_flags & MTHCA_FLAG_MSI_X) {
		dev->eq_table.clr_mask = 0;
	} else {
		dev->eq_table.clr_mask =
			swab32(1 << (dev->eq_table.inta_pin & 31));
		dev->eq_table.clr_int  = dev->clr_base +
			(dev->eq_table.inta_pin < 32 ? 4 : 0);
	}

	dev->eq_table.arm_mask = 0;

	intr = dev->eq_table.inta_pin;

	err = mthca_create_eq(dev, dev->limits.num_cqs + MTHCA_NUM_SPARE_EQE,
			      (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 128 : intr,
			      &dev->eq_table.eq[MTHCA_EQ_COMP]);
	if (err)
		goto err_out_unmap;

	err = mthca_create_eq(dev, MTHCA_NUM_ASYNC_EQE + MTHCA_NUM_SPARE_EQE,
			      (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 129 : intr,
			      &dev->eq_table.eq[MTHCA_EQ_ASYNC]);
	if (err)
		goto err_out_comp;

	err = mthca_create_eq(dev, MTHCA_NUM_CMD_EQE + MTHCA_NUM_SPARE_EQE,
			      (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 130 : intr,
			      &dev->eq_table.eq[MTHCA_EQ_CMD]);
	if (err)
		goto err_out_async;

	if (dev->mthca_flags & MTHCA_FLAG_MSI_X) {
		static const char *eq_name[] = {
			[MTHCA_EQ_COMP]  = DRV_NAME "-comp",
			[MTHCA_EQ_ASYNC] = DRV_NAME "-async",
			[MTHCA_EQ_CMD]   = DRV_NAME "-cmd"
		};

		for (i = 0; i < MTHCA_NUM_EQ; ++i) {
			snprintf(dev->eq_table.eq[i].irq_name,
				 IB_DEVICE_NAME_MAX,
				 "%s@pci:%s", eq_name[i],
				 pci_name(dev->pdev));
			err = request_irq(dev->eq_table.eq[i].msi_x_vector,
					  mthca_is_memfree(dev) ?
					  mthca_arbel_msi_x_interrupt :
					  mthca_tavor_msi_x_interrupt,
					  0, dev->eq_table.eq[i].irq_name,
					  dev->eq_table.eq + i);
			if (err)
				goto err_out_cmd;
			dev->eq_table.eq[i].have_irq = 1;
		}
	} else {
		snprintf(dev->eq_table.eq[0].irq_name, IB_DEVICE_NAME_MAX,
			 DRV_NAME "@pci:%s", pci_name(dev->pdev));
		err = request_irq(dev->pdev->irq,
				  mthca_is_memfree(dev) ?
				  mthca_arbel_interrupt :
				  mthca_tavor_interrupt,
				  IRQF_SHARED, dev->eq_table.eq[0].irq_name, dev);
		if (err)
			goto err_out_cmd;
		dev->eq_table.have_irq = 1;
	}

	err = mthca_MAP_EQ(dev, async_mask(dev),
			   0, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn);
	if (err)
		mthca_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n",
			   dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, err);

	err = mthca_MAP_EQ(dev, MTHCA_CMD_EVENT_MASK,
			   0, dev->eq_table.eq[MTHCA_EQ_CMD].eqn);
	if (err)
		mthca_warn(dev, "MAP_EQ for cmd EQ %d failed (%d)\n",
			   dev->eq_table.eq[MTHCA_EQ_CMD].eqn, err);

	for (i = 0; i < MTHCA_NUM_EQ; ++i)
		if (mthca_is_memfree(dev))
			arbel_eq_req_not(dev, dev->eq_table.eq[i].eqn_mask);
		else
			tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn);

	return 0;

err_out_cmd:
	mthca_free_irqs(dev);
	mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_CMD]);

err_out_async:
	mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_ASYNC]);

err_out_comp:
	mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_COMP]);

err_out_unmap:
	mthca_unmap_eq_regs(dev);

err_out_free:
	mthca_alloc_cleanup(&dev->eq_table.alloc);
	return err;
}

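/* Undo mthca_init_eq_table(): unmap the event classes and free all EQs. */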
void mthca_cleanup_eq_table(struct mthca_dev *dev)
{
	int i;

	mthca_free_irqs(dev);

	mthca_MAP_EQ(dev, async_mask(dev),
		     1, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn);
	mthca_MAP_EQ(dev, MTHCA_CMD_EVENT_MASK,
		     1, dev->eq_table.eq[MTHCA_EQ_CMD].eqn);

	for (i = 0; i < MTHCA_NUM_EQ; ++i)
		mthca_free_eq(dev, &dev->eq_table.eq[i]);

	mthca_unmap_eq_regs(dev);

	mthca_alloc_cleanup(&dev->eq_table.alloc);
}