cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

eq.c (28334B)


      1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
      2/*
      3 * Copyright (c) 2013-2021, Mellanox Technologies inc.  All rights reserved.
      4 */
      5
      6#include <linux/interrupt.h>
      7#include <linux/notifier.h>
      8#include <linux/mlx5/driver.h>
      9#include <linux/mlx5/vport.h>
     10#include <linux/mlx5/eq.h>
     11#ifdef CONFIG_RFS_ACCEL
     12#include <linux/cpu_rmap.h>
     13#endif
     14#include "mlx5_core.h"
     15#include "lib/eq.h"
     16#include "fpga/core.h"
     17#include "eswitch.h"
     18#include "lib/clock.h"
     19#include "diag/fw_tracer.h"
     20#include "mlx5_irq.h"
     21#include "devlink.h"
     22
     23enum {
     24	MLX5_EQE_OWNER_INIT_VAL	= 0x1,
     25};
     26
     27enum {
     28	MLX5_EQ_STATE_ARMED		= 0x9,
     29	MLX5_EQ_STATE_FIRED		= 0xa,
     30	MLX5_EQ_STATE_ALWAYS_ARMED	= 0xb,
     31};
     32
     33enum {
     34	MLX5_EQ_DOORBEL_OFFSET	= 0x40,
     35};
     36
      37/* The polling budget must be smaller than MLX5_NUM_SPARE_EQE to guarantee
      38 * that we update the ci before we have polled all the entries in the EQ.
      39 * MLX5_NUM_SPARE_EQE pads the EQ size, so the budget stays below the EQ size.
      40 */
     41enum {
     42	MLX5_EQ_POLLING_BUDGET	= 128,
     43};
     44
     45static_assert(MLX5_EQ_POLLING_BUDGET <= MLX5_NUM_SPARE_EQE);
     46
     47struct mlx5_eq_table {
     48	struct list_head        comp_eqs_list;
     49	struct mlx5_eq_async    pages_eq;
     50	struct mlx5_eq_async    cmd_eq;
     51	struct mlx5_eq_async    async_eq;
     52
     53	struct atomic_notifier_head nh[MLX5_EVENT_TYPE_MAX];
     54
     55	/* Since CQ DB is stored in async_eq */
     56	struct mlx5_nb          cq_err_nb;
     57
     58	struct mutex            lock; /* sync async eqs creations */
     59	int			num_comp_eqs;
     60	struct mlx5_irq_table	*irq_table;
     61	struct mlx5_irq         **comp_irqs;
     62	struct mlx5_irq         *ctrl_irq;
     63#ifdef CONFIG_RFS_ACCEL
     64	struct cpu_rmap		*rmap;
     65#endif
     66};
     67
     68#define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG)	    | \
     69			       (1ull << MLX5_EVENT_TYPE_COMM_EST)	    | \
     70			       (1ull << MLX5_EVENT_TYPE_SQ_DRAINED)	    | \
     71			       (1ull << MLX5_EVENT_TYPE_CQ_ERROR)	    | \
     72			       (1ull << MLX5_EVENT_TYPE_WQ_CATAS_ERROR)	    | \
     73			       (1ull << MLX5_EVENT_TYPE_PATH_MIG_FAILED)    | \
     74			       (1ull << MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \
     75			       (1ull << MLX5_EVENT_TYPE_WQ_ACCESS_ERROR)    | \
     76			       (1ull << MLX5_EVENT_TYPE_PORT_CHANGE)	    | \
     77			       (1ull << MLX5_EVENT_TYPE_SRQ_CATAS_ERROR)    | \
     78			       (1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE)	    | \
     79			       (1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT))
     80
     81static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn)
     82{
     83	u32 in[MLX5_ST_SZ_DW(destroy_eq_in)] = {};
     84
     85	MLX5_SET(destroy_eq_in, in, opcode, MLX5_CMD_OP_DESTROY_EQ);
     86	MLX5_SET(destroy_eq_in, in, eq_number, eqn);
     87	return mlx5_cmd_exec_in(dev, destroy_eq, in);
     88}
     89
     90/* caller must eventually call mlx5_cq_put on the returned cq */
     91static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn)
     92{
     93	struct mlx5_cq_table *table = &eq->cq_table;
     94	struct mlx5_core_cq *cq = NULL;
     95
     96	rcu_read_lock();
     97	cq = radix_tree_lookup(&table->tree, cqn);
     98	if (likely(cq))
     99		mlx5_cq_hold(cq);
    100	rcu_read_unlock();
    101
    102	return cq;
    103}
    104
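        /* Completion EQ interrupt handler: polls up to MLX5_EQ_POLLING_BUDGET EQEs,
         * dispatches each completion to its CQ via cq->comp(), then updates the
         * consumer index, re-arms the EQ and, if any EQE was processed, schedules
         * the tasklet that runs the deferred CQ completion work.
         */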
    105static int mlx5_eq_comp_int(struct notifier_block *nb,
    106			    __always_unused unsigned long action,
    107			    __always_unused void *data)
    108{
    109	struct mlx5_eq_comp *eq_comp =
    110		container_of(nb, struct mlx5_eq_comp, irq_nb);
    111	struct mlx5_eq *eq = &eq_comp->core;
    112	struct mlx5_eqe *eqe;
    113	int num_eqes = 0;
    114	u32 cqn = -1;
    115
    116	eqe = next_eqe_sw(eq);
    117	if (!eqe)
    118		goto out;
    119
    120	do {
    121		struct mlx5_core_cq *cq;
    122
    123		/* Make sure we read EQ entry contents after we've
    124		 * checked the ownership bit.
    125		 */
    126		dma_rmb();
    127		/* Assume (eqe->type) is always MLX5_EVENT_TYPE_COMP */
    128		cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;
    129
    130		cq = mlx5_eq_cq_get(eq, cqn);
    131		if (likely(cq)) {
    132			++cq->arm_sn;
    133			cq->comp(cq, eqe);
    134			mlx5_cq_put(cq);
    135		} else {
    136			dev_dbg_ratelimited(eq->dev->device,
    137					    "Completion event for bogus CQ 0x%x\n", cqn);
    138		}
    139
    140		++eq->cons_index;
    141
    142	} while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq)));
    143
    144out:
    145	eq_update_ci(eq, 1);
    146
    147	if (cqn != -1)
    148		tasklet_schedule(&eq_comp->tasklet_ctx.task);
    149
    150	return 0;
    151}
    152
    153/* Some architectures don't latch interrupts when they are disabled, so using
    154 * mlx5_eq_poll_irq_disabled could end up losing interrupts while trying to
    155 * avoid losing them.  It is not recommended to use it, unless this is the last
    156 * resort.
    157 */
    158u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq)
    159{
    160	u32 count_eqe;
    161
    162	disable_irq(eq->core.irqn);
    163	count_eqe = eq->core.cons_index;
    164	mlx5_eq_comp_int(&eq->irq_nb, 0, NULL);
    165	count_eqe = eq->core.cons_index - count_eqe;
    166	enable_irq(eq->core.irqn);
    167
    168	return count_eqe;
    169}
    170
    171static void mlx5_eq_async_int_lock(struct mlx5_eq_async *eq, bool recovery,
    172				   unsigned long *flags)
    173	__acquires(&eq->lock)
    174{
    175	if (!recovery)
    176		spin_lock(&eq->lock);
    177	else
    178		spin_lock_irqsave(&eq->lock, *flags);
    179}
    180
    181static void mlx5_eq_async_int_unlock(struct mlx5_eq_async *eq, bool recovery,
    182				     unsigned long *flags)
    183	__releases(&eq->lock)
    184{
    185	if (!recovery)
    186		spin_unlock(&eq->lock);
    187	else
    188		spin_unlock_irqrestore(&eq->lock, *flags);
    189}
    190
    191enum async_eq_nb_action {
    192	ASYNC_EQ_IRQ_HANDLER = 0,
    193	ASYNC_EQ_RECOVER = 1,
    194};
    195
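        /* Async EQ handler shared by the async, cmd and pages EQs. Called either
         * from the IRQ notifier chain (ASYNC_EQ_IRQ_HANDLER) or from
         * mlx5_cmd_eq_recover() in polling mode (ASYNC_EQ_RECOVER), in which case
         * the lock is taken with IRQs saved and the number of EQEs polled is
         * returned. Each EQE is fanned out to the per-event-type notifier chain
         * and to the MLX5_EVENT_TYPE_NOTIFY_ANY chain.
         */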
    196static int mlx5_eq_async_int(struct notifier_block *nb,
    197			     unsigned long action, void *data)
    198{
    199	struct mlx5_eq_async *eq_async =
    200		container_of(nb, struct mlx5_eq_async, irq_nb);
    201	struct mlx5_eq *eq = &eq_async->core;
    202	struct mlx5_eq_table *eqt;
    203	struct mlx5_core_dev *dev;
    204	struct mlx5_eqe *eqe;
    205	unsigned long flags;
    206	int num_eqes = 0;
    207	bool recovery;
    208
    209	dev = eq->dev;
    210	eqt = dev->priv.eq_table;
    211
    212	recovery = action == ASYNC_EQ_RECOVER;
    213	mlx5_eq_async_int_lock(eq_async, recovery, &flags);
    214
    215	eqe = next_eqe_sw(eq);
    216	if (!eqe)
    217		goto out;
    218
    219	do {
    220		/*
    221		 * Make sure we read EQ entry contents after we've
    222		 * checked the ownership bit.
    223		 */
    224		dma_rmb();
    225
    226		atomic_notifier_call_chain(&eqt->nh[eqe->type], eqe->type, eqe);
    227		atomic_notifier_call_chain(&eqt->nh[MLX5_EVENT_TYPE_NOTIFY_ANY], eqe->type, eqe);
    228
    229		++eq->cons_index;
    230
    231	} while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq)));
    232
    233out:
    234	eq_update_ci(eq, 1);
    235	mlx5_eq_async_int_unlock(eq_async, recovery, &flags);
    236
    237	return unlikely(recovery) ? num_eqes : 0;
    238}
    239
    240void mlx5_cmd_eq_recover(struct mlx5_core_dev *dev)
    241{
    242	struct mlx5_eq_async *eq = &dev->priv.eq_table->cmd_eq;
    243	int eqes;
    244
    245	eqes = mlx5_eq_async_int(&eq->irq_nb, ASYNC_EQ_RECOVER, NULL);
    246	if (eqes)
    247		mlx5_core_warn(dev, "Recovered %d EQEs on cmd_eq\n", eqes);
    248}
    249
    250static void init_eq_buf(struct mlx5_eq *eq)
    251{
    252	struct mlx5_eqe *eqe;
    253	int i;
    254
    255	for (i = 0; i < eq_get_size(eq); i++) {
    256		eqe = get_eqe(eq, i);
    257		eqe->owner = MLX5_EQE_OWNER_INIT_VAL;
    258	}
    259}
    260
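        /* Allocate the EQ buffer (param->nent plus MLX5_NUM_SPARE_EQE entries,
         * rounded up to a power of two), issue the CREATE_EQ command with the
         * event bitmask from @param, and fill in eqn, irqn and the doorbell
         * address for the new EQ.
         */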
    261static int
    262create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
    263	      struct mlx5_eq_param *param)
    264{
    265	u8 log_eq_size = order_base_2(param->nent + MLX5_NUM_SPARE_EQE);
    266	struct mlx5_cq_table *cq_table = &eq->cq_table;
    267	u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
    268	u8 log_eq_stride = ilog2(MLX5_EQE_SIZE);
    269	struct mlx5_priv *priv = &dev->priv;
    270	__be64 *pas;
    271	u16 vecidx;
    272	void *eqc;
    273	int inlen;
    274	u32 *in;
    275	int err;
    276	int i;
    277
    278	/* Init CQ table */
    279	memset(cq_table, 0, sizeof(*cq_table));
    280	spin_lock_init(&cq_table->lock);
    281	INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);
    282
    283	eq->cons_index = 0;
    284
    285	err = mlx5_frag_buf_alloc_node(dev, wq_get_byte_sz(log_eq_size, log_eq_stride),
    286				       &eq->frag_buf, dev->priv.numa_node);
    287	if (err)
    288		return err;
    289
    290	mlx5_init_fbc(eq->frag_buf.frags, log_eq_stride, log_eq_size, &eq->fbc);
    291	init_eq_buf(eq);
    292
    293	eq->irq = param->irq;
    294	vecidx = mlx5_irq_get_index(eq->irq);
    295
    296	inlen = MLX5_ST_SZ_BYTES(create_eq_in) +
    297		MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->frag_buf.npages;
    298
    299	in = kvzalloc(inlen, GFP_KERNEL);
    300	if (!in) {
    301		err = -ENOMEM;
    302		goto err_buf;
    303	}
    304
    305	pas = (__be64 *)MLX5_ADDR_OF(create_eq_in, in, pas);
    306	mlx5_fill_page_frag_array(&eq->frag_buf, pas);
    307
    308	MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ);
    309	if (!param->mask[0] && MLX5_CAP_GEN(dev, log_max_uctx))
    310		MLX5_SET(create_eq_in, in, uid, MLX5_SHARED_RESOURCE_UID);
    311
    312	for (i = 0; i < 4; i++)
    313		MLX5_ARRAY_SET64(create_eq_in, in, event_bitmask, i,
    314				 param->mask[i]);
    315
    316	eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry);
    317	MLX5_SET(eqc, eqc, log_eq_size, eq->fbc.log_sz);
    318	MLX5_SET(eqc, eqc, uar_page, priv->uar->index);
    319	MLX5_SET(eqc, eqc, intr, vecidx);
    320	MLX5_SET(eqc, eqc, log_page_size,
    321		 eq->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
    322
    323	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
    324	if (err)
    325		goto err_in;
    326
    327	eq->vecidx = vecidx;
    328	eq->eqn = MLX5_GET(create_eq_out, out, eq_number);
    329	eq->irqn = pci_irq_vector(dev->pdev, vecidx);
    330	eq->dev = dev;
    331	eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET;
    332
    333	err = mlx5_debug_eq_add(dev, eq);
    334	if (err)
    335		goto err_eq;
    336
    337	kvfree(in);
    338	return 0;
    339
    340err_eq:
    341	mlx5_cmd_destroy_eq(dev, eq->eqn);
    342
    343err_in:
    344	kvfree(in);
    345
    346err_buf:
    347	mlx5_frag_buf_free(dev, &eq->frag_buf);
    348	return err;
    349}
    350
    351/**
    352 * mlx5_eq_enable - Enable EQ for receiving EQEs
    353 * @dev : Device which owns the eq
    354 * @eq  : EQ to enable
    355 * @nb  : Notifier call block
    356 *
    357 * Must be called after EQ is created in device.
    358 *
    359 * @return: 0 if no error
    360 */
    361int mlx5_eq_enable(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
    362		   struct notifier_block *nb)
    363{
    364	int err;
    365
    366	err = mlx5_irq_attach_nb(eq->irq, nb);
    367	if (!err)
    368		eq_update_ci(eq, 1);
    369
    370	return err;
    371}
    372EXPORT_SYMBOL(mlx5_eq_enable);
    373
    374/**
    375 * mlx5_eq_disable - Disable EQ for receiving EQEs
    376 * @dev : Device which owns the eq
    377 * @eq  : EQ to disable
    378 * @nb  : Notifier call block
    379 *
    380 * Must be called before EQ is destroyed.
    381 */
    382void mlx5_eq_disable(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
    383		     struct notifier_block *nb)
    384{
    385	mlx5_irq_detach_nb(eq->irq, nb);
    386}
    387EXPORT_SYMBOL(mlx5_eq_disable);
    388
    389static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
    390{
    391	int err;
    392
    393	mlx5_debug_eq_remove(dev, eq);
    394
    395	err = mlx5_cmd_destroy_eq(dev, eq->eqn);
    396	if (err)
    397		mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n",
    398			       eq->eqn);
    399
    400	mlx5_frag_buf_free(dev, &eq->frag_buf);
    401	return err;
    402}
    403
    404int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
    405{
    406	struct mlx5_cq_table *table = &eq->cq_table;
    407	int err;
    408
    409	spin_lock(&table->lock);
    410	err = radix_tree_insert(&table->tree, cq->cqn, cq);
    411	spin_unlock(&table->lock);
    412
    413	return err;
    414}
    415
    416void mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
    417{
    418	struct mlx5_cq_table *table = &eq->cq_table;
    419	struct mlx5_core_cq *tmp;
    420
    421	spin_lock(&table->lock);
    422	tmp = radix_tree_delete(&table->tree, cq->cqn);
    423	spin_unlock(&table->lock);
    424
    425	if (!tmp) {
    426		mlx5_core_dbg(eq->dev, "cq 0x%x not found in eq 0x%x tree\n",
     427			      cq->cqn, eq->eqn);
    428		return;
    429	}
    430
    431	if (tmp != cq)
    432		mlx5_core_dbg(eq->dev, "corruption on cqn 0x%x in eq 0x%x\n",
     433			      cq->cqn, eq->eqn);
    434}
    435
    436int mlx5_eq_table_init(struct mlx5_core_dev *dev)
    437{
    438	struct mlx5_eq_table *eq_table;
    439	int i;
    440
    441	eq_table = kvzalloc_node(sizeof(*eq_table), GFP_KERNEL,
    442				 dev->priv.numa_node);
    443	if (!eq_table)
    444		return -ENOMEM;
    445
    446	dev->priv.eq_table = eq_table;
    447
    448	mlx5_eq_debugfs_init(dev);
    449
    450	mutex_init(&eq_table->lock);
    451	for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++)
    452		ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]);
    453
    454	eq_table->irq_table = mlx5_irq_table_get(dev);
    455	return 0;
    456}
    457
    458void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev)
    459{
    460	mlx5_eq_debugfs_cleanup(dev);
    461	kvfree(dev->priv.eq_table);
    462}
    463
    464/* Async EQs */
    465
    466static int create_async_eq(struct mlx5_core_dev *dev,
    467			   struct mlx5_eq *eq, struct mlx5_eq_param *param)
    468{
    469	struct mlx5_eq_table *eq_table = dev->priv.eq_table;
    470	int err;
    471
    472	mutex_lock(&eq_table->lock);
    473	err = create_map_eq(dev, eq, param);
    474	mutex_unlock(&eq_table->lock);
    475	return err;
    476}
    477
    478static int destroy_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
    479{
    480	struct mlx5_eq_table *eq_table = dev->priv.eq_table;
    481	int err;
    482
    483	mutex_lock(&eq_table->lock);
    484	err = destroy_unmap_eq(dev, eq);
    485	mutex_unlock(&eq_table->lock);
    486	return err;
    487}
    488
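        /* Notifier for MLX5_EVENT_TYPE_CQ_ERROR on the async EQ: logs the CQN and
         * syndrome, looks the CQ up in the async EQ's CQ table and forwards the
         * event to cq->event() if the CQ registered one.
         */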
    489static int cq_err_event_notifier(struct notifier_block *nb,
    490				 unsigned long type, void *data)
    491{
    492	struct mlx5_eq_table *eqt;
    493	struct mlx5_core_cq *cq;
    494	struct mlx5_eqe *eqe;
    495	struct mlx5_eq *eq;
    496	u32 cqn;
    497
    498	/* type == MLX5_EVENT_TYPE_CQ_ERROR */
    499
    500	eqt = mlx5_nb_cof(nb, struct mlx5_eq_table, cq_err_nb);
    501	eq  = &eqt->async_eq.core;
    502	eqe = data;
    503
    504	cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
    505	mlx5_core_warn(eq->dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
    506		       cqn, eqe->data.cq_err.syndrome);
    507
    508	cq = mlx5_eq_cq_get(eq, cqn);
    509	if (unlikely(!cq)) {
    510		mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn);
    511		return NOTIFY_OK;
    512	}
    513
    514	if (cq->event)
    515		cq->event(cq, type);
    516
    517	mlx5_cq_put(cq);
    518
    519	return NOTIFY_OK;
    520}
    521
    522static void gather_user_async_events(struct mlx5_core_dev *dev, u64 mask[4])
    523{
    524	__be64 *user_unaffiliated_events;
    525	__be64 *user_affiliated_events;
    526	int i;
    527
    528	user_affiliated_events =
    529		MLX5_CAP_DEV_EVENT(dev, user_affiliated_events);
    530	user_unaffiliated_events =
    531		MLX5_CAP_DEV_EVENT(dev, user_unaffiliated_events);
    532
    533	for (i = 0; i < 4; i++)
    534		mask[i] |= be64_to_cpu(user_affiliated_events[i] |
    535				       user_unaffiliated_events[i]);
    536}
    537
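        /* Build mask[0..3] for the async EQ: start from MLX5_ASYNC_EVENT_MASK and
         * add events the device advertises support for (vport change, port module,
         * PPS, FPGA, DCT, temperature warning, tracer, monitor counters, eswitch
         * functions changed, vhca state), plus user-requested events when
         * event_cap is set.
         */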
    538static void gather_async_events_mask(struct mlx5_core_dev *dev, u64 mask[4])
    539{
    540	u64 async_event_mask = MLX5_ASYNC_EVENT_MASK;
    541
    542	if (MLX5_VPORT_MANAGER(dev))
    543		async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE);
    544
    545	if (MLX5_CAP_GEN(dev, general_notification_event))
    546		async_event_mask |= (1ull << MLX5_EVENT_TYPE_GENERAL_EVENT);
    547
    548	if (MLX5_CAP_GEN(dev, port_module_event))
    549		async_event_mask |= (1ull << MLX5_EVENT_TYPE_PORT_MODULE_EVENT);
    550	else
    551		mlx5_core_dbg(dev, "port_module_event is not set\n");
    552
    553	if (MLX5_PPS_CAP(dev))
    554		async_event_mask |= (1ull << MLX5_EVENT_TYPE_PPS_EVENT);
    555
    556	if (MLX5_CAP_GEN(dev, fpga))
    557		async_event_mask |= (1ull << MLX5_EVENT_TYPE_FPGA_ERROR) |
    558				    (1ull << MLX5_EVENT_TYPE_FPGA_QP_ERROR);
    559	if (MLX5_CAP_GEN_MAX(dev, dct))
    560		async_event_mask |= (1ull << MLX5_EVENT_TYPE_DCT_DRAINED);
    561
    562	if (MLX5_CAP_GEN(dev, temp_warn_event))
    563		async_event_mask |= (1ull << MLX5_EVENT_TYPE_TEMP_WARN_EVENT);
    564
    565	if (MLX5_CAP_MCAM_REG(dev, tracer_registers))
    566		async_event_mask |= (1ull << MLX5_EVENT_TYPE_DEVICE_TRACER);
    567
    568	if (MLX5_CAP_GEN(dev, max_num_of_monitor_counters))
    569		async_event_mask |= (1ull << MLX5_EVENT_TYPE_MONITOR_COUNTER);
    570
    571	if (mlx5_eswitch_is_funcs_handler(dev))
    572		async_event_mask |=
    573			(1ull << MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED);
    574
    575	if (MLX5_CAP_GEN_MAX(dev, vhca_state))
    576		async_event_mask |= (1ull << MLX5_EVENT_TYPE_VHCA_STATE_CHANGE);
    577
    578	mask[0] = async_event_mask;
    579
    580	if (MLX5_CAP_GEN(dev, event_cap))
    581		gather_user_async_events(dev, mask);
    582}
    583
    584static int
    585setup_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq_async *eq,
    586	       struct mlx5_eq_param *param, const char *name)
    587{
    588	int err;
    589
    590	eq->irq_nb.notifier_call = mlx5_eq_async_int;
    591	spin_lock_init(&eq->lock);
    592
    593	err = create_async_eq(dev, &eq->core, param);
    594	if (err) {
    595		mlx5_core_warn(dev, "failed to create %s EQ %d\n", name, err);
    596		return err;
    597	}
    598	err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb);
    599	if (err) {
    600		mlx5_core_warn(dev, "failed to enable %s EQ %d\n", name, err);
    601		destroy_async_eq(dev, &eq->core);
    602	}
    603	return err;
    604}
    605
    606static void cleanup_async_eq(struct mlx5_core_dev *dev,
    607			     struct mlx5_eq_async *eq, const char *name)
    608{
    609	int err;
    610
    611	mlx5_eq_disable(dev, &eq->core, &eq->irq_nb);
    612	err = destroy_async_eq(dev, &eq->core);
    613	if (err)
    614		mlx5_core_err(dev, "failed to destroy %s eq, err(%d)\n",
    615			      name, err);
    616}
    617
    618static u16 async_eq_depth_devlink_param_get(struct mlx5_core_dev *dev)
    619{
    620	struct devlink *devlink = priv_to_devlink(dev);
    621	union devlink_param_value val;
    622	int err;
    623
    624	err = devlink_param_driverinit_value_get(devlink,
    625						 DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE,
    626						 &val);
    627	if (!err)
    628		return val.vu32;
    629	mlx5_core_dbg(dev, "Failed to get param. using default. err = %d\n", err);
    630	return MLX5_NUM_ASYNC_EQE;
    631}
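        /* Create the cmd, async and pages EQs, all sharing the single control IRQ.
         * The cmd EQ is brought up first with only CREATE_EQ allowed so that the
         * command interface can switch from polling to event mode before the
         * remaining EQs are created.
         */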
    632static int create_async_eqs(struct mlx5_core_dev *dev)
    633{
    634	struct mlx5_eq_table *table = dev->priv.eq_table;
    635	struct mlx5_eq_param param = {};
    636	int err;
    637
    638	/* All the async_eqs are using single IRQ, request one IRQ and share its
    639	 * index among all the async_eqs of this device.
    640	 */
    641	table->ctrl_irq = mlx5_ctrl_irq_request(dev);
    642	if (IS_ERR(table->ctrl_irq))
    643		return PTR_ERR(table->ctrl_irq);
    644
    645	MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR);
    646	mlx5_eq_notifier_register(dev, &table->cq_err_nb);
    647
    648	param = (struct mlx5_eq_param) {
    649		.irq = table->ctrl_irq,
    650		.nent = MLX5_NUM_CMD_EQE,
    651		.mask[0] = 1ull << MLX5_EVENT_TYPE_CMD,
    652	};
    653	mlx5_cmd_allowed_opcode(dev, MLX5_CMD_OP_CREATE_EQ);
    654	err = setup_async_eq(dev, &table->cmd_eq, &param, "cmd");
    655	if (err)
    656		goto err1;
    657
    658	mlx5_cmd_use_events(dev);
    659	mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
    660
    661	param = (struct mlx5_eq_param) {
    662		.irq = table->ctrl_irq,
    663		.nent = async_eq_depth_devlink_param_get(dev),
    664	};
    665
    666	gather_async_events_mask(dev, param.mask);
    667	err = setup_async_eq(dev, &table->async_eq, &param, "async");
    668	if (err)
    669		goto err2;
    670
    671	param = (struct mlx5_eq_param) {
    672		.irq = table->ctrl_irq,
    673		.nent = /* TODO: sriov max_vf + */ 1,
    674		.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST,
    675	};
    676
    677	err = setup_async_eq(dev, &table->pages_eq, &param, "pages");
    678	if (err)
    679		goto err3;
    680
    681	return 0;
    682
    683err3:
    684	cleanup_async_eq(dev, &table->async_eq, "async");
    685err2:
    686	mlx5_cmd_use_polling(dev);
    687	cleanup_async_eq(dev, &table->cmd_eq, "cmd");
    688err1:
    689	mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
    690	mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
    691	mlx5_ctrl_irq_release(table->ctrl_irq);
    692	return err;
    693}
    694
    695static void destroy_async_eqs(struct mlx5_core_dev *dev)
    696{
    697	struct mlx5_eq_table *table = dev->priv.eq_table;
    698
    699	cleanup_async_eq(dev, &table->pages_eq, "pages");
    700	cleanup_async_eq(dev, &table->async_eq, "async");
    701	mlx5_cmd_allowed_opcode(dev, MLX5_CMD_OP_DESTROY_EQ);
    702	mlx5_cmd_use_polling(dev);
    703	cleanup_async_eq(dev, &table->cmd_eq, "cmd");
    704	mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
    705	mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
    706	mlx5_ctrl_irq_release(table->ctrl_irq);
    707}
    708
    709struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev)
    710{
    711	return &dev->priv.eq_table->async_eq.core;
    712}
    713
    714void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev)
    715{
    716	synchronize_irq(dev->priv.eq_table->async_eq.core.irqn);
    717}
    718
    719void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev)
    720{
    721	synchronize_irq(dev->priv.eq_table->cmd_eq.core.irqn);
    722}
    723
    724/* Generic EQ API for mlx5_core consumers
    725 * Needed For RDMA ODP EQ for now
    726 */
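        /* Illustrative usage sketch of the generic EQ API (not taken from an
         * in-tree consumer; the names odp_param and odp_nb are hypothetical):
         *
         *	struct mlx5_eq_param odp_param = {
         *		.nent = 64,
         *		.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_FAULT,
         *	};
         *	struct mlx5_eq *eq = mlx5_eq_create_generic(dev, &odp_param);
         *
         *	if (IS_ERR(eq))
         *		return PTR_ERR(eq);
         *	err = mlx5_eq_enable(dev, eq, &odp_nb);   // odp_nb: struct notifier_block
         *	...
         *	// poll with mlx5_eq_get_eqe()/mlx5_eq_update_ci(), then tear down:
         *	mlx5_eq_disable(dev, eq, &odp_nb);
         *	mlx5_eq_destroy_generic(dev, eq);
         */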
    727struct mlx5_eq *
    728mlx5_eq_create_generic(struct mlx5_core_dev *dev,
    729		       struct mlx5_eq_param *param)
    730{
    731	struct mlx5_eq *eq = kvzalloc_node(sizeof(*eq), GFP_KERNEL,
    732					   dev->priv.numa_node);
    733	int err;
    734
    735	if (!eq)
    736		return ERR_PTR(-ENOMEM);
    737
    738	param->irq = dev->priv.eq_table->ctrl_irq;
    739	err = create_async_eq(dev, eq, param);
    740	if (err) {
    741		kvfree(eq);
    742		eq = ERR_PTR(err);
    743	}
    744
    745	return eq;
    746}
    747EXPORT_SYMBOL(mlx5_eq_create_generic);
    748
    749int mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
    750{
    751	int err;
    752
    753	if (IS_ERR(eq))
    754		return -EINVAL;
    755
    756	err = destroy_async_eq(dev, eq);
    757	if (err)
    758		goto out;
    759
    760	kvfree(eq);
    761out:
    762	return err;
    763}
    764EXPORT_SYMBOL(mlx5_eq_destroy_generic);
    765
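        /* Peek at the EQE @cc entries past the current consumer index. Returns the
         * EQE if hardware has handed it over to software (ownership bit check),
         * otherwise NULL; a dma_rmb() orders the ownership check before the caller
         * reads the EQE contents.
         */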
    766struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc)
    767{
    768	u32 ci = eq->cons_index + cc;
    769	u32 nent = eq_get_size(eq);
    770	struct mlx5_eqe *eqe;
    771
    772	eqe = get_eqe(eq, ci & (nent - 1));
    773	eqe = ((eqe->owner & 1) ^ !!(ci & nent)) ? NULL : eqe;
    774	/* Make sure we read EQ entry contents after we've
    775	 * checked the ownership bit.
    776	 */
    777	if (eqe)
    778		dma_rmb();
    779
    780	return eqe;
    781}
    782EXPORT_SYMBOL(mlx5_eq_get_eqe);
    783
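        /* Advance the consumer index by @cc EQEs and ring the EQ doorbell; with
         * @arm set the doorbell also re-arms the EQ so it raises a new interrupt.
         */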
    784void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm)
    785{
    786	__be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2);
    787	u32 val;
    788
    789	eq->cons_index += cc;
    790	val = (eq->cons_index & 0xffffff) | (eq->eqn << 24);
    791
    792	__raw_writel((__force u32)cpu_to_be32(val), addr);
    793	/* We still want ordering, just not swabbing, so add a barrier */
    794	wmb();
    795}
    796EXPORT_SYMBOL(mlx5_eq_update_ci);
    797
    798static void comp_irqs_release(struct mlx5_core_dev *dev)
    799{
    800	struct mlx5_eq_table *table = dev->priv.eq_table;
    801
    802	if (mlx5_core_is_sf(dev))
    803		mlx5_irq_affinity_irqs_release(dev, table->comp_irqs, table->num_comp_eqs);
    804	else
    805		mlx5_irqs_release_vectors(table->comp_irqs, table->num_comp_eqs);
    806	kfree(table->comp_irqs);
    807}
    808
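        /* Request one IRQ per completion EQ. SFs get their vectors from the shared
         * IRQ affinity pool; PFs/VFs request their own vectors, spread across CPUs
         * local to the device's NUMA node. Returns the number of IRQs obtained or
         * a negative errno.
         */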
    809static int comp_irqs_request(struct mlx5_core_dev *dev)
    810{
    811	struct mlx5_eq_table *table = dev->priv.eq_table;
    812	int ncomp_eqs = table->num_comp_eqs;
    813	u16 *cpus;
    814	int ret;
    815	int i;
    816
    817	ncomp_eqs = table->num_comp_eqs;
    818	table->comp_irqs = kcalloc(ncomp_eqs, sizeof(*table->comp_irqs), GFP_KERNEL);
    819	if (!table->comp_irqs)
    820		return -ENOMEM;
    821	if (mlx5_core_is_sf(dev)) {
    822		ret = mlx5_irq_affinity_irqs_request_auto(dev, ncomp_eqs, table->comp_irqs);
    823		if (ret < 0)
    824			goto free_irqs;
    825		return ret;
    826	}
    827
    828	cpus = kcalloc(ncomp_eqs, sizeof(*cpus), GFP_KERNEL);
    829	if (!cpus) {
    830		ret = -ENOMEM;
    831		goto free_irqs;
    832	}
    833	for (i = 0; i < ncomp_eqs; i++)
    834		cpus[i] = cpumask_local_spread(i, dev->priv.numa_node);
    835	ret = mlx5_irqs_request_vectors(dev, cpus, ncomp_eqs, table->comp_irqs);
    836	kfree(cpus);
    837	if (ret < 0)
    838		goto free_irqs;
    839	return ret;
    840
    841free_irqs:
    842	kfree(table->comp_irqs);
    843	return ret;
    844}
    845
    846static void destroy_comp_eqs(struct mlx5_core_dev *dev)
    847{
    848	struct mlx5_eq_table *table = dev->priv.eq_table;
    849	struct mlx5_eq_comp *eq, *n;
    850
    851	list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
    852		list_del(&eq->list);
    853		mlx5_eq_disable(dev, &eq->core, &eq->irq_nb);
    854		if (destroy_unmap_eq(dev, &eq->core))
    855			mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n",
    856				       eq->core.eqn);
    857		tasklet_disable(&eq->tasklet_ctx.task);
    858		kfree(eq);
    859	}
    860	comp_irqs_release(dev);
    861}
    862
    863static u16 comp_eq_depth_devlink_param_get(struct mlx5_core_dev *dev)
    864{
    865	struct devlink *devlink = priv_to_devlink(dev);
    866	union devlink_param_value val;
    867	int err;
    868
    869	err = devlink_param_driverinit_value_get(devlink,
    870						 DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE,
    871						 &val);
    872	if (!err)
    873		return val.vu32;
    874	mlx5_core_dbg(dev, "Failed to get param. using default. err = %d\n", err);
    875	return MLX5_COMP_EQ_SIZE;
    876}
    877
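        /* Create one completion EQ per IRQ returned by comp_irqs_request(). Each EQ
         * gets a tasklet for deferred CQ completion handling and is appended to
         * comp_eqs_list in vector order so that mlx5_vector2eqn() can index it.
         */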
    878static int create_comp_eqs(struct mlx5_core_dev *dev)
    879{
    880	struct mlx5_eq_table *table = dev->priv.eq_table;
    881	struct mlx5_eq_comp *eq;
    882	int ncomp_eqs;
    883	int nent;
    884	int err;
    885	int i;
    886
    887	ncomp_eqs = comp_irqs_request(dev);
    888	if (ncomp_eqs < 0)
    889		return ncomp_eqs;
    890	INIT_LIST_HEAD(&table->comp_eqs_list);
    891	nent = comp_eq_depth_devlink_param_get(dev);
    892
    893	for (i = 0; i < ncomp_eqs; i++) {
    894		struct mlx5_eq_param param = {};
    895
    896		eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, dev->priv.numa_node);
    897		if (!eq) {
    898			err = -ENOMEM;
    899			goto clean;
    900		}
    901
    902		INIT_LIST_HEAD(&eq->tasklet_ctx.list);
    903		INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
    904		spin_lock_init(&eq->tasklet_ctx.lock);
    905		tasklet_setup(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb);
    906
    907		eq->irq_nb.notifier_call = mlx5_eq_comp_int;
    908		param = (struct mlx5_eq_param) {
    909			.irq = table->comp_irqs[i],
    910			.nent = nent,
    911		};
    912
    913		err = create_map_eq(dev, &eq->core, &param);
    914		if (err)
    915			goto clean_eq;
    916		err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb);
    917		if (err) {
    918			destroy_unmap_eq(dev, &eq->core);
    919			goto clean_eq;
    920		}
    921
    922		mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn);
    923		/* add tail, to keep the list ordered, for mlx5_vector2eqn to work */
    924		list_add_tail(&eq->list, &table->comp_eqs_list);
    925	}
    926
    927	table->num_comp_eqs = ncomp_eqs;
    928	return 0;
    929
    930clean_eq:
    931	kfree(eq);
    932clean:
    933	destroy_comp_eqs(dev);
    934	return err;
    935}
    936
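        /* Translate a completion vector index into the EQN and/or Linux IRQ number
         * of the matching completion EQ, walking comp_eqs_list in creation order.
         */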
    937static int vector2eqnirqn(struct mlx5_core_dev *dev, int vector, int *eqn,
    938			  unsigned int *irqn)
    939{
    940	struct mlx5_eq_table *table = dev->priv.eq_table;
    941	struct mlx5_eq_comp *eq, *n;
    942	int err = -ENOENT;
    943	int i = 0;
    944
    945	list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
    946		if (i++ == vector) {
    947			if (irqn)
    948				*irqn = eq->core.irqn;
    949			if (eqn)
    950				*eqn = eq->core.eqn;
    951			err = 0;
    952			break;
    953		}
    954	}
    955
    956	return err;
    957}
    958
    959int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn)
    960{
    961	return vector2eqnirqn(dev, vector, eqn, NULL);
    962}
    963EXPORT_SYMBOL(mlx5_vector2eqn);
    964
    965int mlx5_vector2irqn(struct mlx5_core_dev *dev, int vector, unsigned int *irqn)
    966{
    967	return vector2eqnirqn(dev, vector, NULL, irqn);
    968}
    969
    970unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev)
    971{
    972	return dev->priv.eq_table->num_comp_eqs;
    973}
    974EXPORT_SYMBOL(mlx5_comp_vectors_count);
    975
    976struct cpumask *
    977mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector)
    978{
    979	struct mlx5_eq_table *table = dev->priv.eq_table;
    980	struct mlx5_eq_comp *eq, *n;
    981	int i = 0;
    982
    983	list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
    984		if (i++ == vector)
    985			break;
    986	}
    987
    988	return mlx5_irq_get_affinity_mask(eq->core.irq);
    989}
    990EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask);
    991
    992#ifdef CONFIG_RFS_ACCEL
    993struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev)
    994{
    995	return dev->priv.eq_table->rmap;
    996}
    997#endif
    998
    999struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn)
   1000{
   1001	struct mlx5_eq_table *table = dev->priv.eq_table;
   1002	struct mlx5_eq_comp *eq;
   1003
   1004	list_for_each_entry(eq, &table->comp_eqs_list, list) {
   1005		if (eq->core.eqn == eqn)
   1006			return eq;
   1007	}
   1008
   1009	return ERR_PTR(-ENOENT);
   1010}
   1011
   1012static void clear_rmap(struct mlx5_core_dev *dev)
   1013{
   1014#ifdef CONFIG_RFS_ACCEL
   1015	struct mlx5_eq_table *eq_table = dev->priv.eq_table;
   1016
   1017	free_irq_cpu_rmap(eq_table->rmap);
   1018#endif
   1019}
   1020
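        /* With CONFIG_RFS_ACCEL, allocate a cpu_rmap covering all completion
         * vectors and register each vector's IRQ with it so that accelerated RFS
         * can steer flows to the CPU handling the interrupt. A no-op returning 0
         * otherwise.
         */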
   1021static int set_rmap(struct mlx5_core_dev *mdev)
   1022{
   1023	int err = 0;
   1024#ifdef CONFIG_RFS_ACCEL
   1025	struct mlx5_eq_table *eq_table = mdev->priv.eq_table;
   1026	int vecidx;
   1027
   1028	eq_table->rmap = alloc_irq_cpu_rmap(eq_table->num_comp_eqs);
   1029	if (!eq_table->rmap) {
   1030		err = -ENOMEM;
   1031		mlx5_core_err(mdev, "Failed to allocate cpu_rmap. err %d", err);
   1032		goto err_out;
   1033	}
   1034
   1035	for (vecidx = 0; vecidx < eq_table->num_comp_eqs; vecidx++) {
   1036		err = irq_cpu_rmap_add(eq_table->rmap,
   1037				       pci_irq_vector(mdev->pdev, vecidx));
   1038		if (err) {
   1039			mlx5_core_err(mdev, "irq_cpu_rmap_add failed. err %d",
   1040				      err);
   1041			goto err_irq_cpu_rmap_add;
   1042		}
   1043	}
   1044	return 0;
   1045
   1046err_irq_cpu_rmap_add:
   1047	clear_rmap(mdev);
   1048err_out:
   1049#endif
   1050	return err;
   1051}
   1052
   1053/* This function should only be called after mlx5_cmd_force_teardown_hca */
   1054void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
   1055{
   1056	struct mlx5_eq_table *table = dev->priv.eq_table;
   1057
   1058	mutex_lock(&table->lock); /* sync with create/destroy_async_eq */
   1059	if (!mlx5_core_is_sf(dev))
   1060		clear_rmap(dev);
   1061	mlx5_irq_table_destroy(dev);
   1062	mutex_unlock(&table->lock);
   1063}
   1064
   1065#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
   1066#define MLX5_MAX_ASYNC_EQS 4
   1067#else
   1068#define MLX5_MAX_ASYNC_EQS 3
   1069#endif
   1070
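        /* Top-level EQ table setup: size num_comp_eqs from the IRQ table while
         * reserving MLX5_MAX_ASYNC_EQS of the device's EQ budget, then create the
         * async EQs, the RFS rmap (non-SF only) and the completion EQs.
         */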
   1071int mlx5_eq_table_create(struct mlx5_core_dev *dev)
   1072{
   1073	struct mlx5_eq_table *eq_table = dev->priv.eq_table;
   1074	int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
   1075		      MLX5_CAP_GEN(dev, max_num_eqs) :
   1076		      1 << MLX5_CAP_GEN(dev, log_max_eq);
   1077	int max_eqs_sf;
   1078	int err;
   1079
   1080	eq_table->num_comp_eqs =
   1081		min_t(int,
   1082		      mlx5_irq_table_get_num_comp(eq_table->irq_table),
   1083		      num_eqs - MLX5_MAX_ASYNC_EQS);
   1084	if (mlx5_core_is_sf(dev)) {
   1085		max_eqs_sf = min_t(int, MLX5_COMP_EQS_PER_SF,
   1086				   mlx5_irq_table_get_sfs_vec(eq_table->irq_table));
   1087		eq_table->num_comp_eqs = min_t(int, eq_table->num_comp_eqs,
   1088					       max_eqs_sf);
   1089	}
   1090
   1091	err = create_async_eqs(dev);
   1092	if (err) {
   1093		mlx5_core_err(dev, "Failed to create async EQs\n");
   1094		goto err_async_eqs;
   1095	}
   1096
   1097	if (!mlx5_core_is_sf(dev)) {
   1098		/* rmap is a mapping between irq number and queue number.
    1099		 * each irq can be assigned only to a single rmap.
   1100		 * since SFs share IRQs, rmap mapping cannot function correctly
   1101		 * for irqs that are shared for different core/netdev RX rings.
   1102		 * Hence we don't allow netdev rmap for SFs
   1103		 */
   1104		err = set_rmap(dev);
   1105		if (err)
   1106			goto err_rmap;
   1107	}
   1108
   1109	err = create_comp_eqs(dev);
   1110	if (err) {
   1111		mlx5_core_err(dev, "Failed to create completion EQs\n");
   1112		goto err_comp_eqs;
   1113	}
   1114
   1115	return 0;
   1116err_comp_eqs:
   1117	if (!mlx5_core_is_sf(dev))
   1118		clear_rmap(dev);
   1119err_rmap:
   1120	destroy_async_eqs(dev);
   1121err_async_eqs:
   1122	return err;
   1123}
   1124
   1125void mlx5_eq_table_destroy(struct mlx5_core_dev *dev)
   1126{
   1127	if (!mlx5_core_is_sf(dev))
   1128		clear_rmap(dev);
   1129	destroy_comp_eqs(dev);
   1130	destroy_async_eqs(dev);
   1131}
   1132
   1133int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
   1134{
   1135	struct mlx5_eq_table *eqt = dev->priv.eq_table;
   1136
   1137	return atomic_notifier_chain_register(&eqt->nh[nb->event_type], &nb->nb);
   1138}
   1139EXPORT_SYMBOL(mlx5_eq_notifier_register);
   1140
   1141int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
   1142{
   1143	struct mlx5_eq_table *eqt = dev->priv.eq_table;
   1144
   1145	return atomic_notifier_chain_unregister(&eqt->nh[nb->event_type], &nb->nb);
   1146}
   1147EXPORT_SYMBOL(mlx5_eq_notifier_unregister);