cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

pci_irq.c (18201B)


      1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
      2/* Copyright (c) 2019 Mellanox Technologies. */
      3
      4#include <linux/interrupt.h>
      5#include <linux/notifier.h>
      6#include <linux/mlx5/driver.h>
      7#include "mlx5_core.h"
      8#include "mlx5_irq.h"
      9#include "pci_irq.h"
     10#include "lib/sf.h"
     11#ifdef CONFIG_RFS_ACCEL
     12#include <linux/cpu_rmap.h>
     13#endif
     14
     15#define MLX5_SFS_PER_CTRL_IRQ 64
     16#define MLX5_IRQ_CTRL_SF_MAX 8
     17/* min num of vectors for SFs to be enabled */
     18#define MLX5_IRQ_VEC_COMP_BASE_SF 2
     19
     20#define MLX5_EQ_SHARE_IRQ_MAX_COMP (8)
     21#define MLX5_EQ_SHARE_IRQ_MAX_CTRL (UINT_MAX)
     22#define MLX5_EQ_SHARE_IRQ_MIN_COMP (1)
     23#define MLX5_EQ_SHARE_IRQ_MIN_CTRL (4)
     24
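        /* A single MSI-X vector shared by one or more EQs: users attach to the
         * atomic notifier chain and refcount tracks how many of them hold this
         * IRQ.
         */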
     25struct mlx5_irq {
     26	struct atomic_notifier_head nh;
     27	cpumask_var_t mask;
     28	char name[MLX5_MAX_IRQ_NAME];
     29	struct mlx5_irq_pool *pool;
     30	int refcount;
     31	u32 index;
     32	int irqn;
     33};
     34
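        /* Per-device IRQ pools: pf_pool always exists; the SF control and
         * completion pools are only created when SFs are supported and enough
         * MSI-X vectors remain for them.
         */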
     35struct mlx5_irq_table {
     36	struct mlx5_irq_pool *pf_pool;
     37	struct mlx5_irq_pool *sf_ctrl_pool;
     38	struct mlx5_irq_pool *sf_comp_pool;
     39};
     40
     41/**
     42 * mlx5_get_default_msix_vec_count - Get the default number of MSI-X vectors
      43 *                                   to be assigned to each VF.
     44 * @dev: PF to work on
     45 * @num_vfs: Number of enabled VFs
     46 */
     47int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs)
     48{
     49	int num_vf_msix, min_msix, max_msix;
     50
     51	num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
     52	if (!num_vf_msix)
     53		return 0;
     54
     55	min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
     56	max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);
     57
     58	/* Limit maximum number of MSI-X vectors so the default configuration
     59	 * has some available in the pool. This will allow the user to increase
     60	 * the number of vectors in a VF without having to first size-down other
     61	 * VFs.
     62	 */
     63	return max(min(num_vf_msix / num_vfs, max_msix / 2), min_msix);
     64}
     65
     66/**
     67 * mlx5_set_msix_vec_count - Set dynamically allocated MSI-X on the VF
     68 * @dev: PF to work on
      69 * @function_id: Internal PCI VF function ID
     70 * @msix_vec_count: Number of MSI-X vectors to set
     71 */
     72int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id,
     73			    int msix_vec_count)
     74{
     75	int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
     76	int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
     77	void *hca_cap = NULL, *query_cap = NULL, *cap;
     78	int num_vf_msix, min_msix, max_msix;
     79	int ret;
     80
     81	num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
     82	if (!num_vf_msix)
     83		return 0;
     84
     85	if (!MLX5_CAP_GEN(dev, vport_group_manager) || !mlx5_core_is_pf(dev))
     86		return -EOPNOTSUPP;
     87
     88	min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
     89	max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);
     90
     91	if (msix_vec_count < min_msix)
     92		return -EINVAL;
     93
     94	if (msix_vec_count > max_msix)
     95		return -EOVERFLOW;
     96
     97	query_cap = kvzalloc(query_sz, GFP_KERNEL);
     98	hca_cap = kvzalloc(set_sz, GFP_KERNEL);
     99	if (!hca_cap || !query_cap) {
    100		ret = -ENOMEM;
    101		goto out;
    102	}
    103
    104	ret = mlx5_vport_get_other_func_cap(dev, function_id, query_cap);
    105	if (ret)
    106		goto out;
    107
    108	cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability);
    109	memcpy(cap, MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability),
    110	       MLX5_UN_SZ_BYTES(hca_cap_union));
    111	MLX5_SET(cmd_hca_cap, cap, dynamic_msix_table_size, msix_vec_count);
    112
    113	MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP);
    114	MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1);
    115	MLX5_SET(set_hca_cap_in, hca_cap, function_id, function_id);
    116
    117	MLX5_SET(set_hca_cap_in, hca_cap, op_mod,
    118		 MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1);
    119	ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap);
    120out:
    121	kvfree(hca_cap);
    122	kvfree(query_cap);
    123	return ret;
    124}
    125
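        /* Free an IRQ that has no more users: remove it from its pool, clear
         * the affinity hint and cpumask, and release the kernel IRQ.
         */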
    126static void irq_release(struct mlx5_irq *irq)
    127{
    128	struct mlx5_irq_pool *pool = irq->pool;
    129
    130	xa_erase(&pool->irqs, irq->index);
     131	/* free_irq requires that affinity_hint and rmap be cleared
    132	 * before calling it. This is why there is asymmetry with set_rmap
    133	 * which should be called after alloc_irq but before request_irq.
    134	 */
    135	irq_update_affinity_hint(irq->irqn, NULL);
    136	free_cpumask_var(irq->mask);
    137	free_irq(irq->irqn, &irq->nh);
    138	kfree(irq);
    139}
    140
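        /* Drop one reference on the IRQ; the IRQ is released when the last
         * reference goes away. Returns 1 if the IRQ was released, 0 otherwise.
         */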
    141int mlx5_irq_put(struct mlx5_irq *irq)
    142{
    143	struct mlx5_irq_pool *pool = irq->pool;
    144	int ret = 0;
    145
    146	mutex_lock(&pool->lock);
    147	irq->refcount--;
    148	if (!irq->refcount) {
    149		irq_release(irq);
    150		ret = 1;
    151	}
    152	mutex_unlock(&pool->lock);
    153	return ret;
    154}
    155
    156int mlx5_irq_read_locked(struct mlx5_irq *irq)
    157{
    158	lockdep_assert_held(&irq->pool->lock);
    159	return irq->refcount;
    160}
    161
    162int mlx5_irq_get_locked(struct mlx5_irq *irq)
    163{
    164	lockdep_assert_held(&irq->pool->lock);
    165	if (WARN_ON_ONCE(!irq->refcount))
    166		return 0;
    167	irq->refcount++;
    168	return 1;
    169}
    170
    171static int irq_get(struct mlx5_irq *irq)
    172{
    173	int err;
    174
    175	mutex_lock(&irq->pool->lock);
    176	err = mlx5_irq_get_locked(irq);
    177	mutex_unlock(&irq->pool->lock);
    178	return err;
    179}
    180
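        /* Shared interrupt handler: notify every EQ registered on this IRQ's
         * notifier chain.
         */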
    181static irqreturn_t irq_int_handler(int irq, void *nh)
    182{
    183	atomic_notifier_call_chain(nh, 0, NULL);
    184	return IRQ_HANDLED;
    185}
    186
    187static void irq_sf_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
    188{
    189	snprintf(name, MLX5_MAX_IRQ_NAME, "%s%d", pool->name, vecidx);
    190}
    191
    192static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
    193{
    194	if (!pool->xa_num_irqs.max) {
    195		/* in case we only have a single irq for the device */
    196		snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_combined%d", vecidx);
    197		return;
    198	}
    199
    200	if (vecidx == pool->xa_num_irqs.max) {
    201		snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async%d", vecidx);
    202		return;
    203	}
    204
    205	snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx);
    206}
    207
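        /* Allocate and request the kernel IRQ behind MSI-X vector i, optionally
         * pinning it to the given affinity. The new IRQ starts with one
         * reference held for the caller.
         */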
    208struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
    209				const struct cpumask *affinity)
    210{
    211	struct mlx5_core_dev *dev = pool->dev;
    212	char name[MLX5_MAX_IRQ_NAME];
    213	struct mlx5_irq *irq;
    214	int err;
    215
    216	irq = kzalloc(sizeof(*irq), GFP_KERNEL);
    217	if (!irq)
    218		return ERR_PTR(-ENOMEM);
    219	irq->irqn = pci_irq_vector(dev->pdev, i);
    220	if (!mlx5_irq_pool_is_sf_pool(pool))
    221		irq_set_name(pool, name, i);
    222	else
    223		irq_sf_set_name(pool, name, i);
    224	ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
    225	snprintf(irq->name, MLX5_MAX_IRQ_NAME,
    226		 "%s@pci:%s", name, pci_name(dev->pdev));
    227	err = request_irq(irq->irqn, irq_int_handler, 0, irq->name,
    228			  &irq->nh);
    229	if (err) {
    230		mlx5_core_err(dev, "Failed to request irq. err = %d\n", err);
    231		goto err_req_irq;
    232	}
    233	if (!zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) {
    234		mlx5_core_warn(dev, "zalloc_cpumask_var failed\n");
    235		err = -ENOMEM;
    236		goto err_cpumask;
    237	}
    238	if (affinity) {
    239		cpumask_copy(irq->mask, affinity);
    240		irq_set_affinity_and_hint(irq->irqn, irq->mask);
    241	}
    242	irq->pool = pool;
    243	irq->refcount = 1;
    244	irq->index = i;
    245	err = xa_err(xa_store(&pool->irqs, irq->index, irq, GFP_KERNEL));
    246	if (err) {
    247		mlx5_core_err(dev, "Failed to alloc xa entry for irq(%u). err = %d\n",
    248			      irq->index, err);
    249		goto err_xa;
    250	}
    251	return irq;
    252err_xa:
    253	irq_update_affinity_hint(irq->irqn, NULL);
    254	free_cpumask_var(irq->mask);
    255err_cpumask:
    256	free_irq(irq->irqn, &irq->nh);
    257err_req_irq:
    258	kfree(irq);
    259	return ERR_PTR(err);
    260}
    261
    262int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
    263{
    264	int ret;
    265
    266	ret = irq_get(irq);
    267	if (!ret)
     268		/* Something is very wrong here: we are enabling an EQ
     269		 * on a non-existing IRQ.
    270		 */
    271		return -ENOENT;
    272	ret = atomic_notifier_chain_register(&irq->nh, nb);
    273	if (ret)
    274		mlx5_irq_put(irq);
    275	return ret;
    276}
    277
    278int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
    279{
    280	int err = 0;
    281
    282	err = atomic_notifier_chain_unregister(&irq->nh, nb);
    283	mlx5_irq_put(irq);
    284	return err;
    285}
    286
    287struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq)
    288{
    289	return irq->mask;
    290}
    291
    292int mlx5_irq_get_index(struct mlx5_irq *irq)
    293{
    294	return irq->index;
    295}
    296
    297/* irq_pool API */
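        /* An irq_pool manages a contiguous range of MSI-X vector indices. IRQs
         * are created lazily on first request and may be shared by several EQs;
         * the min/max thresholds (in EQ references per IRQ) guide how heavily a
         * vector is shared.
         */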
    298
     299/* request an irq from a given pool according to the given index */
    300static struct mlx5_irq *
    301irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx,
    302			struct cpumask *affinity)
    303{
    304	struct mlx5_irq *irq;
    305
    306	mutex_lock(&pool->lock);
    307	irq = xa_load(&pool->irqs, vecidx);
    308	if (irq) {
    309		mlx5_irq_get_locked(irq);
    310		goto unlock;
    311	}
    312	irq = mlx5_irq_alloc(pool, vecidx, affinity);
    313unlock:
    314	mutex_unlock(&pool->lock);
    315	return irq;
    316}
    317
    318static struct mlx5_irq_pool *sf_ctrl_irq_pool_get(struct mlx5_irq_table *irq_table)
    319{
    320	return irq_table->sf_ctrl_pool;
    321}
    322
    323static struct mlx5_irq_pool *sf_irq_pool_get(struct mlx5_irq_table *irq_table)
    324{
    325	return irq_table->sf_comp_pool;
    326}
    327
    328struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev)
    329{
    330	struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
    331	struct mlx5_irq_pool *pool = NULL;
    332
    333	if (mlx5_core_is_sf(dev))
    334		pool = sf_irq_pool_get(irq_table);
    335
     336	/* In some configs, there won't be a pool of SF IRQs. Hence, return
     337	 * the PF IRQ pool in case the SF pool doesn't exist.
    338	 */
    339	return pool ? pool : irq_table->pf_pool;
    340}
    341
    342static struct mlx5_irq_pool *ctrl_irq_pool_get(struct mlx5_core_dev *dev)
    343{
    344	struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
    345	struct mlx5_irq_pool *pool = NULL;
    346
    347	if (mlx5_core_is_sf(dev))
    348		pool = sf_ctrl_irq_pool_get(irq_table);
    349
     350	/* In some configs, there won't be a pool of SF IRQs. Hence, return
     351	 * the PF IRQ pool in case the SF pool doesn't exist.
    352	 */
    353	return pool ? pool : irq_table->pf_pool;
    354}
    355
    356/**
    357 * mlx5_irqs_release - release one or more IRQs back to the system.
    358 * @irqs: IRQs to be released.
    359 * @nirqs: number of IRQs to be released.
    360 */
    361static void mlx5_irqs_release(struct mlx5_irq **irqs, int nirqs)
    362{
    363	int i;
    364
    365	for (i = 0; i < nirqs; i++) {
    366		synchronize_irq(irqs[i]->irqn);
    367		mlx5_irq_put(irqs[i]);
    368	}
    369}
    370
    371/**
    372 * mlx5_ctrl_irq_release - release a ctrl IRQ back to the system.
    373 * @ctrl_irq: ctrl IRQ to be released.
    374 */
    375void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq)
    376{
    377	mlx5_irqs_release(&ctrl_irq, 1);
    378}
    379
    380/**
    381 * mlx5_ctrl_irq_request - request a ctrl IRQ for mlx5 device.
     382 * @dev: mlx5 device that is requesting the IRQ.
     383 *
     384 * This function returns a pointer to the IRQ, or an ERR_PTR in case of error.
    385 */
    386struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev)
    387{
    388	struct mlx5_irq_pool *pool = ctrl_irq_pool_get(dev);
    389	cpumask_var_t req_mask;
    390	struct mlx5_irq *irq;
    391
    392	if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL))
    393		return ERR_PTR(-ENOMEM);
    394	cpumask_copy(req_mask, cpu_online_mask);
    395	if (!mlx5_irq_pool_is_sf_pool(pool)) {
    396		/* In case we are allocating a control IRQ for PF/VF */
    397		if (!pool->xa_num_irqs.max) {
    398			cpumask_clear(req_mask);
    399			/* In case we only have a single IRQ for PF/VF */
    400			cpumask_set_cpu(cpumask_first(cpu_online_mask), req_mask);
    401		}
    402		/* Allocate the IRQ in the last index of the pool */
    403		irq = irq_pool_request_vector(pool, pool->xa_num_irqs.max, req_mask);
    404	} else {
    405		irq = mlx5_irq_affinity_request(pool, req_mask);
    406	}
    407
    408	free_cpumask_var(req_mask);
    409	return irq;
    410}
    411
    412/**
    413 * mlx5_irq_request - request an IRQ for mlx5 PF/VF device.
     414 * @dev: mlx5 device that is requesting the IRQ.
     415 * @vecidx: vector index of the IRQ. This argument is ignored if affinity is
     416 * provided.
     417 * @affinity: cpumask requested for this IRQ.
     418 *
     419 * This function returns a pointer to the IRQ, or an ERR_PTR in case of error.
    420 */
    421struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
    422				  struct cpumask *affinity)
    423{
    424	struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
    425	struct mlx5_irq_pool *pool;
    426	struct mlx5_irq *irq;
    427
    428	pool = irq_table->pf_pool;
    429	irq = irq_pool_request_vector(pool, vecidx, affinity);
    430	if (IS_ERR(irq))
    431		return irq;
    432	mlx5_core_dbg(dev, "irq %u mapped to cpu %*pbl, %u EQs on this irq\n",
    433		      irq->irqn, cpumask_pr_args(affinity),
    434		      irq->refcount / MLX5_EQ_REFS_PER_IRQ);
    435	return irq;
    436}
    437
    438/**
    439 * mlx5_irqs_release_vectors - release one or more IRQs back to the system.
    440 * @irqs: IRQs to be released.
    441 * @nirqs: number of IRQs to be released.
    442 */
    443void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs)
    444{
    445	mlx5_irqs_release(irqs, nirqs);
    446}
    447
    448/**
    449 * mlx5_irqs_request_vectors - request one or more IRQs for mlx5 device.
    450 * @dev: mlx5 device that is requesting the IRQs.
    451 * @cpus: CPUs array for binding the IRQs
    452 * @nirqs: number of IRQs to request.
    453 * @irqs: an output array of IRQs pointers.
    454 *
    455 * Each IRQ is bound to at most 1 CPU.
     456 * This function requests @nirqs IRQs, starting from vector index 0.
     457 *
     458 * This function returns the number of IRQs requested (which might be smaller
     459 * than @nirqs) if successful, or a negative error code in case of an error.
    460 */
    461int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs,
    462			      struct mlx5_irq **irqs)
    463{
    464	cpumask_var_t req_mask;
    465	struct mlx5_irq *irq;
    466	int i;
    467
    468	if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL))
    469		return -ENOMEM;
    470	for (i = 0; i < nirqs; i++) {
    471		cpumask_set_cpu(cpus[i], req_mask);
    472		irq = mlx5_irq_request(dev, i, req_mask);
    473		if (IS_ERR(irq))
    474			break;
    475		cpumask_clear(req_mask);
    476		irqs[i] = irq;
    477	}
    478
    479	free_cpumask_var(req_mask);
    480	return i ? i : PTR_ERR(irq);
    481}
    482
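        /* Usage sketch (an assumption, not code from this file): a consumer
         * such as the EQ layer could bind one completion IRQ per CPU roughly
         * as follows, where ncomp and irqs are hypothetical caller-owned
         * variables:
         *
         *	u16 *cpus = kcalloc(ncomp, sizeof(u16), GFP_KERNEL);
         *	int i, n;
         *
         *	for (i = 0; i < ncomp; i++)
         *		cpus[i] = cpumask_local_spread(i, dev->priv.numa_node);
         *	n = mlx5_irqs_request_vectors(dev, cpus, ncomp, irqs);
         *	kfree(cpus);
         *	if (n < 0)
         *		return n;
         *	// ... later ...
         *	mlx5_irqs_release_vectors(irqs, n);
         */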
    483static struct mlx5_irq_pool *
    484irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name,
    485	       u32 min_threshold, u32 max_threshold)
    486{
    487	struct mlx5_irq_pool *pool = kvzalloc(sizeof(*pool), GFP_KERNEL);
    488
    489	if (!pool)
    490		return ERR_PTR(-ENOMEM);
    491	pool->dev = dev;
    492	mutex_init(&pool->lock);
    493	xa_init_flags(&pool->irqs, XA_FLAGS_ALLOC);
    494	pool->xa_num_irqs.min = start;
    495	pool->xa_num_irqs.max = start + size - 1;
    496	if (name)
    497		snprintf(pool->name, MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS,
    498			 "%s", name);
    499	pool->min_threshold = min_threshold * MLX5_EQ_REFS_PER_IRQ;
    500	pool->max_threshold = max_threshold * MLX5_EQ_REFS_PER_IRQ;
    501	mlx5_core_dbg(dev, "pool->name = %s, pool->size = %d, pool->start = %d",
    502		      name, size, start);
    503	return pool;
    504}
    505
    506static void irq_pool_free(struct mlx5_irq_pool *pool)
    507{
    508	struct mlx5_irq *irq;
    509	unsigned long index;
    510
     511	/* There are cases in which we are destroying the irq_table before
    512	 * freeing all the IRQs, fast teardown for example. Hence, free the irqs
    513	 * which might not have been freed.
    514	 */
    515	xa_for_each(&pool->irqs, index, irq)
    516		irq_release(irq);
    517	xa_destroy(&pool->irqs);
    518	mutex_destroy(&pool->lock);
    519	kfree(pool->irqs_per_cpu);
    520	kvfree(pool);
    521}
    522
    523static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pf_vec)
    524{
    525	struct mlx5_irq_table *table = dev->priv.irq_table;
    526	int num_sf_ctrl_by_msix;
    527	int num_sf_ctrl_by_sfs;
    528	int num_sf_ctrl;
    529	int err;
    530
    531	/* init pf_pool */
    532	table->pf_pool = irq_pool_alloc(dev, 0, pf_vec, NULL,
    533					MLX5_EQ_SHARE_IRQ_MIN_COMP,
    534					MLX5_EQ_SHARE_IRQ_MAX_COMP);
    535	if (IS_ERR(table->pf_pool))
    536		return PTR_ERR(table->pf_pool);
    537	if (!mlx5_sf_max_functions(dev))
    538		return 0;
    539	if (sf_vec < MLX5_IRQ_VEC_COMP_BASE_SF) {
     540		mlx5_core_dbg(dev, "Not enough IRQs for SFs. SFs may run at lower performance\n");
    541		return 0;
    542	}
    543
    544	/* init sf_ctrl_pool */
    545	num_sf_ctrl_by_msix = DIV_ROUND_UP(sf_vec, MLX5_COMP_EQS_PER_SF);
    546	num_sf_ctrl_by_sfs = DIV_ROUND_UP(mlx5_sf_max_functions(dev),
    547					  MLX5_SFS_PER_CTRL_IRQ);
    548	num_sf_ctrl = min_t(int, num_sf_ctrl_by_msix, num_sf_ctrl_by_sfs);
    549	num_sf_ctrl = min_t(int, MLX5_IRQ_CTRL_SF_MAX, num_sf_ctrl);
    550	table->sf_ctrl_pool = irq_pool_alloc(dev, pf_vec, num_sf_ctrl,
    551					     "mlx5_sf_ctrl",
    552					     MLX5_EQ_SHARE_IRQ_MIN_CTRL,
    553					     MLX5_EQ_SHARE_IRQ_MAX_CTRL);
    554	if (IS_ERR(table->sf_ctrl_pool)) {
    555		err = PTR_ERR(table->sf_ctrl_pool);
    556		goto err_pf;
    557	}
    558	/* init sf_comp_pool */
    559	table->sf_comp_pool = irq_pool_alloc(dev, pf_vec + num_sf_ctrl,
    560					     sf_vec - num_sf_ctrl, "mlx5_sf_comp",
    561					     MLX5_EQ_SHARE_IRQ_MIN_COMP,
    562					     MLX5_EQ_SHARE_IRQ_MAX_COMP);
    563	if (IS_ERR(table->sf_comp_pool)) {
    564		err = PTR_ERR(table->sf_comp_pool);
    565		goto err_sf_ctrl;
    566	}
    567
    568	table->sf_comp_pool->irqs_per_cpu = kcalloc(nr_cpu_ids, sizeof(u16), GFP_KERNEL);
    569	if (!table->sf_comp_pool->irqs_per_cpu) {
    570		err = -ENOMEM;
    571		goto err_irqs_per_cpu;
    572	}
    573
    574	return 0;
    575
    576err_irqs_per_cpu:
    577	irq_pool_free(table->sf_comp_pool);
    578err_sf_ctrl:
    579	irq_pool_free(table->sf_ctrl_pool);
    580err_pf:
    581	irq_pool_free(table->pf_pool);
    582	return err;
    583}
    584
    585static void irq_pools_destroy(struct mlx5_irq_table *table)
    586{
    587	if (table->sf_ctrl_pool) {
    588		irq_pool_free(table->sf_comp_pool);
    589		irq_pool_free(table->sf_ctrl_pool);
    590	}
    591	irq_pool_free(table->pf_pool);
    592}
    593
    594/* irq_table API */
    595
    596int mlx5_irq_table_init(struct mlx5_core_dev *dev)
    597{
    598	struct mlx5_irq_table *irq_table;
    599
    600	if (mlx5_core_is_sf(dev))
    601		return 0;
    602
    603	irq_table = kvzalloc_node(sizeof(*irq_table), GFP_KERNEL,
    604				  dev->priv.numa_node);
    605	if (!irq_table)
    606		return -ENOMEM;
    607
    608	dev->priv.irq_table = irq_table;
    609	return 0;
    610}
    611
    612void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev)
    613{
    614	if (mlx5_core_is_sf(dev))
    615		return;
    616
    617	kvfree(dev->priv.irq_table);
    618}
    619
    620int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table)
    621{
    622	if (!table->pf_pool->xa_num_irqs.max)
    623		return 1;
    624	return table->pf_pool->xa_num_irqs.max - table->pf_pool->xa_num_irqs.min;
    625}
    626
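        /* Allocate MSI-X vectors for the PF (and, when supported, SFs) and
         * split them into the PF/SF IRQ pools.
         */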
    627int mlx5_irq_table_create(struct mlx5_core_dev *dev)
    628{
    629	int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
    630		      MLX5_CAP_GEN(dev, max_num_eqs) :
    631		      1 << MLX5_CAP_GEN(dev, log_max_eq);
    632	int total_vec;
    633	int pf_vec;
    634	int err;
    635
    636	if (mlx5_core_is_sf(dev))
    637		return 0;
    638
    639	pf_vec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + 1;
    640	pf_vec = min_t(int, pf_vec, num_eqs);
    641
    642	total_vec = pf_vec;
    643	if (mlx5_sf_max_functions(dev))
    644		total_vec += MLX5_IRQ_CTRL_SF_MAX +
    645			MLX5_COMP_EQS_PER_SF * mlx5_sf_max_functions(dev);
    646
    647	total_vec = pci_alloc_irq_vectors(dev->pdev, 1, total_vec, PCI_IRQ_MSIX);
    648	if (total_vec < 0)
    649		return total_vec;
    650	pf_vec = min(pf_vec, total_vec);
    651
    652	err = irq_pools_init(dev, total_vec - pf_vec, pf_vec);
    653	if (err)
    654		pci_free_irq_vectors(dev->pdev);
    655
    656	return err;
    657}
    658
    659void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
    660{
    661	struct mlx5_irq_table *table = dev->priv.irq_table;
    662
    663	if (mlx5_core_is_sf(dev))
    664		return;
    665
     666	/* There are cases where IRQs will still be in use when we reach
     667	 * this point. Hence, make sure all the IRQs are released.
    668	 */
    669	irq_pools_destroy(table);
    670	pci_free_irq_vectors(dev->pdev);
    671}
    672
    673int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table)
    674{
    675	if (table->sf_comp_pool)
    676		return min_t(int, num_online_cpus(),
    677			     table->sf_comp_pool->xa_num_irqs.max -
    678			     table->sf_comp_pool->xa_num_irqs.min + 1);
    679	else
    680		return mlx5_irq_table_get_num_comp(table);
    681}
    682
    683struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev)
    684{
    685#ifdef CONFIG_MLX5_SF
    686	if (mlx5_core_is_sf(dev))
    687		return dev->priv.parent_mdev->priv.irq_table;
    688#endif
    689	return dev->priv.irq_table;
    690}