cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

main.c (50225B)


/*
 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/io-mapping.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/cq.h>
#include <linux/mlx5/qp.h>
#include <linux/debugfs.h>
#include <linux/kmod.h>
#include <linux/mlx5/mlx5_ifc.h>
#include <linux/mlx5/vport.h>
#ifdef CONFIG_RFS_ACCEL
#include <linux/cpu_rmap.h>
#endif
#include <linux/version.h>
#include <net/devlink.h>
#include "mlx5_core.h"
#include "lib/eq.h"
#include "fs_core.h"
#include "lib/mpfs.h"
#include "eswitch.h"
#include "devlink.h"
#include "fw_reset.h"
#include "lib/mlx5.h"
#include "lib/tout.h"
#include "fpga/core.h"
#include "en_accel/ipsec.h"
#include "lib/clock.h"
#include "lib/vxlan.h"
#include "lib/geneve.h"
#include "lib/devcom.h"
#include "lib/pci_vsc.h"
#include "diag/fw_tracer.h"
#include "ecpf.h"
#include "lib/hv_vhca.h"
#include "diag/rsc_dump.h"
#include "sf/vhca_event.h"
#include "sf/dev/dev.h"
#include "sf/sf.h"
#include "mlx5_irq.h"

MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver");
MODULE_LICENSE("Dual BSD/GPL");

unsigned int mlx5_core_debug_mask;
module_param_named(debug_mask, mlx5_core_debug_mask, uint, 0644);
MODULE_PARM_DESC(debug_mask, "debug mask: 1 = dump cmd data, 2 = dump cmd exec time, 3 = both. Default=0");

static unsigned int prof_sel = MLX5_DEFAULT_PROF;
module_param_named(prof_sel, prof_sel, uint, 0444);
MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2");
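
/*
 * Load-time usage sketch (illustrative values, not a recommendation):
 *
 *   modprobe mlx5_core prof_sel=2 debug_mask=3
 *
 * prof_sel indexes the profile[] array below; debug_mask=3 enables both
 * command-data and command-execution-time dumping, per the
 * MODULE_PARM_DESC strings above.
 */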

static u32 sw_owner_id[4];

enum {
	MLX5_ATOMIC_REQ_MODE_BE = 0x0,
	MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1,
};

#define LOG_MAX_SUPPORTED_QPS 0xff

static struct mlx5_profile profile[] = {
	[0] = {
		.mask           = 0,
	},
	[1] = {
		.mask		= MLX5_PROF_MASK_QP_SIZE,
		.log_max_qp	= 12,
	},
	[2] = {
		.mask		= MLX5_PROF_MASK_QP_SIZE |
				  MLX5_PROF_MASK_MR_CACHE,
		.log_max_qp	= LOG_MAX_SUPPORTED_QPS,
		.mr_cache[0]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[1]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[2]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[3]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[4]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[5]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[6]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[7]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[8]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[9]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[10]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[11]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[12]	= {
			.size	= 64,
			.limit	= 32
		},
		.mr_cache[13]	= {
			.size	= 32,
			.limit	= 16
		},
		.mr_cache[14]	= {
			.size	= 16,
			.limit	= 8
		},
		.mr_cache[15]	= {
			.size	= 8,
			.limit	= 4
		},
	},
};
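
/*
 * profile[0] applies no overrides, profile[1] only caps the QP count
 * (log_max_qp = 12, i.e. 4096 QPs), and profile[2] additionally
 * pre-sizes the MR cache buckets and uses LOG_MAX_SUPPORTED_QPS as a
 * "track the HCA limit" sentinel, resolved in handle_hca_cap() below.
 */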

static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili,
			u32 warn_time_mili)
{
	unsigned long warn = jiffies + msecs_to_jiffies(warn_time_mili);
	unsigned long end = jiffies + msecs_to_jiffies(max_wait_mili);
	u32 fw_initializing;
	int err = 0;

	do {
		fw_initializing = ioread32be(&dev->iseg->initializing);
		if (!(fw_initializing >> 31))
			break;
		if (time_after(jiffies, end) ||
		    test_and_clear_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state)) {
			err = -EBUSY;
			break;
		}
		if (warn_time_mili && time_after(jiffies, warn)) {
			mlx5_core_warn(dev, "Waiting for FW initialization, timeout abort in %ds (0x%x)\n",
				       jiffies_to_msecs(end - warn) / 1000, fw_initializing);
			warn = jiffies + msecs_to_jiffies(warn_time_mili);
		}
		msleep(mlx5_tout_ms(dev, FW_PRE_INIT_WAIT));
	} while (true);

	return err;
}
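
/*
 * The loop above is the standard jiffies polling idiom: bit 31 of the
 * big-endian "initializing" word is the firmware-busy flag (e.g. a read
 * of 0x80000000 means still initializing), time_after() copes with
 * jiffies wraparound, and MLX5_BREAK_FW_WAIT, set from remove_one() and
 * shutdown(), lets those paths abort the wait early.
 */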

static void mlx5_set_driver_version(struct mlx5_core_dev *dev)
{
	int driver_ver_sz = MLX5_FLD_SZ_BYTES(set_driver_version_in,
					      driver_version);
	u8 in[MLX5_ST_SZ_BYTES(set_driver_version_in)] = {};
	int remaining_size = driver_ver_sz;
	char *string;

	if (!MLX5_CAP_GEN(dev, driver_version))
		return;

	string = MLX5_ADDR_OF(set_driver_version_in, in, driver_version);

	strncpy(string, "Linux", remaining_size);

	remaining_size = max_t(int, 0, driver_ver_sz - strlen(string));
	strncat(string, ",", remaining_size);

	remaining_size = max_t(int, 0, driver_ver_sz - strlen(string));
	strncat(string, KBUILD_MODNAME, remaining_size);

	remaining_size = max_t(int, 0, driver_ver_sz - strlen(string));
	strncat(string, ",", remaining_size);

	remaining_size = max_t(int, 0, driver_ver_sz - strlen(string));

	snprintf(string + strlen(string), remaining_size, "%u.%u.%u",
		LINUX_VERSION_MAJOR, LINUX_VERSION_PATCHLEVEL,
		LINUX_VERSION_SUBLEVEL);

	/* Send the command */
	MLX5_SET(set_driver_version_in, in, opcode,
		 MLX5_CMD_OP_SET_DRIVER_VERSION);

	mlx5_cmd_exec_in(dev, set_driver_version, in);
}
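
/*
 * Illustrative result (exact numbers depend on the running kernel): the
 * driver-version string handed to firmware looks like
 * "Linux,mlx5_core,5.19.0", i.e. "<os>,<module>,<major>.<patch>.<sub>".
 */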

static int set_dma_caps(struct pci_dev *pdev)
{
	int err;

	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
	if (err) {
		dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask\n");
		err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
		if (err) {
			dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting\n");
			return err;
		}
	}

	dma_set_max_seg_size(&pdev->dev, 2u * 1024 * 1024 * 1024);
	return err;
}

static int mlx5_pci_enable_device(struct mlx5_core_dev *dev)
{
	struct pci_dev *pdev = dev->pdev;
	int err = 0;

	mutex_lock(&dev->pci_status_mutex);
	if (dev->pci_status == MLX5_PCI_STATUS_DISABLED) {
		err = pci_enable_device(pdev);
		if (!err)
			dev->pci_status = MLX5_PCI_STATUS_ENABLED;
	}
	mutex_unlock(&dev->pci_status_mutex);

	return err;
}

static void mlx5_pci_disable_device(struct mlx5_core_dev *dev)
{
	struct pci_dev *pdev = dev->pdev;

	mutex_lock(&dev->pci_status_mutex);
	if (dev->pci_status == MLX5_PCI_STATUS_ENABLED) {
		pci_disable_device(pdev);
		dev->pci_status = MLX5_PCI_STATUS_DISABLED;
	}
	mutex_unlock(&dev->pci_status_mutex);
}

static int request_bar(struct pci_dev *pdev)
{
	int err = 0;

	if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
		dev_err(&pdev->dev, "Missing registers BAR, aborting\n");
		return -ENODEV;
	}

	err = pci_request_regions(pdev, KBUILD_MODNAME);
	if (err)
		dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");

	return err;
}

static void release_bar(struct pci_dev *pdev)
{
	pci_release_regions(pdev);
}

struct mlx5_reg_host_endianness {
	u8	he;
	u8	rsvd[15];
};

#define CAP_MASK(pos, size) ((u64)((1 << (size)) - 1) << (pos))

enum {
	MLX5_CAP_BITS_RW_MASK = CAP_MASK(MLX5_CAP_OFF_CMDIF_CSUM, 2) |
				MLX5_DEV_CAP_FLAG_DCT,
};
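
/*
 * CAP_MASK(pos, size) builds a size-bit mask at bit offset pos; for
 * example, CAP_MASK(20, 2) evaluates to 0x300000. MLX5_CAP_BITS_RW_MASK
 * therefore marks the two cmdif-checksum bits plus the DCT flag as the
 * only writable capability bits.
 */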

static u16 to_fw_pkey_sz(struct mlx5_core_dev *dev, u32 size)
{
	switch (size) {
	case 128:
		return 0;
	case 256:
		return 1;
	case 512:
		return 2;
	case 1024:
		return 3;
	case 2048:
		return 4;
	case 4096:
		return 5;
	default:
		mlx5_core_warn(dev, "invalid pkey table size %d\n", size);
		return 0;
	}
}
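
/*
 * The switch above encodes fw_value = log2(size / 128), so 128 -> 0,
 * 256 -> 1, ..., 4096 -> 5; any size outside these powers of two falls
 * through to the warning and defaults to the 128-entry encoding.
 */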

static int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev,
				   enum mlx5_cap_type cap_type,
				   enum mlx5_cap_mode cap_mode)
{
	u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)];
	int out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
	void *out, *hca_caps;
	u16 opmod = (cap_type << 1) | (cap_mode & 0x01);
	int err;

	memset(in, 0, sizeof(in));
	out = kzalloc(out_sz, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
	MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
	err = mlx5_cmd_exec_inout(dev, query_hca_cap, in, out);
	if (err) {
		mlx5_core_warn(dev,
			       "QUERY_HCA_CAP : type(%x) opmode(%x) Failed(%d)\n",
			       cap_type, cap_mode, err);
		goto query_ex;
	}

	hca_caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability);

	switch (cap_mode) {
	case HCA_CAP_OPMOD_GET_MAX:
		memcpy(dev->caps.hca[cap_type]->max, hca_caps,
		       MLX5_UN_SZ_BYTES(hca_cap_union));
		break;
	case HCA_CAP_OPMOD_GET_CUR:
		memcpy(dev->caps.hca[cap_type]->cur, hca_caps,
		       MLX5_UN_SZ_BYTES(hca_cap_union));
		break;
	default:
		mlx5_core_warn(dev,
			       "Tried to query dev cap type(%x) with wrong opmode(%x)\n",
			       cap_type, cap_mode);
		err = -EINVAL;
		break;
	}
query_ex:
	kfree(out);
	return err;
}
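
/*
 * The op_mod word packs the capability type and mode together:
 * opmod = (cap_type << 1) | (cap_mode & 1), so querying the current and
 * the maximum values of the same capability type differ only in the low
 * bit of op_mod.
 */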

int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type)
{
	int ret;

	ret = mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_CUR);
	if (ret)
		return ret;
	return mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_MAX);
}

static int set_caps(struct mlx5_core_dev *dev, void *in, int opmod)
{
	MLX5_SET(set_hca_cap_in, in, opcode, MLX5_CMD_OP_SET_HCA_CAP);
	MLX5_SET(set_hca_cap_in, in, op_mod, opmod << 1);
	return mlx5_cmd_exec_in(dev, set_hca_cap, in);
}

static int handle_hca_cap_atomic(struct mlx5_core_dev *dev, void *set_ctx)
{
	void *set_hca_cap;
	int req_endianness;
	int err;

	if (!MLX5_CAP_GEN(dev, atomic))
		return 0;

	err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC);
	if (err)
		return err;

	req_endianness =
		MLX5_CAP_ATOMIC(dev,
				supported_atomic_req_8B_endianness_mode_1);

	if (req_endianness != MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS)
		return 0;

	set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);

	/* Set requestor to host endianness */
	MLX5_SET(atomic_caps, set_hca_cap, atomic_req_8B_endianness_mode,
		 MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS);

	return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC);
}

static int handle_hca_cap_odp(struct mlx5_core_dev *dev, void *set_ctx)
{
	void *set_hca_cap;
	bool do_set = false;
	int err;

	if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) ||
	    !MLX5_CAP_GEN(dev, pg))
		return 0;

	err = mlx5_core_get_caps(dev, MLX5_CAP_ODP);
	if (err)
		return err;

	set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
	memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_ODP]->cur,
	       MLX5_ST_SZ_BYTES(odp_cap));

#define ODP_CAP_SET_MAX(dev, field)                                            \
	do {                                                                   \
		u32 _res = MLX5_CAP_ODP_MAX(dev, field);                       \
		if (_res) {                                                    \
			do_set = true;                                         \
			MLX5_SET(odp_cap, set_hca_cap, field, _res);           \
		}                                                              \
	} while (0)

	ODP_CAP_SET_MAX(dev, ud_odp_caps.srq_receive);
	ODP_CAP_SET_MAX(dev, rc_odp_caps.srq_receive);
	ODP_CAP_SET_MAX(dev, xrc_odp_caps.srq_receive);
	ODP_CAP_SET_MAX(dev, xrc_odp_caps.send);
	ODP_CAP_SET_MAX(dev, xrc_odp_caps.receive);
	ODP_CAP_SET_MAX(dev, xrc_odp_caps.write);
	ODP_CAP_SET_MAX(dev, xrc_odp_caps.read);
	ODP_CAP_SET_MAX(dev, xrc_odp_caps.atomic);
	ODP_CAP_SET_MAX(dev, dc_odp_caps.srq_receive);
	ODP_CAP_SET_MAX(dev, dc_odp_caps.send);
	ODP_CAP_SET_MAX(dev, dc_odp_caps.receive);
	ODP_CAP_SET_MAX(dev, dc_odp_caps.write);
	ODP_CAP_SET_MAX(dev, dc_odp_caps.read);
	ODP_CAP_SET_MAX(dev, dc_odp_caps.atomic);

	if (!do_set)
		return 0;

	return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ODP);
}
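
/*
 * Each ODP_CAP_SET_MAX() invocation above expands to roughly:
 *
 *	u32 _res = MLX5_CAP_ODP_MAX(dev, xrc_odp_caps.send);
 *	if (_res) {
 *		do_set = true;
 *		MLX5_SET(odp_cap, set_hca_cap, xrc_odp_caps.send, _res);
 *	}
 *
 * i.e. any ODP capability the firmware reports as supported at maximum
 * is enabled in the current caps, and the SET_HCA_CAP command is only
 * issued if at least one field was raised.
 */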

static int max_uc_list_get_devlink_param(struct mlx5_core_dev *dev)
{
	struct devlink *devlink = priv_to_devlink(dev);
	union devlink_param_value val;
	int err;

	err = devlink_param_driverinit_value_get(devlink,
						 DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
						 &val);
	if (!err)
		return val.vu32;
	mlx5_core_dbg(dev, "Failed to get param. err = %d\n", err);
	return err;
}

static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
{
	struct mlx5_profile *prof = &dev->profile;
	void *set_hca_cap;
	int max_uc_list;
	int err;

	err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL);
	if (err)
		return err;

	set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx,
				   capability);
	memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_GENERAL]->cur,
	       MLX5_ST_SZ_BYTES(cmd_hca_cap));

	mlx5_core_dbg(dev, "Current Pkey table size %d Setting new size %d\n",
		      mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(dev, pkey_table_size)),
		      128);
	/* we limit the size of the pkey table to 128 entries for now */
	MLX5_SET(cmd_hca_cap, set_hca_cap, pkey_table_size,
		 to_fw_pkey_sz(dev, 128));

	/* Check log_max_qp from HCA caps to set in current profile */
	if (prof->log_max_qp == LOG_MAX_SUPPORTED_QPS) {
		prof->log_max_qp = min_t(u8, 17, MLX5_CAP_GEN_MAX(dev, log_max_qp));
	} else if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < prof->log_max_qp) {
		mlx5_core_warn(dev, "log_max_qp value in current profile is %d, changing it to HCA capability limit (%d)\n",
			       prof->log_max_qp,
			       MLX5_CAP_GEN_MAX(dev, log_max_qp));
		prof->log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp);
	}
	if (prof->mask & MLX5_PROF_MASK_QP_SIZE)
		MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp,
			 prof->log_max_qp);

	/* disable cmdif checksum */
	MLX5_SET(cmd_hca_cap, set_hca_cap, cmdif_checksum, 0);

	/* Enable 4K UAR only when HCA supports it and page size is bigger
	 * than 4K.
	 */
	if (MLX5_CAP_GEN_MAX(dev, uar_4k) && PAGE_SIZE > 4096)
		MLX5_SET(cmd_hca_cap, set_hca_cap, uar_4k, 1);

	MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, PAGE_SHIFT - 12);

	if (MLX5_CAP_GEN_MAX(dev, cache_line_128byte))
		MLX5_SET(cmd_hca_cap,
			 set_hca_cap,
			 cache_line_128byte,
			 cache_line_size() >= 128 ? 1 : 0);

	if (MLX5_CAP_GEN_MAX(dev, dct))
		MLX5_SET(cmd_hca_cap, set_hca_cap, dct, 1);

	if (MLX5_CAP_GEN_MAX(dev, pci_sync_for_fw_update_event))
		MLX5_SET(cmd_hca_cap, set_hca_cap, pci_sync_for_fw_update_event, 1);

	if (MLX5_CAP_GEN_MAX(dev, num_vhca_ports))
		MLX5_SET(cmd_hca_cap,
			 set_hca_cap,
			 num_vhca_ports,
			 MLX5_CAP_GEN_MAX(dev, num_vhca_ports));

	if (MLX5_CAP_GEN_MAX(dev, release_all_pages))
		MLX5_SET(cmd_hca_cap, set_hca_cap, release_all_pages, 1);

	if (MLX5_CAP_GEN_MAX(dev, mkey_by_name))
		MLX5_SET(cmd_hca_cap, set_hca_cap, mkey_by_name, 1);

	mlx5_vhca_state_cap_handle(dev, set_hca_cap);

	if (MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix))
		MLX5_SET(cmd_hca_cap, set_hca_cap, num_total_dynamic_vf_msix,
			 MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix));

	if (MLX5_CAP_GEN(dev, roce_rw_supported))
		MLX5_SET(cmd_hca_cap, set_hca_cap, roce, mlx5_is_roce_init_enabled(dev));

	max_uc_list = max_uc_list_get_devlink_param(dev);
	if (max_uc_list > 0)
		MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_current_uc_list,
			 ilog2(max_uc_list));

	return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
}

/* Cached MLX5_CAP_GEN(dev, roce) can be out of sync this early in the
 * boot process.
 * In case RoCE cap is writable in FW and user/devlink requested to change the
 * cap, we are yet to query the final state of the above cap.
 * Hence, the need for this function.
 *
 * Returns
 * True:
 * 1) RoCE cap is read only in FW and already disabled
 * OR:
 * 2) RoCE cap is writable in FW and user/devlink requested it off.
 *
 * In any other case, return False.
 */
static bool is_roce_fw_disabled(struct mlx5_core_dev *dev)
{
	return (MLX5_CAP_GEN(dev, roce_rw_supported) && !mlx5_is_roce_init_enabled(dev)) ||
		(!MLX5_CAP_GEN(dev, roce_rw_supported) && !MLX5_CAP_GEN(dev, roce));
}

static int handle_hca_cap_roce(struct mlx5_core_dev *dev, void *set_ctx)
{
	void *set_hca_cap;
	int err;

	if (is_roce_fw_disabled(dev))
		return 0;

	err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE);
	if (err)
		return err;

	if (MLX5_CAP_ROCE(dev, sw_r_roce_src_udp_port) ||
	    !MLX5_CAP_ROCE_MAX(dev, sw_r_roce_src_udp_port))
		return 0;

	set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
	memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_ROCE]->cur,
	       MLX5_ST_SZ_BYTES(roce_cap));
	MLX5_SET(roce_cap, set_hca_cap, sw_r_roce_src_udp_port, 1);

	err = set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ROCE);
	return err;
}

static int set_hca_cap(struct mlx5_core_dev *dev)
{
	int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
	void *set_ctx;
	int err;

	set_ctx = kzalloc(set_sz, GFP_KERNEL);
	if (!set_ctx)
		return -ENOMEM;

	err = handle_hca_cap(dev, set_ctx);
	if (err) {
		mlx5_core_err(dev, "handle_hca_cap failed\n");
		goto out;
	}

	memset(set_ctx, 0, set_sz);
	err = handle_hca_cap_atomic(dev, set_ctx);
	if (err) {
		mlx5_core_err(dev, "handle_hca_cap_atomic failed\n");
		goto out;
	}

	memset(set_ctx, 0, set_sz);
	err = handle_hca_cap_odp(dev, set_ctx);
	if (err) {
		mlx5_core_err(dev, "handle_hca_cap_odp failed\n");
		goto out;
	}

	memset(set_ctx, 0, set_sz);
	err = handle_hca_cap_roce(dev, set_ctx);
	if (err) {
		mlx5_core_err(dev, "handle_hca_cap_roce failed\n");
		goto out;
	}

out:
	kfree(set_ctx);
	return err;
}

static int set_hca_ctrl(struct mlx5_core_dev *dev)
{
	struct mlx5_reg_host_endianness he_in;
	struct mlx5_reg_host_endianness he_out;
	int err;

	if (!mlx5_core_is_pf(dev))
		return 0;

	memset(&he_in, 0, sizeof(he_in));
	he_in.he = MLX5_SET_HOST_ENDIANNESS;
	err = mlx5_core_access_reg(dev, &he_in, sizeof(he_in),
					&he_out, sizeof(he_out),
					MLX5_REG_HOST_ENDIANNESS, 0, 1);
	return err;
}

static int mlx5_core_set_hca_defaults(struct mlx5_core_dev *dev)
{
	int ret = 0;

	/* Disable local_lb by default */
	if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH)
		ret = mlx5_nic_vport_update_local_lb(dev, false);

	return ret;
}

int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id)
{
	u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {};

	MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA);
	MLX5_SET(enable_hca_in, in, function_id, func_id);
	MLX5_SET(enable_hca_in, in, embedded_cpu_function,
		 dev->caps.embedded_cpu);
	return mlx5_cmd_exec_in(dev, enable_hca, in);
}

int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id)
{
	u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {};

	MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA);
	MLX5_SET(disable_hca_in, in, function_id, func_id);
	MLX5_SET(disable_hca_in, in, embedded_cpu_function,
		 dev->caps.embedded_cpu);
	return mlx5_cmd_exec_in(dev, disable_hca, in);
}

static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
{
	u32 query_out[MLX5_ST_SZ_DW(query_issi_out)] = {};
	u32 query_in[MLX5_ST_SZ_DW(query_issi_in)] = {};
	u32 sup_issi;
	int err;

	MLX5_SET(query_issi_in, query_in, opcode, MLX5_CMD_OP_QUERY_ISSI);
	err = mlx5_cmd_exec_inout(dev, query_issi, query_in, query_out);
	if (err) {
		u32 syndrome = MLX5_GET(query_issi_out, query_out, syndrome);
		u8 status = MLX5_GET(query_issi_out, query_out, status);

		if (!status || syndrome == MLX5_DRIVER_SYND) {
			mlx5_core_err(dev, "Failed to query ISSI err(%d) status(%d) synd(%d)\n",
				      err, status, syndrome);
			return err;
		}

		mlx5_core_warn(dev, "Query ISSI is not supported by FW, ISSI is 0\n");
		dev->issi = 0;
		return 0;
	}

	sup_issi = MLX5_GET(query_issi_out, query_out, supported_issi_dw0);

	if (sup_issi & (1 << 1)) {
		u32 set_in[MLX5_ST_SZ_DW(set_issi_in)] = {};

		MLX5_SET(set_issi_in, set_in, opcode, MLX5_CMD_OP_SET_ISSI);
		MLX5_SET(set_issi_in, set_in, current_issi, 1);
		err = mlx5_cmd_exec_in(dev, set_issi, set_in);
		if (err) {
			mlx5_core_err(dev, "Failed to set ISSI to 1 err(%d)\n",
				      err);
			return err;
		}

		dev->issi = 1;

		return 0;
	} else if (sup_issi & (1 << 0) || !sup_issi) {
		return 0;
	}

	return -EOPNOTSUPP;
}
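
/*
 * supported_issi_dw0 is a bitmask of supported interface step sequence
 * IDs: if bit 1 is set (e.g. sup_issi = 0x2), the driver selects ISSI 1
 * via SET_ISSI; bit 0 only, or firmware that predates QUERY_ISSI, leaves
 * the device at ISSI 0; anything else is -EOPNOTSUPP.
 */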

static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev,
			 const struct pci_device_id *id)
{
	int err = 0;

	mutex_init(&dev->pci_status_mutex);
	pci_set_drvdata(dev->pdev, dev);

	dev->bar_addr = pci_resource_start(pdev, 0);

	err = mlx5_pci_enable_device(dev);
	if (err) {
		mlx5_core_err(dev, "Cannot enable PCI device, aborting\n");
		return err;
	}

	err = request_bar(pdev);
	if (err) {
		mlx5_core_err(dev, "error requesting BARs, aborting\n");
		goto err_disable;
	}

	pci_set_master(pdev);

	err = set_dma_caps(pdev);
	if (err) {
		mlx5_core_err(dev, "Failed setting DMA capabilities mask, aborting\n");
		goto err_clr_master;
	}

	if (pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP32) &&
	    pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP64) &&
	    pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP128))
		mlx5_core_dbg(dev, "Enabling pci atomics failed\n");

	dev->iseg_base = dev->bar_addr;
	dev->iseg = ioremap(dev->iseg_base, sizeof(*dev->iseg));
	if (!dev->iseg) {
		err = -ENOMEM;
		mlx5_core_err(dev, "Failed mapping initialization segment, aborting\n");
		goto err_clr_master;
	}

	mlx5_pci_vsc_init(dev);
	dev->caps.embedded_cpu = mlx5_read_embedded_cpu(dev);
	return 0;

err_clr_master:
	pci_clear_master(dev->pdev);
	release_bar(dev->pdev);
err_disable:
	mlx5_pci_disable_device(dev);
	return err;
}

static void mlx5_pci_close(struct mlx5_core_dev *dev)
{
	/* health work might still be active, and it needs pci bar in
	 * order to know the NIC state. Therefore, drain the health WQ
	 * before removing the pci bars
	 */
	mlx5_drain_health_wq(dev);
	iounmap(dev->iseg);
	pci_clear_master(dev->pdev);
	release_bar(dev->pdev);
	mlx5_pci_disable_device(dev);
}

static int mlx5_init_once(struct mlx5_core_dev *dev)
{
	int err;

	dev->priv.devcom = mlx5_devcom_register_device(dev);
	if (IS_ERR(dev->priv.devcom))
		mlx5_core_err(dev, "failed to register with devcom (0x%p)\n",
			      dev->priv.devcom);

	err = mlx5_query_board_id(dev);
	if (err) {
		mlx5_core_err(dev, "query board id failed\n");
		goto err_devcom;
	}

	err = mlx5_irq_table_init(dev);
	if (err) {
		mlx5_core_err(dev, "failed to initialize irq table\n");
		goto err_devcom;
	}

	err = mlx5_eq_table_init(dev);
	if (err) {
		mlx5_core_err(dev, "failed to initialize eq\n");
		goto err_irq_cleanup;
	}

	err = mlx5_events_init(dev);
	if (err) {
		mlx5_core_err(dev, "failed to initialize events\n");
		goto err_eq_cleanup;
	}

	err = mlx5_fw_reset_init(dev);
	if (err) {
		mlx5_core_err(dev, "failed to initialize fw reset events\n");
		goto err_events_cleanup;
	}

	mlx5_cq_debugfs_init(dev);

	mlx5_init_reserved_gids(dev);

	mlx5_init_clock(dev);

	dev->vxlan = mlx5_vxlan_create(dev);
	dev->geneve = mlx5_geneve_create(dev);

	err = mlx5_init_rl_table(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to init rate limiting\n");
		goto err_tables_cleanup;
	}

	err = mlx5_mpfs_init(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to init l2 table %d\n", err);
		goto err_rl_cleanup;
	}

	err = mlx5_sriov_init(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to init sriov %d\n", err);
		goto err_mpfs_cleanup;
	}

	err = mlx5_eswitch_init(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to init eswitch %d\n", err);
		goto err_sriov_cleanup;
	}

	err = mlx5_fpga_init(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to init fpga device %d\n", err);
		goto err_eswitch_cleanup;
	}

	err = mlx5_vhca_event_init(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to init vhca event notifier %d\n", err);
		goto err_fpga_cleanup;
	}

	err = mlx5_sf_hw_table_init(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to init SF HW table %d\n", err);
		goto err_sf_hw_table_cleanup;
	}

	err = mlx5_sf_table_init(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to init SF table %d\n", err);
		goto err_sf_table_cleanup;
	}

	err = mlx5_fs_core_alloc(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to alloc flow steering\n");
		goto err_fs;
	}

	dev->dm = mlx5_dm_create(dev);
	if (IS_ERR(dev->dm))
		mlx5_core_warn(dev, "Failed to init device memory: %ld\n",
			       PTR_ERR(dev->dm));

	dev->tracer = mlx5_fw_tracer_create(dev);
	dev->hv_vhca = mlx5_hv_vhca_create(dev);
	dev->rsc_dump = mlx5_rsc_dump_create(dev);

	return 0;

err_fs:
	mlx5_sf_table_cleanup(dev);
err_sf_table_cleanup:
	mlx5_sf_hw_table_cleanup(dev);
err_sf_hw_table_cleanup:
	mlx5_vhca_event_cleanup(dev);
err_fpga_cleanup:
	mlx5_fpga_cleanup(dev);
err_eswitch_cleanup:
	mlx5_eswitch_cleanup(dev->priv.eswitch);
err_sriov_cleanup:
	mlx5_sriov_cleanup(dev);
err_mpfs_cleanup:
	mlx5_mpfs_cleanup(dev);
err_rl_cleanup:
	mlx5_cleanup_rl_table(dev);
err_tables_cleanup:
	mlx5_geneve_destroy(dev->geneve);
	mlx5_vxlan_destroy(dev->vxlan);
	mlx5_cq_debugfs_cleanup(dev);
	mlx5_fw_reset_cleanup(dev);
err_events_cleanup:
	mlx5_events_cleanup(dev);
err_eq_cleanup:
	mlx5_eq_table_cleanup(dev);
err_irq_cleanup:
	mlx5_irq_table_cleanup(dev);
err_devcom:
	mlx5_devcom_unregister_device(dev->priv.devcom);

	return err;
}

static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
{
	mlx5_rsc_dump_destroy(dev);
	mlx5_hv_vhca_destroy(dev->hv_vhca);
	mlx5_fw_tracer_destroy(dev->tracer);
	mlx5_dm_cleanup(dev);
	mlx5_fs_core_free(dev);
	mlx5_sf_table_cleanup(dev);
	mlx5_sf_hw_table_cleanup(dev);
	mlx5_vhca_event_cleanup(dev);
	mlx5_fpga_cleanup(dev);
	mlx5_eswitch_cleanup(dev->priv.eswitch);
	mlx5_sriov_cleanup(dev);
	mlx5_mpfs_cleanup(dev);
	mlx5_cleanup_rl_table(dev);
	mlx5_geneve_destroy(dev->geneve);
	mlx5_vxlan_destroy(dev->vxlan);
	mlx5_cleanup_clock(dev);
	mlx5_cleanup_reserved_gids(dev);
	mlx5_cq_debugfs_cleanup(dev);
	mlx5_fw_reset_cleanup(dev);
	mlx5_events_cleanup(dev);
	mlx5_eq_table_cleanup(dev);
	mlx5_irq_table_cleanup(dev);
	mlx5_devcom_unregister_device(dev->priv.devcom);
}

static int mlx5_function_setup(struct mlx5_core_dev *dev, u64 timeout)
{
	int err;

	mlx5_core_info(dev, "firmware version: %d.%d.%d\n", fw_rev_maj(dev),
		       fw_rev_min(dev), fw_rev_sub(dev));

	/* Only PFs hold the relevant PCIe information for this query */
	if (mlx5_core_is_pf(dev))
		pcie_print_link_status(dev->pdev);

	mlx5_tout_set_def_val(dev);

	/* wait for firmware to accept the initialization segment
	 * configuration
	 */
	err = wait_fw_init(dev, timeout,
			   mlx5_tout_ms(dev, FW_PRE_INIT_WARN_MESSAGE_INTERVAL));
	if (err) {
		mlx5_core_err(dev, "Firmware over %llu MS in pre-initializing state, aborting\n",
			      timeout);
		return err;
	}

	err = mlx5_cmd_init(dev);
	if (err) {
		mlx5_core_err(dev, "Failed initializing command interface, aborting\n");
		return err;
	}

	mlx5_tout_query_iseg(dev);

	err = wait_fw_init(dev, mlx5_tout_ms(dev, FW_INIT), 0);
	if (err) {
		mlx5_core_err(dev, "Firmware over %llu MS in initializing state, aborting\n",
			      mlx5_tout_ms(dev, FW_INIT));
		goto err_cmd_cleanup;
	}

	mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_UP);

	err = mlx5_core_enable_hca(dev, 0);
	if (err) {
		mlx5_core_err(dev, "enable hca failed\n");
		goto err_cmd_cleanup;
	}

	err = mlx5_core_set_issi(dev);
	if (err) {
		mlx5_core_err(dev, "failed to set issi\n");
		goto err_disable_hca;
	}

	err = mlx5_satisfy_startup_pages(dev, 1);
	if (err) {
		mlx5_core_err(dev, "failed to allocate boot pages\n");
		goto err_disable_hca;
	}

	err = mlx5_tout_query_dtor(dev);
	if (err) {
		mlx5_core_err(dev, "failed to read dtor\n");
		goto reclaim_boot_pages;
	}

	err = set_hca_ctrl(dev);
	if (err) {
		mlx5_core_err(dev, "set_hca_ctrl failed\n");
		goto reclaim_boot_pages;
	}

	err = set_hca_cap(dev);
	if (err) {
		mlx5_core_err(dev, "set_hca_cap failed\n");
		goto reclaim_boot_pages;
	}

	err = mlx5_satisfy_startup_pages(dev, 0);
	if (err) {
		mlx5_core_err(dev, "failed to allocate init pages\n");
		goto reclaim_boot_pages;
	}

	err = mlx5_cmd_init_hca(dev, sw_owner_id);
	if (err) {
		mlx5_core_err(dev, "init hca failed\n");
		goto reclaim_boot_pages;
	}

	mlx5_set_driver_version(dev);

	err = mlx5_query_hca_caps(dev);
	if (err) {
		mlx5_core_err(dev, "query hca failed\n");
		goto reclaim_boot_pages;
	}

	mlx5_start_health_poll(dev);

	return 0;

reclaim_boot_pages:
	mlx5_reclaim_startup_pages(dev);
err_disable_hca:
	mlx5_core_disable_hca(dev, 0);
err_cmd_cleanup:
	mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN);
	mlx5_cmd_cleanup(dev);

	return err;
}

static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot)
{
	int err;

	mlx5_stop_health_poll(dev, boot);
	err = mlx5_cmd_teardown_hca(dev);
	if (err) {
		mlx5_core_err(dev, "tear_down_hca failed, skip cleanup\n");
		return err;
	}
	mlx5_reclaim_startup_pages(dev);
	mlx5_core_disable_hca(dev, 0);
	mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN);
	mlx5_cmd_cleanup(dev);

	return 0;
}

static int mlx5_load(struct mlx5_core_dev *dev)
{
	int err;

	dev->priv.uar = mlx5_get_uars_page(dev);
	if (IS_ERR(dev->priv.uar)) {
		mlx5_core_err(dev, "Failed allocating uar, aborting\n");
		err = PTR_ERR(dev->priv.uar);
		return err;
	}

	mlx5_events_start(dev);
	mlx5_pagealloc_start(dev);

	err = mlx5_irq_table_create(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to alloc IRQs\n");
		goto err_irq_table;
	}

	err = mlx5_eq_table_create(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to create EQs\n");
		goto err_eq_table;
	}

	err = mlx5_fw_tracer_init(dev->tracer);
	if (err) {
		mlx5_core_err(dev, "Failed to init FW tracer %d\n", err);
		mlx5_fw_tracer_destroy(dev->tracer);
		dev->tracer = NULL;
	}

	mlx5_fw_reset_events_start(dev);
	mlx5_hv_vhca_init(dev->hv_vhca);

	err = mlx5_rsc_dump_init(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to init Resource dump %d\n", err);
		mlx5_rsc_dump_destroy(dev);
		dev->rsc_dump = NULL;
	}

	err = mlx5_fpga_device_start(dev);
	if (err) {
		mlx5_core_err(dev, "fpga device start failed %d\n", err);
		goto err_fpga_start;
	}

	err = mlx5_fs_core_init(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to init flow steering\n");
		goto err_fs;
	}

	err = mlx5_core_set_hca_defaults(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to set hca defaults\n");
		goto err_set_hca;
	}

	mlx5_vhca_event_start(dev);

	err = mlx5_sf_hw_table_create(dev);
	if (err) {
		mlx5_core_err(dev, "sf table create failed %d\n", err);
		goto err_vhca;
	}

	err = mlx5_ec_init(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to init embedded CPU\n");
		goto err_ec;
	}

	mlx5_lag_add_mdev(dev);
	err = mlx5_sriov_attach(dev);
	if (err) {
		mlx5_core_err(dev, "sriov init failed %d\n", err);
		goto err_sriov;
	}

	mlx5_sf_dev_table_create(dev);

	return 0;

err_sriov:
	mlx5_lag_remove_mdev(dev);
	mlx5_ec_cleanup(dev);
err_ec:
	mlx5_sf_hw_table_destroy(dev);
err_vhca:
	mlx5_vhca_event_stop(dev);
err_set_hca:
	mlx5_fs_core_cleanup(dev);
err_fs:
	mlx5_fpga_device_stop(dev);
err_fpga_start:
	mlx5_rsc_dump_cleanup(dev);
	mlx5_hv_vhca_cleanup(dev->hv_vhca);
	mlx5_fw_reset_events_stop(dev);
	mlx5_fw_tracer_cleanup(dev->tracer);
	mlx5_eq_table_destroy(dev);
err_eq_table:
	mlx5_irq_table_destroy(dev);
err_irq_table:
	mlx5_pagealloc_stop(dev);
	mlx5_events_stop(dev);
	mlx5_put_uars_page(dev, dev->priv.uar);
	return err;
}

static void mlx5_unload(struct mlx5_core_dev *dev)
{
	mlx5_sf_dev_table_destroy(dev);
	mlx5_sriov_detach(dev);
	mlx5_lag_remove_mdev(dev);
	mlx5_ec_cleanup(dev);
	mlx5_sf_hw_table_destroy(dev);
	mlx5_vhca_event_stop(dev);
	mlx5_fs_core_cleanup(dev);
	mlx5_fpga_device_stop(dev);
	mlx5_rsc_dump_cleanup(dev);
	mlx5_hv_vhca_cleanup(dev->hv_vhca);
	mlx5_fw_reset_events_stop(dev);
	mlx5_fw_tracer_cleanup(dev->tracer);
	mlx5_eq_table_destroy(dev);
	mlx5_irq_table_destroy(dev);
	mlx5_pagealloc_stop(dev);
	mlx5_events_stop(dev);
	mlx5_put_uars_page(dev, dev->priv.uar);
}

int mlx5_init_one(struct mlx5_core_dev *dev)
{
	int err = 0;

	mutex_lock(&dev->intf_state_mutex);
	dev->state = MLX5_DEVICE_STATE_UP;

	err = mlx5_function_setup(dev, mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT));
	if (err)
		goto err_function;

	err = mlx5_init_once(dev);
	if (err) {
		mlx5_core_err(dev, "sw objs init failed\n");
		goto function_teardown;
	}

	err = mlx5_load(dev);
	if (err)
		goto err_load;

	set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);

	err = mlx5_devlink_register(priv_to_devlink(dev));
	if (err)
		goto err_devlink_reg;

	err = mlx5_register_device(dev);
	if (err)
		goto err_register;

	mutex_unlock(&dev->intf_state_mutex);
	return 0;

err_register:
	mlx5_devlink_unregister(priv_to_devlink(dev));
err_devlink_reg:
	clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
	mlx5_unload(dev);
err_load:
	mlx5_cleanup_once(dev);
function_teardown:
	mlx5_function_teardown(dev, true);
err_function:
	dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
	mutex_unlock(&dev->intf_state_mutex);
	return err;
}

void mlx5_uninit_one(struct mlx5_core_dev *dev)
{
	mutex_lock(&dev->intf_state_mutex);

	mlx5_unregister_device(dev);
	mlx5_devlink_unregister(priv_to_devlink(dev));

	if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
		mlx5_core_warn(dev, "%s: interface is down, NOP\n",
			       __func__);
		mlx5_cleanup_once(dev);
		goto out;
	}

	clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
	mlx5_unload(dev);
	mlx5_cleanup_once(dev);
	mlx5_function_teardown(dev, true);
out:
	mutex_unlock(&dev->intf_state_mutex);
}

int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery)
{
	int err = 0;
	u64 timeout;

	mutex_lock(&dev->intf_state_mutex);
	if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
		mlx5_core_warn(dev, "interface is up, NOP\n");
		goto out;
	}
	/* remove any previous indication of internal error */
	dev->state = MLX5_DEVICE_STATE_UP;

	if (recovery)
		timeout = mlx5_tout_ms(dev, FW_PRE_INIT_ON_RECOVERY_TIMEOUT);
	else
		timeout = mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT);
	err = mlx5_function_setup(dev, timeout);
	if (err)
		goto err_function;

	err = mlx5_load(dev);
	if (err)
		goto err_load;

	set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);

	err = mlx5_attach_device(dev);
	if (err)
		goto err_attach;

	mutex_unlock(&dev->intf_state_mutex);
	return 0;

err_attach:
	clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
	mlx5_unload(dev);
err_load:
	mlx5_function_teardown(dev, false);
err_function:
	dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
out:
	mutex_unlock(&dev->intf_state_mutex);
	return err;
}

void mlx5_unload_one(struct mlx5_core_dev *dev)
{
	mutex_lock(&dev->intf_state_mutex);

	mlx5_detach_device(dev);

	if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
		mlx5_core_warn(dev, "%s: interface is down, NOP\n",
			       __func__);
		goto out;
	}

	clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
	mlx5_unload(dev);
	mlx5_function_teardown(dev, false);
out:
	mutex_unlock(&dev->intf_state_mutex);
}

static const int types[] = {
	MLX5_CAP_GENERAL,
	MLX5_CAP_GENERAL_2,
	MLX5_CAP_ETHERNET_OFFLOADS,
	MLX5_CAP_IPOIB_ENHANCED_OFFLOADS,
	MLX5_CAP_ODP,
	MLX5_CAP_ATOMIC,
	MLX5_CAP_ROCE,
	MLX5_CAP_IPOIB_OFFLOADS,
	MLX5_CAP_FLOW_TABLE,
	MLX5_CAP_ESWITCH_FLOW_TABLE,
	MLX5_CAP_ESWITCH,
	MLX5_CAP_VECTOR_CALC,
	MLX5_CAP_QOS,
	MLX5_CAP_DEBUG,
	MLX5_CAP_DEV_MEM,
	MLX5_CAP_DEV_EVENT,
	MLX5_CAP_TLS,
	MLX5_CAP_VDPA_EMULATION,
	MLX5_CAP_IPSEC,
	MLX5_CAP_PORT_SELECTION,
	MLX5_CAP_DEV_SHAMPO,
};

static void mlx5_hca_caps_free(struct mlx5_core_dev *dev)
{
	int type;
	int i;

	for (i = 0; i < ARRAY_SIZE(types); i++) {
		type = types[i];
		kfree(dev->caps.hca[type]);
	}
}

static int mlx5_hca_caps_alloc(struct mlx5_core_dev *dev)
{
	struct mlx5_hca_cap *cap;
	int type;
	int i;

	for (i = 0; i < ARRAY_SIZE(types); i++) {
		cap = kzalloc(sizeof(*cap), GFP_KERNEL);
		if (!cap)
			goto err;
		type = types[i];
		dev->caps.hca[type] = cap;
	}

	return 0;

err:
	mlx5_hca_caps_free(dev);
	return -ENOMEM;
}

int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
{
	struct mlx5_priv *priv = &dev->priv;
	int err;

	memcpy(&dev->profile, &profile[profile_idx], sizeof(dev->profile));
	INIT_LIST_HEAD(&priv->ctx_list);
	spin_lock_init(&priv->ctx_lock);
	mutex_init(&dev->intf_state_mutex);

	mutex_init(&priv->bfregs.reg_head.lock);
	mutex_init(&priv->bfregs.wc_head.lock);
	INIT_LIST_HEAD(&priv->bfregs.reg_head.list);
	INIT_LIST_HEAD(&priv->bfregs.wc_head.list);

	mutex_init(&priv->alloc_mutex);
	mutex_init(&priv->pgdir_mutex);
	INIT_LIST_HEAD(&priv->pgdir_list);

	priv->numa_node = dev_to_node(mlx5_core_dma_dev(dev));
	priv->dbg.dbg_root = debugfs_create_dir(dev_name(dev->device),
						mlx5_debugfs_root);
	INIT_LIST_HEAD(&priv->traps);

	err = mlx5_tout_init(dev);
	if (err) {
		mlx5_core_err(dev, "Failed initializing timeouts, aborting\n");
		goto err_timeout_init;
	}

	err = mlx5_health_init(dev);
	if (err)
		goto err_health_init;

	err = mlx5_pagealloc_init(dev);
	if (err)
		goto err_pagealloc_init;

	err = mlx5_adev_init(dev);
	if (err)
		goto err_adev_init;

	err = mlx5_hca_caps_alloc(dev);
	if (err)
		goto err_hca_caps;

	return 0;

err_hca_caps:
	mlx5_adev_cleanup(dev);
err_adev_init:
	mlx5_pagealloc_cleanup(dev);
err_pagealloc_init:
	mlx5_health_cleanup(dev);
err_health_init:
	mlx5_tout_cleanup(dev);
err_timeout_init:
	debugfs_remove(dev->priv.dbg.dbg_root);
	mutex_destroy(&priv->pgdir_mutex);
	mutex_destroy(&priv->alloc_mutex);
	mutex_destroy(&priv->bfregs.wc_head.lock);
	mutex_destroy(&priv->bfregs.reg_head.lock);
	mutex_destroy(&dev->intf_state_mutex);
	return err;
}

void mlx5_mdev_uninit(struct mlx5_core_dev *dev)
{
	struct mlx5_priv *priv = &dev->priv;

	mlx5_hca_caps_free(dev);
	mlx5_adev_cleanup(dev);
	mlx5_pagealloc_cleanup(dev);
	mlx5_health_cleanup(dev);
	mlx5_tout_cleanup(dev);
	debugfs_remove_recursive(dev->priv.dbg.dbg_root);
	mutex_destroy(&priv->pgdir_mutex);
	mutex_destroy(&priv->alloc_mutex);
	mutex_destroy(&priv->bfregs.wc_head.lock);
	mutex_destroy(&priv->bfregs.reg_head.lock);
	mutex_destroy(&dev->intf_state_mutex);
}

static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
{
	struct mlx5_core_dev *dev;
	struct devlink *devlink;
	int err;

	devlink = mlx5_devlink_alloc(&pdev->dev);
	if (!devlink) {
		dev_err(&pdev->dev, "devlink alloc failed\n");
		return -ENOMEM;
	}

	dev = devlink_priv(devlink);
	dev->device = &pdev->dev;
	dev->pdev = pdev;

	dev->coredev_type = id->driver_data & MLX5_PCI_DEV_IS_VF ?
			 MLX5_COREDEV_VF : MLX5_COREDEV_PF;

	dev->priv.adev_idx = mlx5_adev_idx_alloc();
	if (dev->priv.adev_idx < 0) {
		err = dev->priv.adev_idx;
		goto adev_init_err;
	}

	err = mlx5_mdev_init(dev, prof_sel);
	if (err)
		goto mdev_init_err;

	err = mlx5_pci_init(dev, pdev, id);
	if (err) {
		mlx5_core_err(dev, "mlx5_pci_init failed with error code %d\n",
			      err);
		goto pci_init_err;
	}

	err = mlx5_init_one(dev);
	if (err) {
		mlx5_core_err(dev, "mlx5_init_one failed with error code %d\n",
			      err);
		goto err_init_one;
	}

	err = mlx5_crdump_enable(dev);
	if (err)
		dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err);

	pci_save_state(pdev);
	devlink_register(devlink);
	return 0;

err_init_one:
	mlx5_pci_close(dev);
pci_init_err:
	mlx5_mdev_uninit(dev);
mdev_init_err:
	mlx5_adev_idx_free(dev->priv.adev_idx);
adev_init_err:
	mlx5_devlink_free(devlink);

	return err;
}

static void remove_one(struct pci_dev *pdev)
{
	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
	struct devlink *devlink = priv_to_devlink(dev);

	/* mlx5_drain_fw_reset() is using devlink APIs. Hence, we must drain
	 * fw_reset before unregistering the devlink.
	 */
	mlx5_drain_fw_reset(dev);
	set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state);
	devlink_unregister(devlink);
	mlx5_sriov_disable(pdev);
	mlx5_crdump_disable(dev);
	mlx5_drain_health_wq(dev);
	mlx5_uninit_one(dev);
	mlx5_pci_close(dev);
	mlx5_mdev_uninit(dev);
	mlx5_adev_idx_free(dev->priv.adev_idx);
	mlx5_devlink_free(devlink);
}

#define mlx5_pci_trace(dev, fmt, ...) ({ \
	struct mlx5_core_dev *__dev = (dev); \
	mlx5_core_info(__dev, "%s Device state = %d health sensors: %d pci_status: %d. " fmt, \
		       __func__, __dev->state, mlx5_health_check_fatal_sensors(__dev), \
		       __dev->pci_status, ##__VA_ARGS__); \
})

static const char *result2str(enum pci_ers_result result)
{
	return	result == PCI_ERS_RESULT_NEED_RESET ? "need reset" :
		result == PCI_ERS_RESULT_DISCONNECT ? "disconnect" :
		result == PCI_ERS_RESULT_RECOVERED  ? "recovered" :
		"unknown";
}

static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
					      pci_channel_state_t state)
{
	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
	enum pci_ers_result res;

	mlx5_pci_trace(dev, "Enter, pci channel state = %d\n", state);

	mlx5_enter_error_state(dev, false);
	mlx5_error_sw_reset(dev);
	mlx5_unload_one(dev);
	mlx5_drain_health_wq(dev);
	mlx5_pci_disable_device(dev);

	res = state == pci_channel_io_perm_failure ?
		PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;

	mlx5_pci_trace(dev, "Exit, result = %d, %s\n", res, result2str(res));
	return res;
}

/* wait for the device to show vital signs by waiting
 * for the health counter to start counting.
 */
static int wait_vital(struct pci_dev *pdev)
{
	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
	struct mlx5_core_health *health = &dev->priv.health;
	const int niter = 100;
	u32 last_count = 0;
	u32 count;
	int i;

	for (i = 0; i < niter; i++) {
		count = ioread32be(health->health_counter);
		if (count && count != 0xffffffff) {
			if (last_count && last_count != count) {
				mlx5_core_info(dev,
					       "wait vital counter value 0x%x after %d iterations\n",
					       count, i);
				return 0;
			}
			last_count = count;
		}
		msleep(50);
	}

	return -ETIMEDOUT;
}
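
/*
 * With niter = 100 and a 50 ms sleep per iteration, wait_vital() allows
 * up to ~5 seconds for two distinct, non-poisoned (!= 0xffffffff) health
 * counter readings before giving up with -ETIMEDOUT.
 */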
   1700
   1701static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
   1702{
   1703	enum pci_ers_result res = PCI_ERS_RESULT_DISCONNECT;
   1704	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
   1705	int err;
   1706
   1707	mlx5_pci_trace(dev, "Enter\n");
   1708
   1709	err = mlx5_pci_enable_device(dev);
   1710	if (err) {
   1711		mlx5_core_err(dev, "%s: mlx5_pci_enable_device failed with error code: %d\n",
   1712			      __func__, err);
   1713		goto out;
   1714	}
   1715
   1716	pci_set_master(pdev);
   1717	pci_restore_state(pdev);
   1718	pci_save_state(pdev);
   1719
   1720	err = wait_vital(pdev);
   1721	if (err) {
   1722		mlx5_core_err(dev, "%s: wait vital failed with error code: %d\n",
   1723			      __func__, err);
   1724		goto out;
   1725	}
   1726
   1727	res = PCI_ERS_RESULT_RECOVERED;
   1728out:
   1729	mlx5_pci_trace(dev, "Exit, err = %d, result = %d, %s\n", err, res, result2str(res));
   1730	return res;
   1731}
   1732
   1733static void mlx5_pci_resume(struct pci_dev *pdev)
   1734{
   1735	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
   1736	int err;
   1737
   1738	mlx5_pci_trace(dev, "Enter, loading driver..\n");
   1739
   1740	err = mlx5_load_one(dev, false);
   1741
   1742	mlx5_pci_trace(dev, "Done, err = %d, device %s\n", err,
   1743		       !err ? "recovered" : "Failed");
   1744}
   1745
   1746static const struct pci_error_handlers mlx5_err_handler = {
   1747	.error_detected = mlx5_pci_err_detected,
   1748	.slot_reset	= mlx5_pci_slot_reset,
   1749	.resume		= mlx5_pci_resume
   1750};
   1751
   1752static int mlx5_try_fast_unload(struct mlx5_core_dev *dev)
   1753{
   1754	bool fast_teardown = false, force_teardown = false;
   1755	int ret = 1;
   1756
   1757	fast_teardown = MLX5_CAP_GEN(dev, fast_teardown);
   1758	force_teardown = MLX5_CAP_GEN(dev, force_teardown);
   1759
   1760	mlx5_core_dbg(dev, "force teardown firmware support=%d\n", force_teardown);
   1761	mlx5_core_dbg(dev, "fast teardown firmware support=%d\n", fast_teardown);
   1762
   1763	if (!fast_teardown && !force_teardown)
   1764		return -EOPNOTSUPP;
   1765
   1766	if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
   1767		mlx5_core_dbg(dev, "Device in internal error state, giving up\n");
   1768		return -EAGAIN;
   1769	}
   1770
   1771	/* Panic tear down fw command will stop the PCI bus communication
   1772	 * with the HCA, so the health poll is no longer needed.
   1773	 */
   1774	mlx5_drain_health_wq(dev);
   1775	mlx5_stop_health_poll(dev, false);
   1776
   1777	ret = mlx5_cmd_fast_teardown_hca(dev);
   1778	if (!ret)
   1779		goto succeed;
   1780
   1781	ret = mlx5_cmd_force_teardown_hca(dev);
   1782	if (!ret)
   1783		goto succeed;
   1784
    1785	mlx5_core_dbg(dev, "Firmware couldn't do fast unload, error: %d\n", ret);
   1786	mlx5_start_health_poll(dev);
   1787	return ret;
   1788
   1789succeed:
   1790	mlx5_enter_error_state(dev, true);
   1791
    1792	/* Some platforms require freeing the IRQs in the shutdown
    1793	 * flow. If they aren't freed, they can't be allocated after
    1794	 * kexec. There is no need to clean up the mlx5_core software
    1795	 * contexts.
    1796	 */
   1797	mlx5_core_eq_free_irqs(dev);
   1798
   1799	return 0;
   1800}
   1801
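        /* PCI shutdown callback (reboot/kexec): stop waiting on firmware
         * initialization, try the fast unload path, and fall back to a full
         * unload before disabling the PCI device.
         */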
   1802static void shutdown(struct pci_dev *pdev)
   1803{
   1804	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
   1805	int err;
   1806
   1807	mlx5_core_info(dev, "Shutdown was called\n");
   1808	set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state);
   1809	err = mlx5_try_fast_unload(dev);
   1810	if (err)
   1811		mlx5_unload_one(dev);
   1812	mlx5_pci_disable_device(dev);
   1813}
   1814
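        /* Legacy PCI PM callbacks: suspend tears the device down completely
         * and resume brings it back up, rather than preserving state across
         * the power transition.
         */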
   1815static int mlx5_suspend(struct pci_dev *pdev, pm_message_t state)
   1816{
   1817	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
   1818
   1819	mlx5_unload_one(dev);
   1820
   1821	return 0;
   1822}
   1823
   1824static int mlx5_resume(struct pci_dev *pdev)
   1825{
   1826	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
   1827
   1828	return mlx5_load_one(dev, false);
   1829}
   1830
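        /* PCI IDs served by this driver; entries flagged MLX5_PCI_DEV_IS_VF
         * are virtual functions, the rest physical functions or integrated
         * (BlueField) controllers.
         */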
   1831static const struct pci_device_id mlx5_core_pci_table[] = {
   1832	{ PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTIB) },
   1833	{ PCI_VDEVICE(MELLANOX, 0x1012), MLX5_PCI_DEV_IS_VF},	/* Connect-IB VF */
   1834	{ PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTX4) },
   1835	{ PCI_VDEVICE(MELLANOX, 0x1014), MLX5_PCI_DEV_IS_VF},	/* ConnectX-4 VF */
   1836	{ PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTX4_LX) },
   1837	{ PCI_VDEVICE(MELLANOX, 0x1016), MLX5_PCI_DEV_IS_VF},	/* ConnectX-4LX VF */
   1838	{ PCI_VDEVICE(MELLANOX, 0x1017) },			/* ConnectX-5, PCIe 3.0 */
   1839	{ PCI_VDEVICE(MELLANOX, 0x1018), MLX5_PCI_DEV_IS_VF},	/* ConnectX-5 VF */
   1840	{ PCI_VDEVICE(MELLANOX, 0x1019) },			/* ConnectX-5 Ex */
   1841	{ PCI_VDEVICE(MELLANOX, 0x101a), MLX5_PCI_DEV_IS_VF},	/* ConnectX-5 Ex VF */
   1842	{ PCI_VDEVICE(MELLANOX, 0x101b) },			/* ConnectX-6 */
   1843	{ PCI_VDEVICE(MELLANOX, 0x101c), MLX5_PCI_DEV_IS_VF},	/* ConnectX-6 VF */
   1844	{ PCI_VDEVICE(MELLANOX, 0x101d) },			/* ConnectX-6 Dx */
   1845	{ PCI_VDEVICE(MELLANOX, 0x101e), MLX5_PCI_DEV_IS_VF},	/* ConnectX Family mlx5Gen Virtual Function */
   1846	{ PCI_VDEVICE(MELLANOX, 0x101f) },			/* ConnectX-6 LX */
   1847	{ PCI_VDEVICE(MELLANOX, 0x1021) },			/* ConnectX-7 */
   1848	{ PCI_VDEVICE(MELLANOX, 0x1023) },			/* ConnectX-8 */
   1849	{ PCI_VDEVICE(MELLANOX, 0xa2d2) },			/* BlueField integrated ConnectX-5 network controller */
   1850	{ PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF},	/* BlueField integrated ConnectX-5 network controller VF */
   1851	{ PCI_VDEVICE(MELLANOX, 0xa2d6) },			/* BlueField-2 integrated ConnectX-6 Dx network controller */
   1852	{ PCI_VDEVICE(MELLANOX, 0xa2dc) },			/* BlueField-3 integrated ConnectX-7 network controller */
   1853	{ PCI_VDEVICE(MELLANOX, 0xa2df) },			/* BlueField-4 integrated ConnectX-8 network controller */
   1854	{ 0, }
   1855};
   1856
   1857MODULE_DEVICE_TABLE(pci, mlx5_core_pci_table);
   1858
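        /* Recovery helpers: mlx5_disable_device() resets and unloads a faulty
         * device, and mlx5_recover_device() re-runs the slot-reset sequence
         * (skipped for SFs, which have no PCI function of their own) before
         * reloading the device.
         */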
   1859void mlx5_disable_device(struct mlx5_core_dev *dev)
   1860{
   1861	mlx5_error_sw_reset(dev);
   1862	mlx5_unload_one(dev);
   1863}
   1864
   1865int mlx5_recover_device(struct mlx5_core_dev *dev)
   1866{
   1867	if (!mlx5_core_is_sf(dev)) {
   1868		mlx5_pci_disable_device(dev);
   1869		if (mlx5_pci_slot_reset(dev->pdev) != PCI_ERS_RESULT_RECOVERED)
   1870			return -EIO;
   1871	}
   1872
   1873	return mlx5_load_one(dev, true);
   1874}
   1875
   1876static struct pci_driver mlx5_core_driver = {
   1877	.name           = KBUILD_MODNAME,
   1878	.id_table       = mlx5_core_pci_table,
   1879	.probe          = probe_one,
   1880	.remove         = remove_one,
   1881	.suspend        = mlx5_suspend,
   1882	.resume         = mlx5_resume,
   1883	.shutdown	= shutdown,
   1884	.err_handler	= &mlx5_err_handler,
   1885	.sriov_configure   = mlx5_core_sriov_configure,
   1886	.sriov_get_vf_total_msix = mlx5_sriov_get_vf_total_msix,
   1887	.sriov_set_msix_vec_count = mlx5_core_sriov_set_msix_vec_count,
   1888};
   1889
   1890/**
   1891 * mlx5_vf_get_core_dev - Get the mlx5 core device from a given VF PCI device if
   1892 *                     mlx5_core is its driver.
   1893 * @pdev: The associated PCI device.
   1894 *
    1895 * Upon return, the interface state lock stays held so the caller can use it
    1896 * safely. The caller must use the returned mlx5 device only for a narrow
    1897 * window and put it back with mlx5_vf_put_core_dev() as soon as it is done.
   1898 *
   1899 * Return: Pointer to the associated mlx5_core_dev or NULL.
   1900 */
   1901struct mlx5_core_dev *mlx5_vf_get_core_dev(struct pci_dev *pdev)
   1902{
   1903	struct mlx5_core_dev *mdev;
   1904
   1905	mdev = pci_iov_get_pf_drvdata(pdev, &mlx5_core_driver);
   1906	if (IS_ERR(mdev))
   1907		return NULL;
   1908
   1909	mutex_lock(&mdev->intf_state_mutex);
   1910	if (!test_bit(MLX5_INTERFACE_STATE_UP, &mdev->intf_state)) {
   1911		mutex_unlock(&mdev->intf_state_mutex);
   1912		return NULL;
   1913	}
   1914
   1915	return mdev;
   1916}
   1917EXPORT_SYMBOL(mlx5_vf_get_core_dev);
   1918
   1919/**
   1920 * mlx5_vf_put_core_dev - Put the mlx5 core device back.
   1921 * @mdev: The mlx5 core device.
   1922 *
    1923 * Upon return, the interface state lock is unlocked and the caller must not
    1924 * access the mdev any more.
   1925 */
   1926void mlx5_vf_put_core_dev(struct mlx5_core_dev *mdev)
   1927{
   1928	mutex_unlock(&mdev->intf_state_mutex);
   1929}
   1930EXPORT_SYMBOL(mlx5_vf_put_core_dev);
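
        /* A minimal usage sketch for the get/put pair above (hypothetical
         * caller; "vf_pdev" is assumed to be a VF PCI device bound to
         * mlx5_core). The window between get and put must stay narrow, since
         * the interface state lock is held the whole time:
         *
         *	struct mlx5_core_dev *mdev;
         *
         *	mdev = mlx5_vf_get_core_dev(vf_pdev);
         *	if (!mdev)
         *		return -ENODEV;	(driver not bound or interface down)
         *	(... short critical section that uses mdev ...)
         *	mlx5_vf_put_core_dev(mdev);
         */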
   1931
   1932static void mlx5_core_verify_params(void)
   1933{
   1934	if (prof_sel >= ARRAY_SIZE(profile)) {
    1935		pr_warn("mlx5_core: WARNING: Invalid module parameter prof_sel %d, valid range 0-%zu, changing back to default (%d)\n",
   1936			prof_sel,
   1937			ARRAY_SIZE(profile) - 1,
   1938			MLX5_DEFAULT_PROF);
   1939		prof_sel = MLX5_DEFAULT_PROF;
   1940	}
   1941}
   1942
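        /* Module entry point: verify module parameters, then register debugfs,
         * the PCI driver, the SF driver, and the Ethernet (mlx5e) layer; on
         * any failure the already-registered pieces are unwound in reverse
         * order, mirroring cleanup() below.
         */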
   1943static int __init init(void)
   1944{
   1945	int err;
   1946
   1947	WARN_ONCE(strcmp(MLX5_ADEV_NAME, KBUILD_MODNAME),
   1948		  "mlx5_core name not in sync with kernel module name");
   1949
   1950	get_random_bytes(&sw_owner_id, sizeof(sw_owner_id));
   1951
   1952	mlx5_core_verify_params();
   1953	mlx5_register_debugfs();
   1954
   1955	err = pci_register_driver(&mlx5_core_driver);
   1956	if (err)
   1957		goto err_debug;
   1958
   1959	err = mlx5_sf_driver_register();
   1960	if (err)
   1961		goto err_sf;
   1962
   1963	err = mlx5e_init();
   1964	if (err)
   1965		goto err_en;
   1966
   1967	return 0;
   1968
   1969err_en:
   1970	mlx5_sf_driver_unregister();
   1971err_sf:
   1972	pci_unregister_driver(&mlx5_core_driver);
   1973err_debug:
   1974	mlx5_unregister_debugfs();
   1975	return err;
   1976}
   1977
   1978static void __exit cleanup(void)
   1979{
   1980	mlx5e_cleanup();
   1981	mlx5_sf_driver_unregister();
   1982	pci_unregister_driver(&mlx5_core_driver);
   1983	mlx5_unregister_debugfs();
   1984}
   1985
   1986module_init(init);
   1987module_exit(cleanup);