cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

health.c (26366B)


      1/*
      2 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
      3 *
      4 * This software is available to you under a choice of one of two
      5 * licenses.  You may choose to be licensed under the terms of the GNU
      6 * General Public License (GPL) Version 2, available from the file
      7 * COPYING in the main directory of this source tree, or the
      8 * OpenIB.org BSD license below:
      9 *
     10 *     Redistribution and use in source and binary forms, with or
     11 *     without modification, are permitted provided that the following
     12 *     conditions are met:
     13 *
     14 *      - Redistributions of source code must retain the above
     15 *        copyright notice, this list of conditions and the following
     16 *        disclaimer.
     17 *
     18 *      - Redistributions in binary form must reproduce the above
     19 *        copyright notice, this list of conditions and the following
     20 *        disclaimer in the documentation and/or other materials
     21 *        provided with the distribution.
     22 *
     23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
     26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
     27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
     28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
     29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     30 * SOFTWARE.
     31 */
     32
     33#include <linux/kernel.h>
     34#include <linux/random.h>
     35#include <linux/vmalloc.h>
     36#include <linux/hardirq.h>
     37#include <linux/mlx5/driver.h>
     38#include <linux/kern_levels.h>
     39#include "mlx5_core.h"
     40#include "lib/eq.h"
     41#include "lib/mlx5.h"
     42#include "lib/pci_vsc.h"
     43#include "lib/tout.h"
     44#include "diag/fw_tracer.h"
     45
/* Threshold used by the periodic health poll. */
enum {
	/* poll_health() logs and queues a report once miss_counter equals this */
	MAX_MISSES			= 3,
};
     49
/*
 * Health syndrome codes, as read from the health buffer's synd field.
 * hsynd_str() maps each value to a human-readable description.
 */
enum {
	MLX5_HEALTH_SYNDR_FW_ERR		= 0x1,
	MLX5_HEALTH_SYNDR_IRISC_ERR		= 0x7,
	MLX5_HEALTH_SYNDR_HW_UNRECOVERABLE_ERR	= 0x8,
	MLX5_HEALTH_SYNDR_CRC_ERR		= 0x9,
	MLX5_HEALTH_SYNDR_FETCH_PCI_ERR		= 0xa,
	MLX5_HEALTH_SYNDR_HW_FTL_ERR		= 0xb,
	MLX5_HEALTH_SYNDR_ASYNC_EQ_OVERRUN_ERR	= 0xc,
	MLX5_HEALTH_SYNDR_EQ_ERR		= 0xd,
	MLX5_HEALTH_SYNDR_EQ_INV		= 0xe,
	MLX5_HEALTH_SYNDR_FFSER_ERR		= 0xf,
	MLX5_HEALTH_SYNDR_HIGH_TEMP		= 0x10
};
     63
/* Bit numbers used with set_bit()/test_bit()/clear_bit() on health->flags. */
enum {
	/* While set, mlx5_trigger_health_work() refuses to queue fatal work */
	MLX5_DROP_NEW_HEALTH_WORK,
};
     67
/*
 * Result codes returned by mlx5_health_check_fatal_sensors().
 * MLX5_SENSOR_NO_ERR (0) means no fatal condition was found, so callers
 * can treat the return value as a boolean.
 */
enum  {
	MLX5_SENSOR_NO_ERR		= 0,
	MLX5_SENSOR_PCI_COMM_ERR	= 1,
	MLX5_SENSOR_PCI_ERR		= 2,
	MLX5_SENSOR_NIC_DISABLED	= 3,
	MLX5_SENSOR_NIC_SW_RESET	= 4,
	MLX5_SENSOR_FW_SYND_RFR		= 5,
};
     76
/*
 * Masks applied to the health buffer's rfr_severity byte by
 * mlx5_health_get_severity().
 */
enum {
	/* Low three bits: the severity value itself */
	MLX5_SEVERITY_MASK		= 0x7,
	/* Bit 3: when clear, the severity value is ignored */
	MLX5_SEVERITY_VALID_MASK	= 0x8,
};
     81
     82u8 mlx5_get_nic_state(struct mlx5_core_dev *dev)
     83{
     84	return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 7;
     85}
     86
     87void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state)
     88{
     89	u32 cur_cmdq_addr_l_sz;
     90
     91	cur_cmdq_addr_l_sz = ioread32be(&dev->iseg->cmdq_addr_l_sz);
     92	iowrite32be((cur_cmdq_addr_l_sz & 0xFFFFF000) |
     93		    state << MLX5_NIC_IFC_OFFSET,
     94		    &dev->iseg->cmdq_addr_l_sz);
     95}
     96
     97static bool sensor_pci_not_working(struct mlx5_core_dev *dev)
     98{
     99	struct mlx5_core_health *health = &dev->priv.health;
    100	struct health_buffer __iomem *h = health->health;
    101
    102	/* Offline PCI reads return 0xffffffff */
    103	return (ioread32be(&h->fw_ver) == 0xffffffff);
    104}
    105
    106static int mlx5_health_get_rfr(u8 rfr_severity)
    107{
    108	return rfr_severity >> MLX5_RFR_BIT_OFFSET;
    109}
    110
    111static bool sensor_fw_synd_rfr(struct mlx5_core_dev *dev)
    112{
    113	struct mlx5_core_health *health = &dev->priv.health;
    114	struct health_buffer __iomem *h = health->health;
    115	u8 synd = ioread8(&h->synd);
    116	u8 rfr;
    117
    118	rfr = mlx5_health_get_rfr(ioread8(&h->rfr_severity));
    119
    120	if (rfr && synd)
    121		mlx5_core_dbg(dev, "FW requests reset, synd: %d\n", synd);
    122	return rfr && synd;
    123}
    124
    125u32 mlx5_health_check_fatal_sensors(struct mlx5_core_dev *dev)
    126{
    127	if (sensor_pci_not_working(dev))
    128		return MLX5_SENSOR_PCI_COMM_ERR;
    129	if (pci_channel_offline(dev->pdev))
    130		return MLX5_SENSOR_PCI_ERR;
    131	if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
    132		return MLX5_SENSOR_NIC_DISABLED;
    133	if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_SW_RESET)
    134		return MLX5_SENSOR_NIC_SW_RESET;
    135	if (sensor_fw_synd_rfr(dev))
    136		return MLX5_SENSOR_FW_SYND_RFR;
    137
    138	return MLX5_SENSOR_NO_ERR;
    139}
    140
    141static int lock_sem_sw_reset(struct mlx5_core_dev *dev, bool lock)
    142{
    143	enum mlx5_vsc_state state;
    144	int ret;
    145
    146	if (!mlx5_core_is_pf(dev))
    147		return -EBUSY;
    148
    149	/* Try to lock GW access, this stage doesn't return
    150	 * EBUSY because locked GW does not mean that other PF
    151	 * already started the reset.
    152	 */
    153	ret = mlx5_vsc_gw_lock(dev);
    154	if (ret == -EBUSY)
    155		return -EINVAL;
    156	if (ret)
    157		return ret;
    158
    159	state = lock ? MLX5_VSC_LOCK : MLX5_VSC_UNLOCK;
    160	/* At this stage, if the return status == EBUSY, then we know
    161	 * for sure that another PF started the reset, so don't allow
    162	 * another reset.
    163	 */
    164	ret = mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET, state);
    165	if (ret)
    166		mlx5_core_warn(dev, "Failed to lock SW reset semaphore\n");
    167
    168	/* Unlock GW access */
    169	mlx5_vsc_gw_unlock(dev);
    170
    171	return ret;
    172}
    173
    174static bool reset_fw_if_needed(struct mlx5_core_dev *dev)
    175{
    176	bool supported = (ioread32be(&dev->iseg->initializing) >>
    177			  MLX5_FW_RESET_SUPPORTED_OFFSET) & 1;
    178	u32 fatal_error;
    179
    180	if (!supported)
    181		return false;
    182
    183	/* The reset only needs to be issued by one PF. The health buffer is
    184	 * shared between all functions, and will be cleared during a reset.
    185	 * Check again to avoid a redundant 2nd reset. If the fatal errors was
    186	 * PCI related a reset won't help.
    187	 */
    188	fatal_error = mlx5_health_check_fatal_sensors(dev);
    189	if (fatal_error == MLX5_SENSOR_PCI_COMM_ERR ||
    190	    fatal_error == MLX5_SENSOR_NIC_DISABLED ||
    191	    fatal_error == MLX5_SENSOR_NIC_SW_RESET) {
    192		mlx5_core_warn(dev, "Not issuing FW reset. Either it's already done or won't help.");
    193		return false;
    194	}
    195
    196	mlx5_core_warn(dev, "Issuing FW Reset\n");
    197	/* Write the NIC interface field to initiate the reset, the command
    198	 * interface address also resides here, don't overwrite it.
    199	 */
    200	mlx5_set_nic_state(dev, MLX5_NIC_IFC_SW_RESET);
    201
    202	return true;
    203}
    204
    205static void enter_error_state(struct mlx5_core_dev *dev, bool force)
    206{
    207	if (mlx5_health_check_fatal_sensors(dev) || force) { /* protected state setting */
    208		dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
    209		mlx5_cmd_flush(dev);
    210	}
    211
    212	mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1);
    213}
    214
    215void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
    216{
    217	bool err_detected = false;
    218
    219	/* Mark the device as fatal in order to abort FW commands */
    220	if ((mlx5_health_check_fatal_sensors(dev) || force) &&
    221	    dev->state == MLX5_DEVICE_STATE_UP) {
    222		dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
    223		err_detected = true;
    224	}
    225	mutex_lock(&dev->intf_state_mutex);
    226	if (!err_detected && dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
    227		goto unlock;/* a previous error is still being handled */
    228
    229	enter_error_state(dev, force);
    230unlock:
    231	mutex_unlock(&dev->intf_state_mutex);
    232}
    233
/*
 * Attempt a SW reset of a device that is in the internal-error state and
 * wait for the NIC interface to report MLX5_NIC_IFC_DISABLED.
 *
 * Runs entirely under intf_state_mutex and does nothing when the device is
 * not in MLX5_DEVICE_STATE_INTERNAL_ERROR.
 */
void mlx5_error_sw_reset(struct mlx5_core_dev *dev)
{
	unsigned long end, delay_ms = mlx5_tout_ms(dev, PCI_TOGGLE);
	/* Non-zero means this function does not own the SW reset semaphore */
	int lock = -EBUSY;

	mutex_lock(&dev->intf_state_mutex);
	if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)
		goto unlock;

	mlx5_core_err(dev, "start\n");

	/* The reset is only attempted when the fatal sensor is FW_SYND_RFR */
	if (mlx5_health_check_fatal_sensors(dev) == MLX5_SENSOR_FW_SYND_RFR) {
		/* Get cr-dump and reset FW semaphore */
		lock = lock_sem_sw_reset(dev, true);

		if (lock == -EBUSY) {
			/* Skip the reset and wait with the FULL_CRDUMP timeout instead */
			delay_ms = mlx5_tout_ms(dev, FULL_CRDUMP);
			goto recover_from_sw_reset;
		}
		/* Execute SW reset */
		reset_fw_if_needed(dev);
	}

recover_from_sw_reset:
	/* Recover from SW reset */
	end = jiffies + msecs_to_jiffies(delay_ms);
	/* Poll every 20ms until the NIC reports DISABLED or the deadline passes */
	do {
		if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
			break;

		msleep(20);
	} while (!time_after(jiffies, end));

	if (mlx5_get_nic_state(dev) != MLX5_NIC_IFC_DISABLED) {
		dev_err(&dev->pdev->dev, "NIC IFC still %d after %lums.\n",
			mlx5_get_nic_state(dev), delay_ms);
	}

	/* Release FW semaphore if you are the lock owner */
	if (!lock)
		lock_sem_sw_reset(dev, false);

	mlx5_core_err(dev, "end\n");

unlock:
	mutex_unlock(&dev->intf_state_mutex);
}
    281
    282static void mlx5_handle_bad_state(struct mlx5_core_dev *dev)
    283{
    284	u8 nic_interface = mlx5_get_nic_state(dev);
    285
    286	switch (nic_interface) {
    287	case MLX5_NIC_IFC_FULL:
    288		mlx5_core_warn(dev, "Expected to see disabled NIC but it is full driver\n");
    289		break;
    290
    291	case MLX5_NIC_IFC_DISABLED:
    292		mlx5_core_warn(dev, "starting teardown\n");
    293		break;
    294
    295	case MLX5_NIC_IFC_NO_DRAM_NIC:
    296		mlx5_core_warn(dev, "Expected to see disabled NIC but it is no dram nic\n");
    297		break;
    298
    299	case MLX5_NIC_IFC_SW_RESET:
    300		/* The IFC mode field is 3 bits, so it will read 0x7 in 2 cases:
    301		 * 1. PCI has been disabled (ie. PCI-AER, PF driver unloaded
    302		 *    and this is a VF), this is not recoverable by SW reset.
    303		 *    Logging of this is handled elsewhere.
    304		 * 2. FW reset has been issued by another function, driver can
    305		 *    be reloaded to recover after the mode switches to
    306		 *    MLX5_NIC_IFC_DISABLED.
    307		 */
    308		if (dev->priv.health.fatal_error != MLX5_SENSOR_PCI_COMM_ERR)
    309			mlx5_core_warn(dev, "NIC SW reset in progress\n");
    310		break;
    311
    312	default:
    313		mlx5_core_warn(dev, "Expected to see disabled NIC but it is has invalid value %d\n",
    314			       nic_interface);
    315	}
    316
    317	mlx5_disable_device(dev);
    318}
    319
    320int mlx5_health_wait_pci_up(struct mlx5_core_dev *dev)
    321{
    322	unsigned long end;
    323
    324	end = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FW_RESET));
    325	while (sensor_pci_not_working(dev)) {
    326		if (time_after(jiffies, end))
    327			return -ETIMEDOUT;
    328		msleep(100);
    329	}
    330	return 0;
    331}
    332
    333static int mlx5_health_try_recover(struct mlx5_core_dev *dev)
    334{
    335	mlx5_core_warn(dev, "handling bad device here\n");
    336	mlx5_handle_bad_state(dev);
    337	if (mlx5_health_wait_pci_up(dev)) {
    338		mlx5_core_err(dev, "health recovery flow aborted, PCI reads still not working\n");
    339		return -EIO;
    340	}
    341	mlx5_core_err(dev, "starting health recovery flow\n");
    342	if (mlx5_recover_device(dev) || mlx5_health_check_fatal_sensors(dev)) {
    343		mlx5_core_err(dev, "health recovery failed\n");
    344		return -EIO;
    345	}
    346
    347	mlx5_core_info(dev, "health recovery succeeded\n");
    348	return 0;
    349}
    350
    351static const char *hsynd_str(u8 synd)
    352{
    353	switch (synd) {
    354	case MLX5_HEALTH_SYNDR_FW_ERR:
    355		return "firmware internal error";
    356	case MLX5_HEALTH_SYNDR_IRISC_ERR:
    357		return "irisc not responding";
    358	case MLX5_HEALTH_SYNDR_HW_UNRECOVERABLE_ERR:
    359		return "unrecoverable hardware error";
    360	case MLX5_HEALTH_SYNDR_CRC_ERR:
    361		return "firmware CRC error";
    362	case MLX5_HEALTH_SYNDR_FETCH_PCI_ERR:
    363		return "ICM fetch PCI error";
    364	case MLX5_HEALTH_SYNDR_HW_FTL_ERR:
    365		return "HW fatal error\n";
    366	case MLX5_HEALTH_SYNDR_ASYNC_EQ_OVERRUN_ERR:
    367		return "async EQ buffer overrun";
    368	case MLX5_HEALTH_SYNDR_EQ_ERR:
    369		return "EQ error";
    370	case MLX5_HEALTH_SYNDR_EQ_INV:
    371		return "Invalid EQ referenced";
    372	case MLX5_HEALTH_SYNDR_FFSER_ERR:
    373		return "FFSER error";
    374	case MLX5_HEALTH_SYNDR_HIGH_TEMP:
    375		return "High temperature";
    376	default:
    377		return "unrecognized error";
    378	}
    379}
    380
    381static const char *mlx5_loglevel_str(int level)
    382{
    383	switch (level) {
    384	case LOGLEVEL_EMERG:
    385		return "EMERGENCY";
    386	case LOGLEVEL_ALERT:
    387		return "ALERT";
    388	case LOGLEVEL_CRIT:
    389		return "CRITICAL";
    390	case LOGLEVEL_ERR:
    391		return "ERROR";
    392	case LOGLEVEL_WARNING:
    393		return "WARNING";
    394	case LOGLEVEL_NOTICE:
    395		return "NOTICE";
    396	case LOGLEVEL_INFO:
    397		return "INFO";
    398	case LOGLEVEL_DEBUG:
    399		return "DEBUG";
    400	}
    401	return "Unknown log level";
    402}
    403
    404static int mlx5_health_get_severity(u8 rfr_severity)
    405{
    406	return rfr_severity & MLX5_SEVERITY_VALID_MASK ?
    407	       rfr_severity & MLX5_SEVERITY_MASK : LOGLEVEL_ERR;
    408}
    409
    410static void print_health_info(struct mlx5_core_dev *dev)
    411{
    412	struct mlx5_core_health *health = &dev->priv.health;
    413	struct health_buffer __iomem *h = health->health;
    414	u8 rfr_severity;
    415	int severity;
    416	int i;
    417
    418	/* If the syndrome is 0, the device is OK and no need to print buffer */
    419	if (!ioread8(&h->synd))
    420		return;
    421
    422	if (ioread32be(&h->fw_ver) == 0xFFFFFFFF) {
    423		mlx5_log(dev, LOGLEVEL_ERR, "PCI slot is unavailable\n");
    424		return;
    425	}
    426
    427	rfr_severity = ioread8(&h->rfr_severity);
    428	severity  = mlx5_health_get_severity(rfr_severity);
    429	mlx5_log(dev, severity, "Health issue observed, %s, severity(%d) %s:\n",
    430		 hsynd_str(ioread8(&h->synd)), severity, mlx5_loglevel_str(severity));
    431
    432	for (i = 0; i < ARRAY_SIZE(h->assert_var); i++)
    433		mlx5_log(dev, severity, "assert_var[%d] 0x%08x\n", i,
    434			 ioread32be(h->assert_var + i));
    435
    436	mlx5_log(dev, severity, "assert_exit_ptr 0x%08x\n", ioread32be(&h->assert_exit_ptr));
    437	mlx5_log(dev, severity, "assert_callra 0x%08x\n", ioread32be(&h->assert_callra));
    438	mlx5_log(dev, severity, "fw_ver %d.%d.%d", fw_rev_maj(dev), fw_rev_min(dev),
    439		 fw_rev_sub(dev));
    440	mlx5_log(dev, severity, "time %u\n", ioread32be(&h->time));
    441	mlx5_log(dev, severity, "hw_id 0x%08x\n", ioread32be(&h->hw_id));
    442	mlx5_log(dev, severity, "rfr %d\n", mlx5_health_get_rfr(rfr_severity));
    443	mlx5_log(dev, severity, "severity %d (%s)\n", severity, mlx5_loglevel_str(severity));
    444	mlx5_log(dev, severity, "irisc_index %d\n", ioread8(&h->irisc_index));
    445	mlx5_log(dev, severity, "synd 0x%x: %s\n", ioread8(&h->synd),
    446		 hsynd_str(ioread8(&h->synd)));
    447	mlx5_log(dev, severity, "ext_synd 0x%04x\n", ioread16be(&h->ext_synd));
    448	mlx5_log(dev, severity, "raw fw_ver 0x%08x\n", ioread32be(&h->fw_ver));
    449}
    450
    451static int
    452mlx5_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
    453			  struct devlink_fmsg *fmsg,
    454			  struct netlink_ext_ack *extack)
    455{
    456	struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
    457	struct mlx5_core_health *health = &dev->priv.health;
    458	struct health_buffer __iomem *h = health->health;
    459	u8 synd;
    460	int err;
    461
    462	synd = ioread8(&h->synd);
    463	err = devlink_fmsg_u8_pair_put(fmsg, "Syndrome", synd);
    464	if (err || !synd)
    465		return err;
    466	return devlink_fmsg_string_pair_put(fmsg, "Description", hsynd_str(synd));
    467}
    468
/*
 * Snapshot handed to devlink_health_report() as the private context and
 * serialized by mlx5_fw_reporter_ctx_pairs_put().
 */
struct mlx5_fw_reporter_ctx {
	/* Copy of health->synd taken when the report work ran */
	u8 err_synd;
	/* Copy of health->miss_counter taken when the report work ran */
	int miss_counter;
};
    473
    474static int
    475mlx5_fw_reporter_ctx_pairs_put(struct devlink_fmsg *fmsg,
    476			       struct mlx5_fw_reporter_ctx *fw_reporter_ctx)
    477{
    478	int err;
    479
    480	err = devlink_fmsg_u8_pair_put(fmsg, "syndrome",
    481				       fw_reporter_ctx->err_synd);
    482	if (err)
    483		return err;
    484	err = devlink_fmsg_u32_pair_put(fmsg, "fw_miss_counter",
    485					fw_reporter_ctx->miss_counter);
    486	if (err)
    487		return err;
    488	return 0;
    489}
    490
    491static int
    492mlx5_fw_reporter_heath_buffer_data_put(struct mlx5_core_dev *dev,
    493				       struct devlink_fmsg *fmsg)
    494{
    495	struct mlx5_core_health *health = &dev->priv.health;
    496	struct health_buffer __iomem *h = health->health;
    497	u8 rfr_severity;
    498	int err;
    499	int i;
    500
    501	if (!ioread8(&h->synd))
    502		return 0;
    503
    504	err = devlink_fmsg_pair_nest_start(fmsg, "health buffer");
    505	if (err)
    506		return err;
    507	err = devlink_fmsg_obj_nest_start(fmsg);
    508	if (err)
    509		return err;
    510	err = devlink_fmsg_arr_pair_nest_start(fmsg, "assert_var");
    511	if (err)
    512		return err;
    513
    514	for (i = 0; i < ARRAY_SIZE(h->assert_var); i++) {
    515		err = devlink_fmsg_u32_put(fmsg, ioread32be(h->assert_var + i));
    516		if (err)
    517			return err;
    518	}
    519	err = devlink_fmsg_arr_pair_nest_end(fmsg);
    520	if (err)
    521		return err;
    522	err = devlink_fmsg_u32_pair_put(fmsg, "assert_exit_ptr",
    523					ioread32be(&h->assert_exit_ptr));
    524	if (err)
    525		return err;
    526	err = devlink_fmsg_u32_pair_put(fmsg, "assert_callra",
    527					ioread32be(&h->assert_callra));
    528	if (err)
    529		return err;
    530	err = devlink_fmsg_u32_pair_put(fmsg, "time", ioread32be(&h->time));
    531	if (err)
    532		return err;
    533	err = devlink_fmsg_u32_pair_put(fmsg, "hw_id", ioread32be(&h->hw_id));
    534	if (err)
    535		return err;
    536	rfr_severity = ioread8(&h->rfr_severity);
    537	err = devlink_fmsg_u8_pair_put(fmsg, "rfr", mlx5_health_get_rfr(rfr_severity));
    538	if (err)
    539		return err;
    540	err = devlink_fmsg_u8_pair_put(fmsg, "severity", mlx5_health_get_severity(rfr_severity));
    541	if (err)
    542		return err;
    543	err = devlink_fmsg_u8_pair_put(fmsg, "irisc_index",
    544				       ioread8(&h->irisc_index));
    545	if (err)
    546		return err;
    547	err = devlink_fmsg_u8_pair_put(fmsg, "synd", ioread8(&h->synd));
    548	if (err)
    549		return err;
    550	err = devlink_fmsg_u32_pair_put(fmsg, "ext_synd",
    551					ioread16be(&h->ext_synd));
    552	if (err)
    553		return err;
    554	err = devlink_fmsg_u32_pair_put(fmsg, "raw_fw_ver",
    555					ioread32be(&h->fw_ver));
    556	if (err)
    557		return err;
    558	err = devlink_fmsg_obj_nest_end(fmsg);
    559	if (err)
    560		return err;
    561	return devlink_fmsg_pair_nest_end(fmsg);
    562}
    563
    564static int
    565mlx5_fw_reporter_dump(struct devlink_health_reporter *reporter,
    566		      struct devlink_fmsg *fmsg, void *priv_ctx,
    567		      struct netlink_ext_ack *extack)
    568{
    569	struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
    570	int err;
    571
    572	err = mlx5_fw_tracer_trigger_core_dump_general(dev);
    573	if (err)
    574		return err;
    575
    576	if (priv_ctx) {
    577		struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx;
    578
    579		err = mlx5_fw_reporter_ctx_pairs_put(fmsg, fw_reporter_ctx);
    580		if (err)
    581			return err;
    582	}
    583
    584	err = mlx5_fw_reporter_heath_buffer_data_put(dev, fmsg);
    585	if (err)
    586		return err;
    587	return mlx5_fw_tracer_get_saved_traces_objects(dev->tracer, fmsg);
    588}
    589
    590static void mlx5_fw_reporter_err_work(struct work_struct *work)
    591{
    592	struct mlx5_fw_reporter_ctx fw_reporter_ctx;
    593	struct mlx5_core_health *health;
    594
    595	health = container_of(work, struct mlx5_core_health, report_work);
    596
    597	if (IS_ERR_OR_NULL(health->fw_reporter))
    598		return;
    599
    600	fw_reporter_ctx.err_synd = health->synd;
    601	fw_reporter_ctx.miss_counter = health->miss_counter;
    602	if (fw_reporter_ctx.err_synd) {
    603		devlink_health_report(health->fw_reporter,
    604				      "FW syndrom reported", &fw_reporter_ctx);
    605		return;
    606	}
    607	if (fw_reporter_ctx.miss_counter)
    608		devlink_health_report(health->fw_reporter,
    609				      "FW miss counter reported",
    610				      &fw_reporter_ctx);
    611}
    612
/*
 * Callbacks of the "fw" devlink health reporter. It offers diagnose and
 * dump only; no recover callback is registered.
 */
static const struct devlink_health_reporter_ops mlx5_fw_reporter_ops = {
		.name = "fw",
		.diagnose = mlx5_fw_reporter_diagnose,
		.dump = mlx5_fw_reporter_dump,
};
    618
    619static int
    620mlx5_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter,
    621			       void *priv_ctx,
    622			       struct netlink_ext_ack *extack)
    623{
    624	struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
    625
    626	return mlx5_health_try_recover(dev);
    627}
    628
    629static int
    630mlx5_fw_fatal_reporter_dump(struct devlink_health_reporter *reporter,
    631			    struct devlink_fmsg *fmsg, void *priv_ctx,
    632			    struct netlink_ext_ack *extack)
    633{
    634	struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
    635	u32 crdump_size = dev->priv.health.crdump_size;
    636	u32 *cr_data;
    637	int err;
    638
    639	if (!mlx5_core_is_pf(dev))
    640		return -EPERM;
    641
    642	cr_data = kvmalloc(crdump_size, GFP_KERNEL);
    643	if (!cr_data)
    644		return -ENOMEM;
    645	err = mlx5_crdump_collect(dev, cr_data);
    646	if (err)
    647		goto free_data;
    648
    649	if (priv_ctx) {
    650		struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx;
    651
    652		err = mlx5_fw_reporter_ctx_pairs_put(fmsg, fw_reporter_ctx);
    653		if (err)
    654			goto free_data;
    655	}
    656
    657	err = devlink_fmsg_binary_pair_put(fmsg, "crdump_data", cr_data, crdump_size);
    658
    659free_data:
    660	kvfree(cr_data);
    661	return err;
    662}
    663
    664static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work)
    665{
    666	struct mlx5_fw_reporter_ctx fw_reporter_ctx;
    667	struct mlx5_core_health *health;
    668	struct mlx5_core_dev *dev;
    669	struct mlx5_priv *priv;
    670
    671	health = container_of(work, struct mlx5_core_health, fatal_report_work);
    672	priv = container_of(health, struct mlx5_priv, health);
    673	dev = container_of(priv, struct mlx5_core_dev, priv);
    674
    675	enter_error_state(dev, false);
    676	if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) {
    677		if (mlx5_health_try_recover(dev))
    678			mlx5_core_err(dev, "health recovery failed\n");
    679		return;
    680	}
    681	fw_reporter_ctx.err_synd = health->synd;
    682	fw_reporter_ctx.miss_counter = health->miss_counter;
    683	if (devlink_health_report(health->fw_fatal_reporter,
    684				  "FW fatal error reported", &fw_reporter_ctx) == -ECANCELED) {
    685		/* If recovery wasn't performed, due to grace period,
    686		 * unload the driver. This ensures that the driver
    687		 * closes all its resources and it is not subjected to
    688		 * requests from the kernel.
    689		 */
    690		mlx5_core_err(dev, "Driver is in error state. Unloading\n");
    691		mlx5_unload_one(dev);
    692	}
    693}
    694
/*
 * Callbacks of the "fw_fatal" devlink health reporter. It offers recover
 * and dump only; no diagnose callback is registered.
 */
static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = {
		.name = "fw_fatal",
		.recover = mlx5_fw_fatal_reporter_recover,
		.dump = mlx5_fw_fatal_reporter_dump,
};
    700
    701#define MLX5_REPORTER_FW_GRACEFUL_PERIOD 1200000
    702static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev)
    703{
    704	struct mlx5_core_health *health = &dev->priv.health;
    705	struct devlink *devlink = priv_to_devlink(dev);
    706
    707	health->fw_reporter =
    708		devlink_health_reporter_create(devlink, &mlx5_fw_reporter_ops,
    709					       0, dev);
    710	if (IS_ERR(health->fw_reporter))
    711		mlx5_core_warn(dev, "Failed to create fw reporter, err = %ld\n",
    712			       PTR_ERR(health->fw_reporter));
    713
    714	health->fw_fatal_reporter =
    715		devlink_health_reporter_create(devlink,
    716					       &mlx5_fw_fatal_reporter_ops,
    717					       MLX5_REPORTER_FW_GRACEFUL_PERIOD,
    718					       dev);
    719	if (IS_ERR(health->fw_fatal_reporter))
    720		mlx5_core_warn(dev, "Failed to create fw fatal reporter, err = %ld\n",
    721			       PTR_ERR(health->fw_fatal_reporter));
    722}
    723
    724static void mlx5_fw_reporters_destroy(struct mlx5_core_dev *dev)
    725{
    726	struct mlx5_core_health *health = &dev->priv.health;
    727
    728	if (!IS_ERR_OR_NULL(health->fw_reporter))
    729		devlink_health_reporter_destroy(health->fw_reporter);
    730
    731	if (!IS_ERR_OR_NULL(health->fw_fatal_reporter))
    732		devlink_health_reporter_destroy(health->fw_fatal_reporter);
    733}
    734
    735static unsigned long get_next_poll_jiffies(struct mlx5_core_dev *dev)
    736{
    737	unsigned long next;
    738
    739	get_random_bytes(&next, sizeof(next));
    740	next %= HZ;
    741	next += jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, HEALTH_POLL_INTERVAL));
    742
    743	return next;
    744}
    745
    746void mlx5_trigger_health_work(struct mlx5_core_dev *dev)
    747{
    748	struct mlx5_core_health *health = &dev->priv.health;
    749	unsigned long flags;
    750
    751	spin_lock_irqsave(&health->wq_lock, flags);
    752	if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags))
    753		queue_work(health->wq, &health->fatal_report_work);
    754	else
    755		mlx5_core_err(dev, "new health works are not permitted at this stage\n");
    756	spin_unlock_irqrestore(&health->wq_lock, flags);
    757}
    758
    759#define MLX5_MSEC_PER_HOUR (MSEC_PER_SEC * 60 * 60)
    760static void mlx5_health_log_ts_update(struct work_struct *work)
    761{
    762	struct delayed_work *dwork = to_delayed_work(work);
    763	u32 out[MLX5_ST_SZ_DW(mrtc_reg)] = {};
    764	u32 in[MLX5_ST_SZ_DW(mrtc_reg)] = {};
    765	struct mlx5_core_health *health;
    766	struct mlx5_core_dev *dev;
    767	struct mlx5_priv *priv;
    768	u64 now_us;
    769
    770	health = container_of(dwork, struct mlx5_core_health, update_fw_log_ts_work);
    771	priv = container_of(health, struct mlx5_priv, health);
    772	dev = container_of(priv, struct mlx5_core_dev, priv);
    773
    774	now_us =  ktime_to_us(ktime_get_real());
    775
    776	MLX5_SET(mrtc_reg, in, time_h, now_us >> 32);
    777	MLX5_SET(mrtc_reg, in, time_l, now_us & 0xFFFFFFFF);
    778	mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_MRTC, 0, 1);
    779
    780	queue_delayed_work(health->wq, &health->update_fw_log_ts_work,
    781			   msecs_to_jiffies(MLX5_MSEC_PER_HOUR));
    782}
    783
/*
 * Periodic health timer callback.
 *
 * On each tick it checks the fatal sensors, tracks whether the device's
 * health counter is advancing, and watches for syndrome changes. The
 * timer is re-armed on every path except the one that hands a newly
 * detected fatal error to the health work.
 */
static void poll_health(struct timer_list *t)
{
	struct mlx5_core_dev *dev = from_timer(dev, t, priv.health.timer);
	struct mlx5_core_health *health = &dev->priv.health;
	struct health_buffer __iomem *h = health->health;
	u32 fatal_error;
	u8 prev_synd;
	u32 count;

	/* Already in the error state: skip all checks and just re-arm */
	if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
		goto out;

	fatal_error = mlx5_health_check_fatal_sensors(dev);

	/* Only the first fatal error is acted upon; later ones fall through */
	if (fatal_error && !health->fatal_error) {
		mlx5_core_err(dev, "Fatal error %u detected\n", fatal_error);
		dev->priv.health.fatal_error = fatal_error;
		print_health_info(dev);
		dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
		mlx5_trigger_health_work(dev);
		/* Returning here leaves the timer un-armed */
		return;
	}

	/* An unchanged counter since the previous tick counts as a miss */
	count = ioread32be(health->health_counter);
	if (count == health->prev)
		++health->miss_counter;
	else
		health->miss_counter = 0;

	health->prev = count;
	/* Equality, not >=: this fires once, on the tick the limit is hit */
	if (health->miss_counter == MAX_MISSES) {
		mlx5_core_err(dev, "device's health compromised - reached miss count\n");
		print_health_info(dev);
		queue_work(health->wq, &health->report_work);
	}

	/* Report whenever the syndrome changes to a new non-zero value */
	prev_synd = health->synd;
	health->synd = ioread8(&h->synd);
	if (health->synd && health->synd != prev_synd)
		queue_work(health->wq, &health->report_work);

out:
	mod_timer(&health->timer, get_next_poll_jiffies(dev));
}
    828
    829void mlx5_start_health_poll(struct mlx5_core_dev *dev)
    830{
    831	u64 poll_interval_ms =  mlx5_tout_ms(dev, HEALTH_POLL_INTERVAL);
    832	struct mlx5_core_health *health = &dev->priv.health;
    833
    834	timer_setup(&health->timer, poll_health, 0);
    835	health->fatal_error = MLX5_SENSOR_NO_ERR;
    836	clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
    837	health->health = &dev->iseg->health;
    838	health->health_counter = &dev->iseg->health_counter;
    839
    840	health->timer.expires = jiffies + msecs_to_jiffies(poll_interval_ms);
    841	add_timer(&health->timer);
    842
    843	if (mlx5_core_is_pf(dev) && MLX5_CAP_MCAM_REG(dev, mrtc))
    844		queue_delayed_work(health->wq, &health->update_fw_log_ts_work, 0);
    845}
    846
    847void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health)
    848{
    849	struct mlx5_core_health *health = &dev->priv.health;
    850	unsigned long flags;
    851
    852	if (disable_health) {
    853		spin_lock_irqsave(&health->wq_lock, flags);
    854		set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
    855		spin_unlock_irqrestore(&health->wq_lock, flags);
    856	}
    857
    858	del_timer_sync(&health->timer);
    859}
    860
    861void mlx5_drain_health_wq(struct mlx5_core_dev *dev)
    862{
    863	struct mlx5_core_health *health = &dev->priv.health;
    864	unsigned long flags;
    865
    866	spin_lock_irqsave(&health->wq_lock, flags);
    867	set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
    868	spin_unlock_irqrestore(&health->wq_lock, flags);
    869	cancel_delayed_work_sync(&health->update_fw_log_ts_work);
    870	cancel_work_sync(&health->report_work);
    871	cancel_work_sync(&health->fatal_report_work);
    872}
    873
    874void mlx5_health_flush(struct mlx5_core_dev *dev)
    875{
    876	struct mlx5_core_health *health = &dev->priv.health;
    877
    878	flush_workqueue(health->wq);
    879}
    880
    881void mlx5_health_cleanup(struct mlx5_core_dev *dev)
    882{
    883	struct mlx5_core_health *health = &dev->priv.health;
    884
    885	cancel_delayed_work_sync(&health->update_fw_log_ts_work);
    886	destroy_workqueue(health->wq);
    887	mlx5_fw_reporters_destroy(dev);
    888}
    889
    890int mlx5_health_init(struct mlx5_core_dev *dev)
    891{
    892	struct mlx5_core_health *health;
    893	char *name;
    894
    895	mlx5_fw_reporters_create(dev);
    896
    897	health = &dev->priv.health;
    898	name = kmalloc(64, GFP_KERNEL);
    899	if (!name)
    900		goto out_err;
    901
    902	strcpy(name, "mlx5_health");
    903	strcat(name, dev_name(dev->device));
    904	health->wq = create_singlethread_workqueue(name);
    905	kfree(name);
    906	if (!health->wq)
    907		goto out_err;
    908	spin_lock_init(&health->wq_lock);
    909	INIT_WORK(&health->fatal_report_work, mlx5_fw_fatal_reporter_err_work);
    910	INIT_WORK(&health->report_work, mlx5_fw_reporter_err_work);
    911	INIT_DELAYED_WORK(&health->update_fw_log_ts_work, mlx5_health_log_ts_update);
    912
    913	return 0;
    914
    915out_err:
    916	mlx5_fw_reporters_destroy(dev);
    917	return -ENOMEM;
    918}