cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

health.c (7222B)


      1// SPDX-License-Identifier: GPL-2.0
      2// Copyright (c) 2019 Mellanox Technologies.
      3
      4#include "health.h"
      5#include "lib/eq.h"
      6#include "lib/mlx5.h"
      7
      8int mlx5e_health_fmsg_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name)
      9{
     10	int err;
     11
     12	err = devlink_fmsg_pair_nest_start(fmsg, name);
     13	if (err)
     14		return err;
     15
     16	err = devlink_fmsg_obj_nest_start(fmsg);
     17	if (err)
     18		return err;
     19
     20	return 0;
     21}
     22
     23int mlx5e_health_fmsg_named_obj_nest_end(struct devlink_fmsg *fmsg)
     24{
     25	int err;
     26
     27	err = devlink_fmsg_obj_nest_end(fmsg);
     28	if (err)
     29		return err;
     30
     31	err = devlink_fmsg_pair_nest_end(fmsg);
     32	if (err)
     33		return err;
     34
     35	return 0;
     36}
     37
     38int mlx5e_health_cq_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg)
     39{
     40	u32 out[MLX5_ST_SZ_DW(query_cq_out)] = {};
     41	u8 hw_status;
     42	void *cqc;
     43	int err;
     44
     45	err = mlx5_core_query_cq(cq->mdev, &cq->mcq, out);
     46	if (err)
     47		return err;
     48
     49	cqc = MLX5_ADDR_OF(query_cq_out, out, cq_context);
     50	hw_status = MLX5_GET(cqc, cqc, status);
     51
     52	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ");
     53	if (err)
     54		return err;
     55
     56	err = devlink_fmsg_u32_pair_put(fmsg, "cqn", cq->mcq.cqn);
     57	if (err)
     58		return err;
     59
     60	err = devlink_fmsg_u8_pair_put(fmsg, "HW status", hw_status);
     61	if (err)
     62		return err;
     63
     64	err = devlink_fmsg_u32_pair_put(fmsg, "ci", mlx5_cqwq_get_ci(&cq->wq));
     65	if (err)
     66		return err;
     67
     68	err = devlink_fmsg_u32_pair_put(fmsg, "size", mlx5_cqwq_get_size(&cq->wq));
     69	if (err)
     70		return err;
     71
     72	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
     73	if (err)
     74		return err;
     75
     76	return 0;
     77}
     78
     79int mlx5e_health_cq_common_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg)
     80{
     81	u8 cq_log_stride;
     82	u32 cq_sz;
     83	int err;
     84
     85	cq_sz = mlx5_cqwq_get_size(&cq->wq);
     86	cq_log_stride = mlx5_cqwq_get_log_stride_size(&cq->wq);
     87
     88	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ");
     89	if (err)
     90		return err;
     91
     92	err = devlink_fmsg_u64_pair_put(fmsg, "stride size", BIT(cq_log_stride));
     93	if (err)
     94		return err;
     95
     96	err = devlink_fmsg_u32_pair_put(fmsg, "size", cq_sz);
     97	if (err)
     98		return err;
     99
    100	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
    101	if (err)
    102		return err;
    103
    104	return 0;
    105}
    106
    107int mlx5e_health_eq_diag_fmsg(struct mlx5_eq_comp *eq, struct devlink_fmsg *fmsg)
    108{
    109	int err;
    110
    111	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "EQ");
    112	if (err)
    113		return err;
    114
    115	err = devlink_fmsg_u8_pair_put(fmsg, "eqn", eq->core.eqn);
    116	if (err)
    117		return err;
    118
    119	err = devlink_fmsg_u32_pair_put(fmsg, "irqn", eq->core.irqn);
    120	if (err)
    121		return err;
    122
    123	err = devlink_fmsg_u32_pair_put(fmsg, "vecidx", eq->core.vecidx);
    124	if (err)
    125		return err;
    126
    127	err = devlink_fmsg_u32_pair_put(fmsg, "ci", eq->core.cons_index);
    128	if (err)
    129		return err;
    130
    131	err = devlink_fmsg_u32_pair_put(fmsg, "size", eq_get_size(&eq->core));
    132	if (err)
    133		return err;
    134
    135	return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
    136}
    137
    138void mlx5e_health_create_reporters(struct mlx5e_priv *priv)
    139{
    140	mlx5e_reporter_tx_create(priv);
    141	mlx5e_reporter_rx_create(priv);
    142}
    143
    144void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv)
    145{
    146	mlx5e_reporter_rx_destroy(priv);
    147	mlx5e_reporter_tx_destroy(priv);
    148}
    149
    150void mlx5e_health_channels_update(struct mlx5e_priv *priv)
    151{
    152	if (priv->tx_reporter)
    153		devlink_health_reporter_state_update(priv->tx_reporter,
    154						     DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
    155	if (priv->rx_reporter)
    156		devlink_health_reporter_state_update(priv->rx_reporter,
    157						     DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
    158}
    159
    160int mlx5e_health_sq_to_ready(struct mlx5_core_dev *mdev, struct net_device *dev, u32 sqn)
    161{
    162	struct mlx5e_modify_sq_param msp = {};
    163	int err;
    164
    165	msp.curr_state = MLX5_SQC_STATE_ERR;
    166	msp.next_state = MLX5_SQC_STATE_RST;
    167
    168	err = mlx5e_modify_sq(mdev, sqn, &msp);
    169	if (err) {
    170		netdev_err(dev, "Failed to move sq 0x%x to reset\n", sqn);
    171		return err;
    172	}
    173
    174	memset(&msp, 0, sizeof(msp));
    175	msp.curr_state = MLX5_SQC_STATE_RST;
    176	msp.next_state = MLX5_SQC_STATE_RDY;
    177
    178	err = mlx5e_modify_sq(mdev, sqn, &msp);
    179	if (err) {
    180		netdev_err(dev, "Failed to move sq 0x%x to ready\n", sqn);
    181		return err;
    182	}
    183
    184	return 0;
    185}
    186
    187int mlx5e_health_recover_channels(struct mlx5e_priv *priv)
    188{
    189	int err = 0;
    190
    191	rtnl_lock();
    192	mutex_lock(&priv->state_lock);
    193
    194	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
    195		goto out;
    196
    197	err = mlx5e_safe_reopen_channels(priv);
    198
    199out:
    200	mutex_unlock(&priv->state_lock);
    201	rtnl_unlock();
    202
    203	return err;
    204}
    205
    206int mlx5e_health_channel_eq_recover(struct net_device *dev, struct mlx5_eq_comp *eq,
    207				    struct mlx5e_ch_stats *stats)
    208{
    209	u32 eqe_count;
    210
    211	netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
    212		   eq->core.eqn, eq->core.cons_index, eq->core.irqn);
    213
    214	eqe_count = mlx5_eq_poll_irq_disabled(eq);
    215	if (!eqe_count)
    216		return -EIO;
    217
    218	netdev_err(dev, "Recovered %d eqes on EQ 0x%x\n",
    219		   eqe_count, eq->core.eqn);
    220
    221	stats->eq_rearm++;
    222	return 0;
    223}
    224
    225int mlx5e_health_report(struct mlx5e_priv *priv,
    226			struct devlink_health_reporter *reporter, char *err_str,
    227			struct mlx5e_err_ctx *err_ctx)
    228{
    229	netdev_err(priv->netdev, "%s\n", err_str);
    230
    231	if (!reporter)
    232		return err_ctx->recover(err_ctx->ctx);
    233
    234	return devlink_health_report(reporter, err_str, err_ctx);
    235}
    236
    237#define MLX5_HEALTH_DEVLINK_MAX_SIZE 1024
    238static int mlx5e_health_rsc_fmsg_binary(struct devlink_fmsg *fmsg,
    239					const void *value, u32 value_len)
    240
    241{
    242	u32 data_size;
    243	int err = 0;
    244	u32 offset;
    245
    246	for (offset = 0; offset < value_len; offset += data_size) {
    247		data_size = value_len - offset;
    248		if (data_size > MLX5_HEALTH_DEVLINK_MAX_SIZE)
    249			data_size = MLX5_HEALTH_DEVLINK_MAX_SIZE;
    250		err = devlink_fmsg_binary_put(fmsg, value + offset, data_size);
    251		if (err)
    252			break;
    253	}
    254	return err;
    255}
    256
    257int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key,
    258			       struct devlink_fmsg *fmsg)
    259{
    260	struct mlx5_core_dev *mdev = priv->mdev;
    261	struct mlx5_rsc_dump_cmd *cmd;
    262	struct page *page;
    263	int cmd_err, err;
    264	int end_err;
    265	int size;
    266
    267	if (IS_ERR_OR_NULL(mdev->rsc_dump))
    268		return -EOPNOTSUPP;
    269
    270	page = alloc_page(GFP_KERNEL);
    271	if (!page)
    272		return -ENOMEM;
    273
    274	err = devlink_fmsg_binary_pair_nest_start(fmsg, "data");
    275	if (err)
    276		goto free_page;
    277
    278	cmd = mlx5_rsc_dump_cmd_create(mdev, key);
    279	if (IS_ERR(cmd)) {
    280		err = PTR_ERR(cmd);
    281		goto free_page;
    282	}
    283
    284	do {
    285		cmd_err = mlx5_rsc_dump_next(mdev, cmd, page, &size);
    286		if (cmd_err < 0) {
    287			err = cmd_err;
    288			goto destroy_cmd;
    289		}
    290
    291		err = mlx5e_health_rsc_fmsg_binary(fmsg, page_address(page), size);
    292		if (err)
    293			goto destroy_cmd;
    294
    295	} while (cmd_err > 0);
    296
    297destroy_cmd:
    298	mlx5_rsc_dump_cmd_destroy(cmd);
    299	end_err = devlink_fmsg_binary_pair_nest_end(fmsg);
    300	if (end_err)
    301		err = end_err;
    302free_page:
    303	__free_page(page);
    304	return err;
    305}
    306
    307int mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
    308			    int queue_idx, char *lbl)
    309{
    310	struct mlx5_rsc_key key = {};
    311	int err;
    312
    313	key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
    314	key.index1 = queue_idx;
    315	key.size = PAGE_SIZE;
    316	key.num_of_obj1 = 1;
    317
    318	err = devlink_fmsg_obj_nest_start(fmsg);
    319	if (err)
    320		return err;
    321
    322	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, lbl);
    323	if (err)
    324		return err;
    325
    326	err = devlink_fmsg_u32_pair_put(fmsg, "index", queue_idx);
    327	if (err)
    328		return err;
    329
    330	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
    331	if (err)
    332		return err;
    333
    334	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
    335	if (err)
    336		return err;
    337
    338	return devlink_fmsg_obj_nest_end(fmsg);
    339}