health.c (7222B)
1// SPDX-License-Identifier: GPL-2.0 2// Copyright (c) 2019 Mellanox Technologies. 3 4#include "health.h" 5#include "lib/eq.h" 6#include "lib/mlx5.h" 7 8int mlx5e_health_fmsg_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name) 9{ 10 int err; 11 12 err = devlink_fmsg_pair_nest_start(fmsg, name); 13 if (err) 14 return err; 15 16 err = devlink_fmsg_obj_nest_start(fmsg); 17 if (err) 18 return err; 19 20 return 0; 21} 22 23int mlx5e_health_fmsg_named_obj_nest_end(struct devlink_fmsg *fmsg) 24{ 25 int err; 26 27 err = devlink_fmsg_obj_nest_end(fmsg); 28 if (err) 29 return err; 30 31 err = devlink_fmsg_pair_nest_end(fmsg); 32 if (err) 33 return err; 34 35 return 0; 36} 37 38int mlx5e_health_cq_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg) 39{ 40 u32 out[MLX5_ST_SZ_DW(query_cq_out)] = {}; 41 u8 hw_status; 42 void *cqc; 43 int err; 44 45 err = mlx5_core_query_cq(cq->mdev, &cq->mcq, out); 46 if (err) 47 return err; 48 49 cqc = MLX5_ADDR_OF(query_cq_out, out, cq_context); 50 hw_status = MLX5_GET(cqc, cqc, status); 51 52 err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ"); 53 if (err) 54 return err; 55 56 err = devlink_fmsg_u32_pair_put(fmsg, "cqn", cq->mcq.cqn); 57 if (err) 58 return err; 59 60 err = devlink_fmsg_u8_pair_put(fmsg, "HW status", hw_status); 61 if (err) 62 return err; 63 64 err = devlink_fmsg_u32_pair_put(fmsg, "ci", mlx5_cqwq_get_ci(&cq->wq)); 65 if (err) 66 return err; 67 68 err = devlink_fmsg_u32_pair_put(fmsg, "size", mlx5_cqwq_get_size(&cq->wq)); 69 if (err) 70 return err; 71 72 err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 73 if (err) 74 return err; 75 76 return 0; 77} 78 79int mlx5e_health_cq_common_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg) 80{ 81 u8 cq_log_stride; 82 u32 cq_sz; 83 int err; 84 85 cq_sz = mlx5_cqwq_get_size(&cq->wq); 86 cq_log_stride = mlx5_cqwq_get_log_stride_size(&cq->wq); 87 88 err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ"); 89 if (err) 90 return err; 91 92 err = devlink_fmsg_u64_pair_put(fmsg, "stride size", BIT(cq_log_stride)); 93 if (err) 94 return err; 95 96 err = devlink_fmsg_u32_pair_put(fmsg, "size", cq_sz); 97 if (err) 98 return err; 99 100 err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 101 if (err) 102 return err; 103 104 return 0; 105} 106 107int mlx5e_health_eq_diag_fmsg(struct mlx5_eq_comp *eq, struct devlink_fmsg *fmsg) 108{ 109 int err; 110 111 err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "EQ"); 112 if (err) 113 return err; 114 115 err = devlink_fmsg_u8_pair_put(fmsg, "eqn", eq->core.eqn); 116 if (err) 117 return err; 118 119 err = devlink_fmsg_u32_pair_put(fmsg, "irqn", eq->core.irqn); 120 if (err) 121 return err; 122 123 err = devlink_fmsg_u32_pair_put(fmsg, "vecidx", eq->core.vecidx); 124 if (err) 125 return err; 126 127 err = devlink_fmsg_u32_pair_put(fmsg, "ci", eq->core.cons_index); 128 if (err) 129 return err; 130 131 err = devlink_fmsg_u32_pair_put(fmsg, "size", eq_get_size(&eq->core)); 132 if (err) 133 return err; 134 135 return mlx5e_health_fmsg_named_obj_nest_end(fmsg); 136} 137 138void mlx5e_health_create_reporters(struct mlx5e_priv *priv) 139{ 140 mlx5e_reporter_tx_create(priv); 141 mlx5e_reporter_rx_create(priv); 142} 143 144void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv) 145{ 146 mlx5e_reporter_rx_destroy(priv); 147 mlx5e_reporter_tx_destroy(priv); 148} 149 150void mlx5e_health_channels_update(struct mlx5e_priv *priv) 151{ 152 if (priv->tx_reporter) 153 devlink_health_reporter_state_update(priv->tx_reporter, 154 DEVLINK_HEALTH_REPORTER_STATE_HEALTHY); 155 if (priv->rx_reporter) 156 devlink_health_reporter_state_update(priv->rx_reporter, 157 DEVLINK_HEALTH_REPORTER_STATE_HEALTHY); 158} 159 160int mlx5e_health_sq_to_ready(struct mlx5_core_dev *mdev, struct net_device *dev, u32 sqn) 161{ 162 struct mlx5e_modify_sq_param msp = {}; 163 int err; 164 165 msp.curr_state = MLX5_SQC_STATE_ERR; 166 msp.next_state = MLX5_SQC_STATE_RST; 167 168 err = mlx5e_modify_sq(mdev, sqn, &msp); 169 if (err) { 170 netdev_err(dev, "Failed to move sq 0x%x to reset\n", sqn); 171 return err; 172 } 173 174 memset(&msp, 0, sizeof(msp)); 175 msp.curr_state = MLX5_SQC_STATE_RST; 176 msp.next_state = MLX5_SQC_STATE_RDY; 177 178 err = mlx5e_modify_sq(mdev, sqn, &msp); 179 if (err) { 180 netdev_err(dev, "Failed to move sq 0x%x to ready\n", sqn); 181 return err; 182 } 183 184 return 0; 185} 186 187int mlx5e_health_recover_channels(struct mlx5e_priv *priv) 188{ 189 int err = 0; 190 191 rtnl_lock(); 192 mutex_lock(&priv->state_lock); 193 194 if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) 195 goto out; 196 197 err = mlx5e_safe_reopen_channels(priv); 198 199out: 200 mutex_unlock(&priv->state_lock); 201 rtnl_unlock(); 202 203 return err; 204} 205 206int mlx5e_health_channel_eq_recover(struct net_device *dev, struct mlx5_eq_comp *eq, 207 struct mlx5e_ch_stats *stats) 208{ 209 u32 eqe_count; 210 211 netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n", 212 eq->core.eqn, eq->core.cons_index, eq->core.irqn); 213 214 eqe_count = mlx5_eq_poll_irq_disabled(eq); 215 if (!eqe_count) 216 return -EIO; 217 218 netdev_err(dev, "Recovered %d eqes on EQ 0x%x\n", 219 eqe_count, eq->core.eqn); 220 221 stats->eq_rearm++; 222 return 0; 223} 224 225int mlx5e_health_report(struct mlx5e_priv *priv, 226 struct devlink_health_reporter *reporter, char *err_str, 227 struct mlx5e_err_ctx *err_ctx) 228{ 229 netdev_err(priv->netdev, "%s\n", err_str); 230 231 if (!reporter) 232 return err_ctx->recover(err_ctx->ctx); 233 234 return devlink_health_report(reporter, err_str, err_ctx); 235} 236 237#define MLX5_HEALTH_DEVLINK_MAX_SIZE 1024 238static int mlx5e_health_rsc_fmsg_binary(struct devlink_fmsg *fmsg, 239 const void *value, u32 value_len) 240 241{ 242 u32 data_size; 243 int err = 0; 244 u32 offset; 245 246 for (offset = 0; offset < value_len; offset += data_size) { 247 data_size = value_len - offset; 248 if (data_size > MLX5_HEALTH_DEVLINK_MAX_SIZE) 249 data_size = MLX5_HEALTH_DEVLINK_MAX_SIZE; 250 err = devlink_fmsg_binary_put(fmsg, value + offset, data_size); 251 if (err) 252 break; 253 } 254 return err; 255} 256 257int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key, 258 struct devlink_fmsg *fmsg) 259{ 260 struct mlx5_core_dev *mdev = priv->mdev; 261 struct mlx5_rsc_dump_cmd *cmd; 262 struct page *page; 263 int cmd_err, err; 264 int end_err; 265 int size; 266 267 if (IS_ERR_OR_NULL(mdev->rsc_dump)) 268 return -EOPNOTSUPP; 269 270 page = alloc_page(GFP_KERNEL); 271 if (!page) 272 return -ENOMEM; 273 274 err = devlink_fmsg_binary_pair_nest_start(fmsg, "data"); 275 if (err) 276 goto free_page; 277 278 cmd = mlx5_rsc_dump_cmd_create(mdev, key); 279 if (IS_ERR(cmd)) { 280 err = PTR_ERR(cmd); 281 goto free_page; 282 } 283 284 do { 285 cmd_err = mlx5_rsc_dump_next(mdev, cmd, page, &size); 286 if (cmd_err < 0) { 287 err = cmd_err; 288 goto destroy_cmd; 289 } 290 291 err = mlx5e_health_rsc_fmsg_binary(fmsg, page_address(page), size); 292 if (err) 293 goto destroy_cmd; 294 295 } while (cmd_err > 0); 296 297destroy_cmd: 298 mlx5_rsc_dump_cmd_destroy(cmd); 299 end_err = devlink_fmsg_binary_pair_nest_end(fmsg); 300 if (end_err) 301 err = end_err; 302free_page: 303 __free_page(page); 304 return err; 305} 306 307int mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, 308 int queue_idx, char *lbl) 309{ 310 struct mlx5_rsc_key key = {}; 311 int err; 312 313 key.rsc = MLX5_SGMT_TYPE_FULL_QPC; 314 key.index1 = queue_idx; 315 key.size = PAGE_SIZE; 316 key.num_of_obj1 = 1; 317 318 err = devlink_fmsg_obj_nest_start(fmsg); 319 if (err) 320 return err; 321 322 err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, lbl); 323 if (err) 324 return err; 325 326 err = devlink_fmsg_u32_pair_put(fmsg, "index", queue_idx); 327 if (err) 328 return err; 329 330 err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg); 331 if (err) 332 return err; 333 334 err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); 335 if (err) 336 return err; 337 338 return devlink_fmsg_obj_nest_end(fmsg); 339}