cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

events.c (14762B)


      1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
      2// Copyright (c) 2018 Mellanox Technologies
      3
      4#include <linux/mlx5/driver.h>
      5
      6#include "mlx5_core.h"
      7#include "lib/eq.h"
      8#include "lib/mlx5.h"
      9
/* Binds one EQ notifier entry to the per-device events context, so the
 * notifier callbacks can reach their struct mlx5_events via container_of
 * (mlx5_nb_cof) plus the ctx pointer.
 */
struct mlx5_event_nb {
	struct mlx5_nb  nb;
	void           *ctx;	/* set to the owning struct mlx5_events in mlx5_events_start() */
};
     14
/* General events handlers for the low level mlx5_core driver
 *
 * Other Major feature specific events such as
 * clock/eswitch/fpga/FW trace and many others, are handled elsewhere, with
 * separate notifiers callbacks, specifically by those mlx5 components.
 */
/* Handlers consumed inside mlx5_core itself (see events_nbs_ref below). */
static int any_notifier(struct notifier_block *, unsigned long, void *);
static int temp_warn(struct notifier_block *, unsigned long, void *);
static int port_module(struct notifier_block *, unsigned long, void *);
static int pcie_core(struct notifier_block *, unsigned long, void *);

/* handler which forwards the event to events->fw_nh, driver notifiers */
static int forward_event(struct notifier_block *, unsigned long, void *);
     28
/* Template table of EQ notifiers. mlx5_events_start() copies each entry
 * into the per-device events->notifiers[] array before registering it, so
 * this shared table itself is never registered directly.
 */
static struct mlx5_nb events_nbs_ref[] = {
	/* Events to be processed by mlx5_core */
	{.nb.notifier_call = any_notifier,  .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY },
	{.nb.notifier_call = temp_warn,     .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT },
	{.nb.notifier_call = port_module,   .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT },
	{.nb.notifier_call = pcie_core,     .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT },

	/* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */
	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_PORT_CHANGE },
	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT },
	/* QP/WQ resource events to forward */
	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_DCT_DRAINED },
	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_PATH_MIG },
	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_COMM_EST },
	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SQ_DRAINED },
	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SRQ_LAST_WQE },
	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_WQ_CATAS_ERROR },
	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_PATH_MIG_FAILED },
	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR },
	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_WQ_ACCESS_ERROR },
	/* SRQ events */
	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SRQ_CATAS_ERROR },
	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SRQ_RQ_LIMIT },
};
     53
/* Per-device event dispatch state, allocated in mlx5_events_init() and
 * stored in dev->priv.events.
 */
struct mlx5_events {
	struct mlx5_core_dev *dev;
	struct workqueue_struct *wq;	/* runs pcie_core_work and work queued via mlx5_events_work_enqueue() */
	struct mlx5_event_nb  notifiers[ARRAY_SIZE(events_nbs_ref)];	/* per-device copies of events_nbs_ref */
	/* driver notifier chain for fw events */
	struct atomic_notifier_head fw_nh;
	/* port module events stats */
	struct mlx5_pme_stats pme_stats;
	/*pcie_core*/
	struct work_struct pcie_core_work;
	/* driver notifier chain for sw events */
	struct blocking_notifier_head sw_nh;
};
     67
     68static const char *eqe_type_str(u8 type)
     69{
     70	switch (type) {
     71	case MLX5_EVENT_TYPE_COMP:
     72		return "MLX5_EVENT_TYPE_COMP";
     73	case MLX5_EVENT_TYPE_PATH_MIG:
     74		return "MLX5_EVENT_TYPE_PATH_MIG";
     75	case MLX5_EVENT_TYPE_COMM_EST:
     76		return "MLX5_EVENT_TYPE_COMM_EST";
     77	case MLX5_EVENT_TYPE_SQ_DRAINED:
     78		return "MLX5_EVENT_TYPE_SQ_DRAINED";
     79	case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
     80		return "MLX5_EVENT_TYPE_SRQ_LAST_WQE";
     81	case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
     82		return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT";
     83	case MLX5_EVENT_TYPE_CQ_ERROR:
     84		return "MLX5_EVENT_TYPE_CQ_ERROR";
     85	case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
     86		return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR";
     87	case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
     88		return "MLX5_EVENT_TYPE_PATH_MIG_FAILED";
     89	case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
     90		return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR";
     91	case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
     92		return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR";
     93	case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
     94		return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR";
     95	case MLX5_EVENT_TYPE_INTERNAL_ERROR:
     96		return "MLX5_EVENT_TYPE_INTERNAL_ERROR";
     97	case MLX5_EVENT_TYPE_PORT_CHANGE:
     98		return "MLX5_EVENT_TYPE_PORT_CHANGE";
     99	case MLX5_EVENT_TYPE_GPIO_EVENT:
    100		return "MLX5_EVENT_TYPE_GPIO_EVENT";
    101	case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
    102		return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT";
    103	case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
    104		return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT";
    105	case MLX5_EVENT_TYPE_REMOTE_CONFIG:
    106		return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
    107	case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
    108		return "MLX5_EVENT_TYPE_DB_BF_CONGESTION";
    109	case MLX5_EVENT_TYPE_STALL_EVENT:
    110		return "MLX5_EVENT_TYPE_STALL_EVENT";
    111	case MLX5_EVENT_TYPE_CMD:
    112		return "MLX5_EVENT_TYPE_CMD";
    113	case MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED:
    114		return "MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED";
    115	case MLX5_EVENT_TYPE_VHCA_STATE_CHANGE:
    116		return "MLX5_EVENT_TYPE_VHCA_STATE_CHANGE";
    117	case MLX5_EVENT_TYPE_PAGE_REQUEST:
    118		return "MLX5_EVENT_TYPE_PAGE_REQUEST";
    119	case MLX5_EVENT_TYPE_PAGE_FAULT:
    120		return "MLX5_EVENT_TYPE_PAGE_FAULT";
    121	case MLX5_EVENT_TYPE_PPS_EVENT:
    122		return "MLX5_EVENT_TYPE_PPS_EVENT";
    123	case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
    124		return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE";
    125	case MLX5_EVENT_TYPE_FPGA_ERROR:
    126		return "MLX5_EVENT_TYPE_FPGA_ERROR";
    127	case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
    128		return "MLX5_EVENT_TYPE_FPGA_QP_ERROR";
    129	case MLX5_EVENT_TYPE_GENERAL_EVENT:
    130		return "MLX5_EVENT_TYPE_GENERAL_EVENT";
    131	case MLX5_EVENT_TYPE_MONITOR_COUNTER:
    132		return "MLX5_EVENT_TYPE_MONITOR_COUNTER";
    133	case MLX5_EVENT_TYPE_DEVICE_TRACER:
    134		return "MLX5_EVENT_TYPE_DEVICE_TRACER";
    135	default:
    136		return "Unrecognized event";
    137	}
    138}
    139
    140/* handles all FW events, type == eqe->type */
    141static int any_notifier(struct notifier_block *nb,
    142			unsigned long type, void *data)
    143{
    144	struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
    145	struct mlx5_events   *events   = event_nb->ctx;
    146	struct mlx5_eqe      *eqe      = data;
    147
    148	mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d)\n",
    149		      eqe_type_str(eqe->type), eqe->sub_type);
    150	return NOTIFY_OK;
    151}
    152
    153/* type == MLX5_EVENT_TYPE_TEMP_WARN_EVENT */
    154static int temp_warn(struct notifier_block *nb, unsigned long type, void *data)
    155{
    156	struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
    157	struct mlx5_events   *events   = event_nb->ctx;
    158	struct mlx5_eqe      *eqe      = data;
    159	u64 value_lsb;
    160	u64 value_msb;
    161
    162	value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb);
    163	value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb);
    164
    165	mlx5_core_warn(events->dev,
    166		       "High temperature on sensors with bit set %llx %llx",
    167		       value_msb, value_lsb);
    168
    169	return NOTIFY_OK;
    170}
    171
    172/* MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
    173static const char *mlx5_pme_status_to_string(enum port_module_event_status_type status)
    174{
    175	switch (status) {
    176	case MLX5_MODULE_STATUS_PLUGGED:
    177		return "Cable plugged";
    178	case MLX5_MODULE_STATUS_UNPLUGGED:
    179		return "Cable unplugged";
    180	case MLX5_MODULE_STATUS_ERROR:
    181		return "Cable error";
    182	case MLX5_MODULE_STATUS_DISABLED:
    183		return "Cable disabled";
    184	default:
    185		return "Unknown status";
    186	}
    187}
    188
    189static const char *mlx5_pme_error_to_string(enum port_module_event_error_type error)
    190{
    191	switch (error) {
    192	case MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED:
    193		return "Power budget exceeded";
    194	case MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX:
    195		return "Long Range for non MLNX cable";
    196	case MLX5_MODULE_EVENT_ERROR_BUS_STUCK:
    197		return "Bus stuck (I2C or data shorted)";
    198	case MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT:
    199		return "No EEPROM/retry timeout";
    200	case MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST:
    201		return "Enforce part number list";
    202	case MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER:
    203		return "Unknown identifier";
    204	case MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE:
    205		return "High Temperature";
    206	case MLX5_MODULE_EVENT_ERROR_BAD_CABLE:
    207		return "Bad or shorted cable/module";
    208	case MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED:
    209		return "One or more network ports have been powered down due to insufficient/unadvertised power on the PCIe slot";
    210	default:
    211		return "Unknown error";
    212	}
    213}
    214
    215/* type == MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
    216static int port_module(struct notifier_block *nb, unsigned long type, void *data)
    217{
    218	struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
    219	struct mlx5_events   *events   = event_nb->ctx;
    220	struct mlx5_eqe      *eqe      = data;
    221
    222	enum port_module_event_status_type module_status;
    223	enum port_module_event_error_type error_type;
    224	struct mlx5_eqe_port_module *module_event_eqe;
    225	const char *status_str;
    226	u8 module_num;
    227
    228	module_event_eqe = &eqe->data.port_module;
    229	module_status = module_event_eqe->module_status &
    230			PORT_MODULE_EVENT_MODULE_STATUS_MASK;
    231	error_type = module_event_eqe->error_type &
    232		     PORT_MODULE_EVENT_ERROR_TYPE_MASK;
    233
    234	if (module_status < MLX5_MODULE_STATUS_NUM)
    235		events->pme_stats.status_counters[module_status]++;
    236
    237	if (module_status == MLX5_MODULE_STATUS_ERROR)
    238		if (error_type < MLX5_MODULE_EVENT_ERROR_NUM)
    239			events->pme_stats.error_counters[error_type]++;
    240
    241	if (!printk_ratelimit())
    242		return NOTIFY_OK;
    243
    244	module_num = module_event_eqe->module;
    245	status_str = mlx5_pme_status_to_string(module_status);
    246	if (module_status == MLX5_MODULE_STATUS_ERROR) {
    247		const char *error_str = mlx5_pme_error_to_string(error_type);
    248
    249		mlx5_core_err(events->dev,
    250			      "Port module event[error]: module %u, %s, %s\n",
    251			      module_num, status_str, error_str);
    252	} else {
    253		mlx5_core_info(events->dev,
    254			       "Port module event: module %u, %s\n",
    255			       module_num, status_str);
    256	}
    257
    258	return NOTIFY_OK;
    259}
    260
/* pwr_status values decoded from the MPEIN register (see mlx5_pcie_event) */
enum {
	MLX5_PCI_POWER_COULD_NOT_BE_READ = 0x0,
	MLX5_PCI_POWER_SUFFICIENT_REPORTED = 0x1,
	MLX5_PCI_POWER_INSUFFICIENT_REPORTED = 0x2,
};
    266
    267static void mlx5_pcie_event(struct work_struct *work)
    268{
    269	u32 out[MLX5_ST_SZ_DW(mpein_reg)] = {0};
    270	u32 in[MLX5_ST_SZ_DW(mpein_reg)] = {0};
    271	struct mlx5_events *events;
    272	struct mlx5_core_dev *dev;
    273	u8 power_status;
    274	u16 pci_power;
    275
    276	events = container_of(work, struct mlx5_events, pcie_core_work);
    277	dev  = events->dev;
    278
    279	if (!MLX5_CAP_MCAM_FEATURE(dev, pci_status_and_power))
    280		return;
    281
    282	mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
    283			     MLX5_REG_MPEIN, 0, 0);
    284	power_status = MLX5_GET(mpein_reg, out, pwr_status);
    285	pci_power = MLX5_GET(mpein_reg, out, pci_power);
    286
    287	switch (power_status) {
    288	case MLX5_PCI_POWER_COULD_NOT_BE_READ:
    289		mlx5_core_info_rl(dev,
    290				  "PCIe slot power capability was not advertised.\n");
    291		break;
    292	case MLX5_PCI_POWER_INSUFFICIENT_REPORTED:
    293		mlx5_core_warn_rl(dev,
    294				  "Detected insufficient power on the PCIe slot (%uW).\n",
    295				  pci_power);
    296		break;
    297	case MLX5_PCI_POWER_SUFFICIENT_REPORTED:
    298		mlx5_core_info_rl(dev,
    299				  "PCIe slot advertised sufficient power (%uW).\n",
    300				  pci_power);
    301		break;
    302	}
    303}
    304
    305static int pcie_core(struct notifier_block *nb, unsigned long type, void *data)
    306{
    307	struct mlx5_event_nb    *event_nb = mlx5_nb_cof(nb,
    308							struct mlx5_event_nb,
    309							nb);
    310	struct mlx5_events      *events   = event_nb->ctx;
    311	struct mlx5_eqe         *eqe      = data;
    312
    313	switch (eqe->sub_type) {
    314	case MLX5_GENERAL_SUBTYPE_PCI_POWER_CHANGE_EVENT:
    315			queue_work(events->wq, &events->pcie_core_work);
    316		break;
    317	default:
    318		return NOTIFY_DONE;
    319	}
    320
    321	return NOTIFY_OK;
    322}
    323
/* Copy the accumulated port-module-event counters into @stats (struct copy). */
void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats)
{
	*stats = dev->priv.events->pme_stats;
}
    328
    329/* forward event as is to registered interfaces (mlx5e/mlx5_ib) */
    330static int forward_event(struct notifier_block *nb, unsigned long event, void *data)
    331{
    332	struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
    333	struct mlx5_events   *events   = event_nb->ctx;
    334	struct mlx5_eqe      *eqe      = data;
    335
    336	mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d) forward to interfaces\n",
    337		      eqe_type_str(eqe->type), eqe->sub_type);
    338	atomic_notifier_call_chain(&events->fw_nh, event, data);
    339	return NOTIFY_OK;
    340}
    341
    342int mlx5_events_init(struct mlx5_core_dev *dev)
    343{
    344	struct mlx5_events *events = kzalloc(sizeof(*events), GFP_KERNEL);
    345
    346	if (!events)
    347		return -ENOMEM;
    348
    349	ATOMIC_INIT_NOTIFIER_HEAD(&events->fw_nh);
    350	events->dev = dev;
    351	dev->priv.events = events;
    352	events->wq = create_singlethread_workqueue("mlx5_events");
    353	if (!events->wq) {
    354		kfree(events);
    355		return -ENOMEM;
    356	}
    357	INIT_WORK(&events->pcie_core_work, mlx5_pcie_event);
    358	BLOCKING_INIT_NOTIFIER_HEAD(&events->sw_nh);
    359
    360	return 0;
    361}
    362
    363void mlx5_events_cleanup(struct mlx5_core_dev *dev)
    364{
    365	destroy_workqueue(dev->priv.events->wq);
    366	kvfree(dev->priv.events);
    367}
    368
    369void mlx5_events_start(struct mlx5_core_dev *dev)
    370{
    371	struct mlx5_events *events = dev->priv.events;
    372	int i;
    373
    374	for (i = 0; i < ARRAY_SIZE(events_nbs_ref); i++) {
    375		events->notifiers[i].nb  = events_nbs_ref[i];
    376		events->notifiers[i].ctx = events;
    377		mlx5_eq_notifier_register(dev, &events->notifiers[i].nb);
    378	}
    379}
    380
    381void mlx5_events_stop(struct mlx5_core_dev *dev)
    382{
    383	struct mlx5_events *events = dev->priv.events;
    384	int i;
    385
    386	for (i = ARRAY_SIZE(events_nbs_ref) - 1; i >= 0 ; i--)
    387		mlx5_eq_notifier_unregister(dev, &events->notifiers[i].nb);
    388	flush_workqueue(events->wq);
    389}
    390
    391/* This API is used only for processing and forwarding firmware
    392 * events to mlx5 consumer.
    393 */
    394int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb)
    395{
    396	struct mlx5_events *events = dev->priv.events;
    397
    398	return atomic_notifier_chain_register(&events->fw_nh, nb);
    399}
    400EXPORT_SYMBOL(mlx5_notifier_register);
    401
    402int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb)
    403{
    404	struct mlx5_events *events = dev->priv.events;
    405
    406	return atomic_notifier_chain_unregister(&events->fw_nh, nb);
    407}
    408EXPORT_SYMBOL(mlx5_notifier_unregister);
    409
/* Deliver @event/@data to all consumers registered on the fw chain. */
int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data)
{
	return atomic_notifier_call_chain(&events->fw_nh, event, data);
}
    414
    415/* This API is used only for processing and forwarding driver-specific
    416 * events to mlx5 consumers.
    417 */
    418int mlx5_blocking_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb)
    419{
    420	struct mlx5_events *events = dev->priv.events;
    421
    422	return blocking_notifier_chain_register(&events->sw_nh, nb);
    423}
    424
    425int mlx5_blocking_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb)
    426{
    427	struct mlx5_events *events = dev->priv.events;
    428
    429	return blocking_notifier_chain_unregister(&events->sw_nh, nb);
    430}
    431
    432int mlx5_blocking_notifier_call_chain(struct mlx5_core_dev *dev, unsigned int event,
    433				      void *data)
    434{
    435	struct mlx5_events *events = dev->priv.events;
    436
    437	return blocking_notifier_call_chain(&events->sw_nh, event, data);
    438}
    439
    440void mlx5_events_work_enqueue(struct mlx5_core_dev *dev, struct work_struct *work)
    441{
    442	queue_work(dev->priv.events->wq, work);
    443}