cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

counters.c (25339B)


      1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
      2/*
      3 * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
      4 */
      5
      6#include "mlx5_ib.h"
      7#include <linux/mlx5/eswitch.h>
      8#include "counters.h"
      9#include "ib_rep.h"
     10#include "qp.h"
     11
     12struct mlx5_ib_counter {
     13	const char *name;
     14	size_t offset;
     15	u32 type;
     16};
     17
     18#define INIT_Q_COUNTER(_name)		\
     19	{ .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}
     20
     21static const struct mlx5_ib_counter basic_q_cnts[] = {
     22	INIT_Q_COUNTER(rx_write_requests),
     23	INIT_Q_COUNTER(rx_read_requests),
     24	INIT_Q_COUNTER(rx_atomic_requests),
     25	INIT_Q_COUNTER(out_of_buffer),
     26};
     27
     28static const struct mlx5_ib_counter out_of_seq_q_cnts[] = {
     29	INIT_Q_COUNTER(out_of_sequence),
     30};
     31
     32static const struct mlx5_ib_counter retrans_q_cnts[] = {
     33	INIT_Q_COUNTER(duplicate_request),
     34	INIT_Q_COUNTER(rnr_nak_retry_err),
     35	INIT_Q_COUNTER(packet_seq_err),
     36	INIT_Q_COUNTER(implied_nak_seq_err),
     37	INIT_Q_COUNTER(local_ack_timeout_err),
     38};
     39
     40#define INIT_CONG_COUNTER(_name)		\
     41	{ .name = #_name, .offset =	\
     42		MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)}
     43
     44static const struct mlx5_ib_counter cong_cnts[] = {
     45	INIT_CONG_COUNTER(rp_cnp_ignored),
     46	INIT_CONG_COUNTER(rp_cnp_handled),
     47	INIT_CONG_COUNTER(np_ecn_marked_roce_packets),
     48	INIT_CONG_COUNTER(np_cnp_sent),
     49};
     50
     51static const struct mlx5_ib_counter extended_err_cnts[] = {
     52	INIT_Q_COUNTER(resp_local_length_error),
     53	INIT_Q_COUNTER(resp_cqe_error),
     54	INIT_Q_COUNTER(req_cqe_error),
     55	INIT_Q_COUNTER(req_remote_invalid_request),
     56	INIT_Q_COUNTER(req_remote_access_errors),
     57	INIT_Q_COUNTER(resp_remote_access_errors),
     58	INIT_Q_COUNTER(resp_cqe_flush_error),
     59	INIT_Q_COUNTER(req_cqe_flush_error),
     60};
     61
     62static const struct mlx5_ib_counter roce_accl_cnts[] = {
     63	INIT_Q_COUNTER(roce_adp_retrans),
     64	INIT_Q_COUNTER(roce_adp_retrans_to),
     65	INIT_Q_COUNTER(roce_slow_restart),
     66	INIT_Q_COUNTER(roce_slow_restart_cnps),
     67	INIT_Q_COUNTER(roce_slow_restart_trans),
     68};
     69
     70#define INIT_EXT_PPCNT_COUNTER(_name)		\
     71	{ .name = #_name, .offset =	\
     72	MLX5_BYTE_OFF(ppcnt_reg, \
     73		      counter_set.eth_extended_cntrs_grp_data_layout._name##_high)}
     74
     75static const struct mlx5_ib_counter ext_ppcnt_cnts[] = {
     76	INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated),
     77};
     78
     79#define INIT_OP_COUNTER(_name, _type)		\
     80	{ .name = #_name, .type = MLX5_IB_OPCOUNTER_##_type}
     81
     82static const struct mlx5_ib_counter basic_op_cnts[] = {
     83	INIT_OP_COUNTER(cc_rx_ce_pkts, CC_RX_CE_PKTS),
     84};
     85
     86static const struct mlx5_ib_counter rdmarx_cnp_op_cnts[] = {
     87	INIT_OP_COUNTER(cc_rx_cnp_pkts, CC_RX_CNP_PKTS),
     88};
     89
     90static const struct mlx5_ib_counter rdmatx_cnp_op_cnts[] = {
     91	INIT_OP_COUNTER(cc_tx_cnp_pkts, CC_TX_CNP_PKTS),
     92};
     93
     94static int mlx5_ib_read_counters(struct ib_counters *counters,
     95				 struct ib_counters_read_attr *read_attr,
     96				 struct uverbs_attr_bundle *attrs)
     97{
     98	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
     99	struct mlx5_read_counters_attr mread_attr = {};
    100	struct mlx5_ib_flow_counters_desc *desc;
    101	int ret, i;
    102
    103	mutex_lock(&mcounters->mcntrs_mutex);
    104	if (mcounters->cntrs_max_index > read_attr->ncounters) {
    105		ret = -EINVAL;
    106		goto err_bound;
    107	}
    108
    109	mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64),
    110				 GFP_KERNEL);
    111	if (!mread_attr.out) {
    112		ret = -ENOMEM;
    113		goto err_bound;
    114	}
    115
    116	mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl;
    117	mread_attr.flags = read_attr->flags;
    118	ret = mcounters->read_counters(counters->device, &mread_attr);
    119	if (ret)
    120		goto err_read;
    121
    122	/* do the pass over the counters data array to assign according to the
    123	 * descriptions and indexing pairs
    124	 */
    125	desc = mcounters->counters_data;
    126	for (i = 0; i < mcounters->ncounters; i++)
    127		read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description];
    128
    129err_read:
    130	kfree(mread_attr.out);
    131err_bound:
    132	mutex_unlock(&mcounters->mcntrs_mutex);
    133	return ret;
    134}
    135
    136static int mlx5_ib_destroy_counters(struct ib_counters *counters)
    137{
    138	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
    139
    140	mlx5_ib_counters_clear_description(counters);
    141	if (mcounters->hw_cntrs_hndl)
    142		mlx5_fc_destroy(to_mdev(counters->device)->mdev,
    143				mcounters->hw_cntrs_hndl);
    144	return 0;
    145}
    146
    147static int mlx5_ib_create_counters(struct ib_counters *counters,
    148				   struct uverbs_attr_bundle *attrs)
    149{
    150	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
    151
    152	mutex_init(&mcounters->mcntrs_mutex);
    153	return 0;
    154}
    155
    156
    157static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
    158						   u32 port_num)
    159{
    160	return is_mdev_switchdev_mode(dev->mdev) ? &dev->port[0].cnts :
    161						   &dev->port[port_num].cnts;
    162}
    163
    164/**
    165 * mlx5_ib_get_counters_id - Returns counters id to use for device+port
    166 * @dev:	Pointer to mlx5 IB device
    167 * @port_num:	Zero based port number
    168 *
    169 * mlx5_ib_get_counters_id() Returns counters set id to use for given
    170 * device port combination in switchdev and non switchdev mode of the
    171 * parent device.
    172 */
    173u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u32 port_num)
    174{
    175	const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
    176
    177	return cnts->set_id;
    178}
    179
    180static struct rdma_hw_stats *do_alloc_stats(const struct mlx5_ib_counters *cnts)
    181{
    182	struct rdma_hw_stats *stats;
    183	u32 num_hw_counters;
    184	int i;
    185
    186	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
    187			  cnts->num_ext_ppcnt_counters;
    188	stats = rdma_alloc_hw_stats_struct(cnts->descs,
    189					   num_hw_counters +
    190					   cnts->num_op_counters,
    191					   RDMA_HW_STATS_DEFAULT_LIFESPAN);
    192	if (!stats)
    193		return NULL;
    194
    195	for (i = 0; i < cnts->num_op_counters; i++)
    196		set_bit(num_hw_counters + i, stats->is_disabled);
    197
    198	return stats;
    199}
    200
    201static struct rdma_hw_stats *
    202mlx5_ib_alloc_hw_device_stats(struct ib_device *ibdev)
    203{
    204	struct mlx5_ib_dev *dev = to_mdev(ibdev);
    205	const struct mlx5_ib_counters *cnts = &dev->port[0].cnts;
    206
    207	return do_alloc_stats(cnts);
    208}
    209
    210static struct rdma_hw_stats *
    211mlx5_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num)
    212{
    213	struct mlx5_ib_dev *dev = to_mdev(ibdev);
    214	const struct mlx5_ib_counters *cnts = &dev->port[port_num - 1].cnts;
    215
    216	return do_alloc_stats(cnts);
    217}
    218
    219static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
    220				    const struct mlx5_ib_counters *cnts,
    221				    struct rdma_hw_stats *stats,
    222				    u16 set_id)
    223{
    224	u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
    225	u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
    226	__be32 val;
    227	int ret, i;
    228
    229	MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
    230	MLX5_SET(query_q_counter_in, in, counter_set_id, set_id);
    231	ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
    232	if (ret)
    233		return ret;
    234
    235	for (i = 0; i < cnts->num_q_counters; i++) {
    236		val = *(__be32 *)((void *)out + cnts->offsets[i]);
    237		stats->value[i] = (u64)be32_to_cpu(val);
    238	}
    239
    240	return 0;
    241}
    242
    243static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
    244					    const struct mlx5_ib_counters *cnts,
    245					    struct rdma_hw_stats *stats)
    246{
    247	int offset = cnts->num_q_counters + cnts->num_cong_counters;
    248	u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
    249	int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
    250	int ret, i;
    251	void *out;
    252
    253	out = kvzalloc(sz, GFP_KERNEL);
    254	if (!out)
    255		return -ENOMEM;
    256
    257	MLX5_SET(ppcnt_reg, in, local_port, 1);
    258	MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP);
    259	ret = mlx5_core_access_reg(dev->mdev, in, sz, out, sz, MLX5_REG_PPCNT,
    260				   0, 0);
    261	if (ret)
    262		goto free;
    263
    264	for (i = 0; i < cnts->num_ext_ppcnt_counters; i++)
    265		stats->value[i + offset] =
    266			be64_to_cpup((__be64 *)(out +
    267				    cnts->offsets[i + offset]));
    268free:
    269	kvfree(out);
    270	return ret;
    271}
    272
    273static int do_get_hw_stats(struct ib_device *ibdev,
    274			   struct rdma_hw_stats *stats,
    275			   u32 port_num, int index)
    276{
    277	struct mlx5_ib_dev *dev = to_mdev(ibdev);
    278	const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1);
    279	struct mlx5_core_dev *mdev;
    280	int ret, num_counters;
    281	u32 mdev_port_num;
    282
    283	if (!stats)
    284		return -EINVAL;
    285
    286	num_counters = cnts->num_q_counters +
    287		       cnts->num_cong_counters +
    288		       cnts->num_ext_ppcnt_counters;
    289
    290	/* q_counters are per IB device, query the master mdev */
    291	ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats, cnts->set_id);
    292	if (ret)
    293		return ret;
    294
    295	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
    296		ret =  mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats);
    297		if (ret)
    298			return ret;
    299	}
    300
    301	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
    302		mdev = mlx5_ib_get_native_port_mdev(dev, port_num,
    303						    &mdev_port_num);
    304		if (!mdev) {
    305			/* If port is not affiliated yet, its in down state
    306			 * which doesn't have any counters yet, so it would be
    307			 * zero. So no need to read from the HCA.
    308			 */
    309			goto done;
    310		}
    311		ret = mlx5_lag_query_cong_counters(dev->mdev,
    312						   stats->value +
    313						   cnts->num_q_counters,
    314						   cnts->num_cong_counters,
    315						   cnts->offsets +
    316						   cnts->num_q_counters);
    317
    318		mlx5_ib_put_native_port_mdev(dev, port_num);
    319		if (ret)
    320			return ret;
    321	}
    322
    323done:
    324	return num_counters;
    325}
    326
    327static int do_get_op_stat(struct ib_device *ibdev,
    328			  struct rdma_hw_stats *stats,
    329			  u32 port_num, int index)
    330{
    331	struct mlx5_ib_dev *dev = to_mdev(ibdev);
    332	const struct mlx5_ib_counters *cnts;
    333	const struct mlx5_ib_op_fc *opfcs;
    334	u64 packets = 0, bytes;
    335	u32 type;
    336	int ret;
    337
    338	cnts = get_counters(dev, port_num - 1);
    339	opfcs = cnts->opfcs;
    340	type = *(u32 *)cnts->descs[index].priv;
    341	if (type >= MLX5_IB_OPCOUNTER_MAX)
    342		return -EINVAL;
    343
    344	if (!opfcs[type].fc)
    345		goto out;
    346
    347	ret = mlx5_fc_query(dev->mdev, opfcs[type].fc,
    348			    &packets, &bytes);
    349	if (ret)
    350		return ret;
    351
    352out:
    353	stats->value[index] = packets;
    354	return index;
    355}
    356
    357static int do_get_op_stats(struct ib_device *ibdev,
    358			   struct rdma_hw_stats *stats,
    359			   u32 port_num)
    360{
    361	struct mlx5_ib_dev *dev = to_mdev(ibdev);
    362	const struct mlx5_ib_counters *cnts;
    363	int index, ret, num_hw_counters;
    364
    365	cnts = get_counters(dev, port_num - 1);
    366	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
    367			  cnts->num_ext_ppcnt_counters;
    368	for (index = num_hw_counters;
    369	     index < (num_hw_counters + cnts->num_op_counters); index++) {
    370		ret = do_get_op_stat(ibdev, stats, port_num, index);
    371		if (ret != index)
    372			return ret;
    373	}
    374
    375	return cnts->num_op_counters;
    376}
    377
    378static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
    379				struct rdma_hw_stats *stats,
    380				u32 port_num, int index)
    381{
    382	int num_counters, num_hw_counters, num_op_counters;
    383	struct mlx5_ib_dev *dev = to_mdev(ibdev);
    384	const struct mlx5_ib_counters *cnts;
    385
    386	cnts = get_counters(dev, port_num - 1);
    387	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
    388		cnts->num_ext_ppcnt_counters;
    389	num_counters = num_hw_counters + cnts->num_op_counters;
    390
    391	if (index < 0 || index > num_counters)
    392		return -EINVAL;
    393	else if (index > 0 && index < num_hw_counters)
    394		return do_get_hw_stats(ibdev, stats, port_num, index);
    395	else if (index >= num_hw_counters && index < num_counters)
    396		return do_get_op_stat(ibdev, stats, port_num, index);
    397
    398	num_hw_counters = do_get_hw_stats(ibdev, stats, port_num, index);
    399	if (num_hw_counters < 0)
    400		return num_hw_counters;
    401
    402	num_op_counters = do_get_op_stats(ibdev, stats, port_num);
    403	if (num_op_counters < 0)
    404		return num_op_counters;
    405
    406	return num_hw_counters + num_op_counters;
    407}
    408
    409static struct rdma_hw_stats *
    410mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
    411{
    412	struct mlx5_ib_dev *dev = to_mdev(counter->device);
    413	const struct mlx5_ib_counters *cnts =
    414		get_counters(dev, counter->port - 1);
    415
    416	return do_alloc_stats(cnts);
    417}
    418
    419static int mlx5_ib_counter_update_stats(struct rdma_counter *counter)
    420{
    421	struct mlx5_ib_dev *dev = to_mdev(counter->device);
    422	const struct mlx5_ib_counters *cnts =
    423		get_counters(dev, counter->port - 1);
    424
    425	return mlx5_ib_query_q_counters(dev->mdev, cnts,
    426					counter->stats, counter->id);
    427}
    428
    429static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
    430{
    431	struct mlx5_ib_dev *dev = to_mdev(counter->device);
    432	u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
    433
    434	if (!counter->id)
    435		return 0;
    436
    437	MLX5_SET(dealloc_q_counter_in, in, opcode,
    438		 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
    439	MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id);
    440	return mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
    441}
    442
    443static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
    444				   struct ib_qp *qp)
    445{
    446	struct mlx5_ib_dev *dev = to_mdev(qp->device);
    447	int err;
    448
    449	if (!counter->id) {
    450		u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
    451		u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
    452
    453		MLX5_SET(alloc_q_counter_in, in, opcode,
    454			 MLX5_CMD_OP_ALLOC_Q_COUNTER);
    455		MLX5_SET(alloc_q_counter_in, in, uid, MLX5_SHARED_RESOURCE_UID);
    456		err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
    457		if (err)
    458			return err;
    459		counter->id =
    460			MLX5_GET(alloc_q_counter_out, out, counter_set_id);
    461	}
    462
    463	err = mlx5_ib_qp_set_counter(qp, counter);
    464	if (err)
    465		goto fail_set_counter;
    466
    467	return 0;
    468
    469fail_set_counter:
    470	mlx5_ib_counter_dealloc(counter);
    471	counter->id = 0;
    472
    473	return err;
    474}
    475
    476static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp)
    477{
    478	return mlx5_ib_qp_set_counter(qp, NULL);
    479}
    480
    481static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
    482				  struct rdma_stat_desc *descs, size_t *offsets)
    483{
    484	int i;
    485	int j = 0;
    486
    487	for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) {
    488		descs[j].name = basic_q_cnts[i].name;
    489		offsets[j] = basic_q_cnts[i].offset;
    490	}
    491
    492	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
    493		for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) {
    494			descs[j].name = out_of_seq_q_cnts[i].name;
    495			offsets[j] = out_of_seq_q_cnts[i].offset;
    496		}
    497	}
    498
    499	if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
    500		for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) {
    501			descs[j].name = retrans_q_cnts[i].name;
    502			offsets[j] = retrans_q_cnts[i].offset;
    503		}
    504	}
    505
    506	if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
    507		for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) {
    508			descs[j].name = extended_err_cnts[i].name;
    509			offsets[j] = extended_err_cnts[i].offset;
    510		}
    511	}
    512
    513	if (MLX5_CAP_GEN(dev->mdev, roce_accl)) {
    514		for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) {
    515			descs[j].name = roce_accl_cnts[i].name;
    516			offsets[j] = roce_accl_cnts[i].offset;
    517		}
    518	}
    519
    520	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
    521		for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
    522			descs[j].name = cong_cnts[i].name;
    523			offsets[j] = cong_cnts[i].offset;
    524		}
    525	}
    526
    527	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
    528		for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) {
    529			descs[j].name = ext_ppcnt_cnts[i].name;
    530			offsets[j] = ext_ppcnt_cnts[i].offset;
    531		}
    532	}
    533
    534	for (i = 0; i < ARRAY_SIZE(basic_op_cnts); i++, j++) {
    535		descs[j].name = basic_op_cnts[i].name;
    536		descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
    537		descs[j].priv = &basic_op_cnts[i].type;
    538	}
    539
    540	if (MLX5_CAP_FLOWTABLE(dev->mdev,
    541			       ft_field_support_2_nic_receive_rdma.bth_opcode)) {
    542		for (i = 0; i < ARRAY_SIZE(rdmarx_cnp_op_cnts); i++, j++) {
    543			descs[j].name = rdmarx_cnp_op_cnts[i].name;
    544			descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
    545			descs[j].priv = &rdmarx_cnp_op_cnts[i].type;
    546		}
    547	}
    548
    549	if (MLX5_CAP_FLOWTABLE(dev->mdev,
    550			       ft_field_support_2_nic_transmit_rdma.bth_opcode)) {
    551		for (i = 0; i < ARRAY_SIZE(rdmatx_cnp_op_cnts); i++, j++) {
    552			descs[j].name = rdmatx_cnp_op_cnts[i].name;
    553			descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
    554			descs[j].priv = &rdmatx_cnp_op_cnts[i].type;
    555		}
    556	}
    557}
    558
    559
    560static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
    561				    struct mlx5_ib_counters *cnts)
    562{
    563	u32 num_counters, num_op_counters;
    564
    565	num_counters = ARRAY_SIZE(basic_q_cnts);
    566
    567	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
    568		num_counters += ARRAY_SIZE(out_of_seq_q_cnts);
    569
    570	if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
    571		num_counters += ARRAY_SIZE(retrans_q_cnts);
    572
    573	if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
    574		num_counters += ARRAY_SIZE(extended_err_cnts);
    575
    576	if (MLX5_CAP_GEN(dev->mdev, roce_accl))
    577		num_counters += ARRAY_SIZE(roce_accl_cnts);
    578
    579	cnts->num_q_counters = num_counters;
    580
    581	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
    582		cnts->num_cong_counters = ARRAY_SIZE(cong_cnts);
    583		num_counters += ARRAY_SIZE(cong_cnts);
    584	}
    585	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
    586		cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts);
    587		num_counters += ARRAY_SIZE(ext_ppcnt_cnts);
    588	}
    589
    590	num_op_counters = ARRAY_SIZE(basic_op_cnts);
    591
    592	if (MLX5_CAP_FLOWTABLE(dev->mdev,
    593			       ft_field_support_2_nic_receive_rdma.bth_opcode))
    594		num_op_counters += ARRAY_SIZE(rdmarx_cnp_op_cnts);
    595
    596	if (MLX5_CAP_FLOWTABLE(dev->mdev,
    597			       ft_field_support_2_nic_transmit_rdma.bth_opcode))
    598		num_op_counters += ARRAY_SIZE(rdmatx_cnp_op_cnts);
    599
    600	cnts->num_op_counters = num_op_counters;
    601	num_counters += num_op_counters;
    602	cnts->descs = kcalloc(num_counters,
    603			      sizeof(struct rdma_stat_desc), GFP_KERNEL);
    604	if (!cnts->descs)
    605		return -ENOMEM;
    606
    607	cnts->offsets = kcalloc(num_counters,
    608				sizeof(*cnts->offsets), GFP_KERNEL);
    609	if (!cnts->offsets)
    610		goto err;
    611
    612	return 0;
    613
    614err:
    615	kfree(cnts->descs);
    616	cnts->descs = NULL;
    617	return -ENOMEM;
    618}
    619
    620static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
    621{
    622	u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
    623	int num_cnt_ports;
    624	int i, j;
    625
    626	num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
    627
    628	MLX5_SET(dealloc_q_counter_in, in, opcode,
    629		 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
    630
    631	for (i = 0; i < num_cnt_ports; i++) {
    632		if (dev->port[i].cnts.set_id) {
    633			MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
    634				 dev->port[i].cnts.set_id);
    635			mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
    636		}
    637		kfree(dev->port[i].cnts.descs);
    638		kfree(dev->port[i].cnts.offsets);
    639
    640		for (j = 0; j < MLX5_IB_OPCOUNTER_MAX; j++) {
    641			if (!dev->port[i].cnts.opfcs[j].fc)
    642				continue;
    643
    644			if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS))
    645				mlx5_ib_fs_remove_op_fc(dev,
    646					&dev->port[i].cnts.opfcs[j], j);
    647			mlx5_fc_destroy(dev->mdev,
    648					dev->port[i].cnts.opfcs[j].fc);
    649			dev->port[i].cnts.opfcs[j].fc = NULL;
    650		}
    651	}
    652}
    653
    654static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
    655{
    656	u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
    657	u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
    658	int num_cnt_ports;
    659	int err = 0;
    660	int i;
    661	bool is_shared;
    662
    663	MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
    664	is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;
    665	num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
    666
    667	for (i = 0; i < num_cnt_ports; i++) {
    668		err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts);
    669		if (err)
    670			goto err_alloc;
    671
    672		mlx5_ib_fill_counters(dev, dev->port[i].cnts.descs,
    673				      dev->port[i].cnts.offsets);
    674
    675		MLX5_SET(alloc_q_counter_in, in, uid,
    676			 is_shared ? MLX5_SHARED_RESOURCE_UID : 0);
    677
    678		err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
    679		if (err) {
    680			mlx5_ib_warn(dev,
    681				     "couldn't allocate queue counter for port %d, err %d\n",
    682				     i + 1, err);
    683			goto err_alloc;
    684		}
    685
    686		dev->port[i].cnts.set_id =
    687			MLX5_GET(alloc_q_counter_out, out, counter_set_id);
    688	}
    689	return 0;
    690
    691err_alloc:
    692	mlx5_ib_dealloc_counters(dev);
    693	return err;
    694}
    695
    696static int read_flow_counters(struct ib_device *ibdev,
    697			      struct mlx5_read_counters_attr *read_attr)
    698{
    699	struct mlx5_fc *fc = read_attr->hw_cntrs_hndl;
    700	struct mlx5_ib_dev *dev = to_mdev(ibdev);
    701
    702	return mlx5_fc_query(dev->mdev, fc,
    703			     &read_attr->out[IB_COUNTER_PACKETS],
    704			     &read_attr->out[IB_COUNTER_BYTES]);
    705}
    706
    707/* flow counters currently expose two counters packets and bytes */
    708#define FLOW_COUNTERS_NUM 2
    709static int counters_set_description(
    710	struct ib_counters *counters, enum mlx5_ib_counters_type counters_type,
    711	struct mlx5_ib_flow_counters_desc *desc_data, u32 ncounters)
    712{
    713	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
    714	u32 cntrs_max_index = 0;
    715	int i;
    716
    717	if (counters_type != MLX5_IB_COUNTERS_FLOW)
    718		return -EINVAL;
    719
    720	/* init the fields for the object */
    721	mcounters->type = counters_type;
    722	mcounters->read_counters = read_flow_counters;
    723	mcounters->counters_num = FLOW_COUNTERS_NUM;
    724	mcounters->ncounters = ncounters;
    725	/* each counter entry have both description and index pair */
    726	for (i = 0; i < ncounters; i++) {
    727		if (desc_data[i].description > IB_COUNTER_BYTES)
    728			return -EINVAL;
    729
    730		if (cntrs_max_index <= desc_data[i].index)
    731			cntrs_max_index = desc_data[i].index + 1;
    732	}
    733
    734	mutex_lock(&mcounters->mcntrs_mutex);
    735	mcounters->counters_data = desc_data;
    736	mcounters->cntrs_max_index = cntrs_max_index;
    737	mutex_unlock(&mcounters->mcntrs_mutex);
    738
    739	return 0;
    740}
    741
    742#define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2))
    743int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters,
    744				   struct mlx5_ib_create_flow *ucmd)
    745{
    746	struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters);
    747	struct mlx5_ib_flow_counters_data *cntrs_data = NULL;
    748	struct mlx5_ib_flow_counters_desc *desc_data = NULL;
    749	bool hw_hndl = false;
    750	int ret = 0;
    751
    752	if (ucmd && ucmd->ncounters_data != 0) {
    753		cntrs_data = ucmd->data;
    754		if (cntrs_data->ncounters > MAX_COUNTERS_NUM)
    755			return -EINVAL;
    756
    757		desc_data = kcalloc(cntrs_data->ncounters,
    758				    sizeof(*desc_data),
    759				    GFP_KERNEL);
    760		if (!desc_data)
    761			return  -ENOMEM;
    762
    763		if (copy_from_user(desc_data,
    764				   u64_to_user_ptr(cntrs_data->counters_data),
    765				   sizeof(*desc_data) * cntrs_data->ncounters)) {
    766			ret = -EFAULT;
    767			goto free;
    768		}
    769	}
    770
    771	if (!mcounters->hw_cntrs_hndl) {
    772		mcounters->hw_cntrs_hndl = mlx5_fc_create(
    773			to_mdev(ibcounters->device)->mdev, false);
    774		if (IS_ERR(mcounters->hw_cntrs_hndl)) {
    775			ret = PTR_ERR(mcounters->hw_cntrs_hndl);
    776			goto free;
    777		}
    778		hw_hndl = true;
    779	}
    780
    781	if (desc_data) {
    782		/* counters already bound to at least one flow */
    783		if (mcounters->cntrs_max_index) {
    784			ret = -EINVAL;
    785			goto free_hndl;
    786		}
    787
    788		ret = counters_set_description(ibcounters,
    789					       MLX5_IB_COUNTERS_FLOW,
    790					       desc_data,
    791					       cntrs_data->ncounters);
    792		if (ret)
    793			goto free_hndl;
    794
    795	} else if (!mcounters->cntrs_max_index) {
    796		/* counters not bound yet, must have udata passed */
    797		ret = -EINVAL;
    798		goto free_hndl;
    799	}
    800
    801	return 0;
    802
    803free_hndl:
    804	if (hw_hndl) {
    805		mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev,
    806				mcounters->hw_cntrs_hndl);
    807		mcounters->hw_cntrs_hndl = NULL;
    808	}
    809free:
    810	kfree(desc_data);
    811	return ret;
    812}
    813
    814void mlx5_ib_counters_clear_description(struct ib_counters *counters)
    815{
    816	struct mlx5_ib_mcounters *mcounters;
    817
    818	if (!counters || atomic_read(&counters->usecnt) != 1)
    819		return;
    820
    821	mcounters = to_mcounters(counters);
    822
    823	mutex_lock(&mcounters->mcntrs_mutex);
    824	kfree(mcounters->counters_data);
    825	mcounters->counters_data = NULL;
    826	mcounters->cntrs_max_index = 0;
    827	mutex_unlock(&mcounters->mcntrs_mutex);
    828}
    829
    830static int mlx5_ib_modify_stat(struct ib_device *device, u32 port,
    831			       unsigned int index, bool enable)
    832{
    833	struct mlx5_ib_dev *dev = to_mdev(device);
    834	struct mlx5_ib_counters *cnts;
    835	struct mlx5_ib_op_fc *opfc;
    836	u32 num_hw_counters, type;
    837	int ret;
    838
    839	cnts = &dev->port[port - 1].cnts;
    840	num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
    841		cnts->num_ext_ppcnt_counters;
    842	if (index < num_hw_counters ||
    843	    index >= (num_hw_counters + cnts->num_op_counters))
    844		return -EINVAL;
    845
    846	if (!(cnts->descs[index].flags & IB_STAT_FLAG_OPTIONAL))
    847		return -EINVAL;
    848
    849	type = *(u32 *)cnts->descs[index].priv;
    850	if (type >= MLX5_IB_OPCOUNTER_MAX)
    851		return -EINVAL;
    852
    853	opfc = &cnts->opfcs[type];
    854
    855	if (enable) {
    856		if (opfc->fc)
    857			return -EEXIST;
    858
    859		opfc->fc = mlx5_fc_create(dev->mdev, false);
    860		if (IS_ERR(opfc->fc))
    861			return PTR_ERR(opfc->fc);
    862
    863		ret = mlx5_ib_fs_add_op_fc(dev, port, opfc, type);
    864		if (ret) {
    865			mlx5_fc_destroy(dev->mdev, opfc->fc);
    866			opfc->fc = NULL;
    867		}
    868		return ret;
    869	}
    870
    871	if (!opfc->fc)
    872		return -EINVAL;
    873
    874	mlx5_ib_fs_remove_op_fc(dev, opfc, type);
    875	mlx5_fc_destroy(dev->mdev, opfc->fc);
    876	opfc->fc = NULL;
    877	return 0;
    878}
    879
    880static const struct ib_device_ops hw_stats_ops = {
    881	.alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats,
    882	.get_hw_stats = mlx5_ib_get_hw_stats,
    883	.counter_bind_qp = mlx5_ib_counter_bind_qp,
    884	.counter_unbind_qp = mlx5_ib_counter_unbind_qp,
    885	.counter_dealloc = mlx5_ib_counter_dealloc,
    886	.counter_alloc_stats = mlx5_ib_counter_alloc_stats,
    887	.counter_update_stats = mlx5_ib_counter_update_stats,
    888	.modify_hw_stat = IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) ?
    889			  mlx5_ib_modify_stat : NULL,
    890};
    891
    892static const struct ib_device_ops hw_switchdev_stats_ops = {
    893	.alloc_hw_device_stats = mlx5_ib_alloc_hw_device_stats,
    894	.get_hw_stats = mlx5_ib_get_hw_stats,
    895	.counter_bind_qp = mlx5_ib_counter_bind_qp,
    896	.counter_unbind_qp = mlx5_ib_counter_unbind_qp,
    897	.counter_dealloc = mlx5_ib_counter_dealloc,
    898	.counter_alloc_stats = mlx5_ib_counter_alloc_stats,
    899	.counter_update_stats = mlx5_ib_counter_update_stats,
    900};
    901
    902static const struct ib_device_ops counters_ops = {
    903	.create_counters = mlx5_ib_create_counters,
    904	.destroy_counters = mlx5_ib_destroy_counters,
    905	.read_counters = mlx5_ib_read_counters,
    906
    907	INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs),
    908};
    909
    910int mlx5_ib_counters_init(struct mlx5_ib_dev *dev)
    911{
    912	ib_set_device_ops(&dev->ib_dev, &counters_ops);
    913
    914	if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
    915		return 0;
    916
    917	if (is_mdev_switchdev_mode(dev->mdev))
    918		ib_set_device_ops(&dev->ib_dev, &hw_switchdev_stats_ops);
    919	else
    920		ib_set_device_ops(&dev->ib_dev, &hw_stats_ops);
    921	return mlx5_ib_alloc_counters(dev);
    922}
    923
    924void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev)
    925{
    926	if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
    927		return;
    928
    929	mlx5_ib_dealloc_counters(dev);
    930}