cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

nldev.c (67628B)


/*
 * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <linux/module.h>
#include <linux/pid.h>
#include <linux/pid_namespace.h>
#include <linux/mutex.h>
#include <net/netlink.h>
#include <rdma/rdma_cm.h>
#include <rdma/rdma_netlink.h>

#include "core_priv.h"
#include "cma_priv.h"
#include "restrack.h"
#include "uverbs.h"

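/*
 * Callback type used by the RES_GET handlers below: each restrack
 * resource type (QP, CQ, MR, ...) supplies a function with this
 * signature to fill one resource entry into the netlink message.
 */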
typedef int (*res_fill_func_t)(struct sk_buff*, bool,
			       struct rdma_restrack_entry*, uint32_t);

/*
 * Sort array elements by the netlink attribute name
 */
static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
	[RDMA_NLDEV_ATTR_CHARDEV]		= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_CHARDEV_ABI]		= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_CHARDEV_NAME]		= { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
	[RDMA_NLDEV_ATTR_CHARDEV_TYPE]		= { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE },
	[RDMA_NLDEV_ATTR_DEV_DIM]               = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_DEV_INDEX]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_DEV_NAME]		= { .type = NLA_NUL_STRING,
					.len = IB_DEVICE_NAME_MAX },
	[RDMA_NLDEV_ATTR_DEV_NODE_TYPE]		= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_DEV_PROTOCOL]		= { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
	[RDMA_NLDEV_ATTR_DRIVER]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_DRIVER_ENTRY]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE]	= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_DRIVER_STRING]		= { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
	[RDMA_NLDEV_ATTR_DRIVER_S32]		= { .type = NLA_S32 },
	[RDMA_NLDEV_ATTR_DRIVER_S64]		= { .type = NLA_S64 },
	[RDMA_NLDEV_ATTR_DRIVER_U32]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_DRIVER_U64]		= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_FW_VERSION]		= { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
	[RDMA_NLDEV_ATTR_LID]			= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_LINK_TYPE]		= { .type = NLA_NUL_STRING,
					.len = IFNAMSIZ },
	[RDMA_NLDEV_ATTR_LMC]			= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_NDEV_INDEX]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_NDEV_NAME]		= { .type = NLA_NUL_STRING,
					.len = IFNAMSIZ },
	[RDMA_NLDEV_ATTR_NODE_GUID]		= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_PORT_INDEX]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_PORT_PHYS_STATE]	= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_PORT_STATE]		= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_CM_ID]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_CM_IDN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY]	= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_CQ]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_CQE]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_CQN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_CQ_ENTRY]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_CTX]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_CTXN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_CTX_ENTRY]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_DST_ADDR]		= {
			.len = sizeof(struct __kernel_sockaddr_storage) },
	[RDMA_NLDEV_ATTR_RES_IOVA]		= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_RES_KERN_NAME]		= { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
	[RDMA_NLDEV_ATTR_RES_LKEY]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY]	= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_LQPN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_MR]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_MRLEN]		= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_RES_MRN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_MR_ENTRY]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE]	= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_PD]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_PDN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_PD_ENTRY]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_PID]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_POLL_CTX]		= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_PS]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_QP]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_QP_ENTRY]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_RAW]		= { .type = NLA_BINARY },
	[RDMA_NLDEV_ATTR_RES_RKEY]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_RQPN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_RQ_PSN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_SQ_PSN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_SRC_ADDR]		= {
			.len = sizeof(struct __kernel_sockaddr_storage) },
	[RDMA_NLDEV_ATTR_RES_STATE]		= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_SUMMARY]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY]	= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR]= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME]= { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
	[RDMA_NLDEV_ATTR_RES_TYPE]		= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY]= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_USECNT]		= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_RES_SRQ]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_SRQN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_SRQ_ENTRY]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_MIN_RANGE]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_MAX_RANGE]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_SM_LID]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_SUBNET_PREFIX]		= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]	= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_STAT_MODE]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_STAT_RES]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_STAT_COUNTER]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY]	= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]       = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]       = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY]  = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME] = { .type = NLA_NUL_STRING },
	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE] = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_SYS_IMAGE_GUID]	= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID]	= { .type = NLA_U32 },
	[RDMA_NLDEV_NET_NS_FD]			= { .type = NLA_U32 },
	[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]	= { .type = NLA_U8 },
	[RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK]	= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX]	= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC] = { .type = NLA_U8 },
};

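/*
 * Helpers for the rdma_nl_put_driver_*() family: emit the attribute
 * name and the optional print type, then the typed value. Each helper
 * returns -EMSGSIZE when the skb runs out of room, mirroring the
 * nla_put_*() convention.
 */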
static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
				      enum rdma_nldev_print_type print_type)
{
	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, name))
		return -EMSGSIZE;
	if (print_type != RDMA_NLDEV_PRINT_TYPE_UNSPEC &&
	    nla_put_u8(msg, RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE, print_type))
		return -EMSGSIZE;

	return 0;
}

static int _rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name,
				   enum rdma_nldev_print_type print_type,
				   u32 value)
{
	if (put_driver_name_print_type(msg, name, print_type))
		return -EMSGSIZE;
	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DRIVER_U32, value))
		return -EMSGSIZE;

	return 0;
}

static int _rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name,
				   enum rdma_nldev_print_type print_type,
				   u64 value)
{
	if (put_driver_name_print_type(msg, name, print_type))
		return -EMSGSIZE;
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_DRIVER_U64, value,
			      RDMA_NLDEV_ATTR_PAD))
		return -EMSGSIZE;

	return 0;
}

int rdma_nl_put_driver_string(struct sk_buff *msg, const char *name,
			      const char *str)
{
	if (put_driver_name_print_type(msg, name,
				       RDMA_NLDEV_PRINT_TYPE_UNSPEC))
		return -EMSGSIZE;
	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, str))
		return -EMSGSIZE;

	return 0;
}
EXPORT_SYMBOL(rdma_nl_put_driver_string);

int rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, u32 value)
{
	return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
				       value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u32);

int rdma_nl_put_driver_u32_hex(struct sk_buff *msg, const char *name,
			       u32 value)
{
	return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
				       value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u32_hex);

int rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, u64 value)
{
	return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
				       value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u64);

int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name, u64 value)
{
	return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
				       value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u64_hex);

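/*
 * Put the RDMA_NLDEV_ATTR_DEV_INDEX/DEV_NAME pair that identifies the
 * target device in a reply message.
 */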
static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
{
	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
		return -EMSGSIZE;
	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
			   dev_name(&device->dev)))
		return -EMSGSIZE;

	return 0;
}

static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
{
	char fw[IB_FW_VERSION_NAME_MAX];
	int ret = 0;
	u32 port;

	if (fill_nldev_handle(msg, device))
		return -EMSGSIZE;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device)))
		return -EMSGSIZE;

	BUILD_BUG_ON(sizeof(device->attrs.device_cap_flags) != sizeof(u64));
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
			      device->attrs.device_cap_flags,
			      RDMA_NLDEV_ATTR_PAD))
		return -EMSGSIZE;

	ib_get_device_fw_str(device, fw);
	/* Device without FW has strlen(fw) = 0 */
	if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw))
		return -EMSGSIZE;

	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_NODE_GUID,
			      be64_to_cpu(device->node_guid),
			      RDMA_NLDEV_ATTR_PAD))
		return -EMSGSIZE;
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID,
			      be64_to_cpu(device->attrs.sys_image_guid),
			      RDMA_NLDEV_ATTR_PAD))
		return -EMSGSIZE;
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type))
		return -EMSGSIZE;
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, device->use_cq_dim))
		return -EMSGSIZE;

	/*
	 * The link type is determined from the first port. mlx4 devices,
	 * which can potentially have two different link types on the same
	 * IB device, are considered something to be avoided in the future.
	 */
	port = rdma_start_port(device);
	if (rdma_cap_opa_mad(device, port))
		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "opa");
	else if (rdma_protocol_ib(device, port))
		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "ib");
	else if (rdma_protocol_iwarp(device, port))
		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "iw");
	else if (rdma_protocol_roce(device, port))
		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "roce");
	else if (rdma_protocol_usnic(device, port))
		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL,
				     "usnic");
	return ret;
}

static int fill_port_info(struct sk_buff *msg,
			  struct ib_device *device, u32 port,
			  const struct net *net)
{
	struct net_device *netdev = NULL;
	struct ib_port_attr attr;
	int ret;
	u64 cap_flags = 0;

	if (fill_nldev_handle(msg, device))
		return -EMSGSIZE;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
		return -EMSGSIZE;

	ret = ib_query_port(device, port, &attr);
	if (ret)
		return ret;

	if (rdma_protocol_ib(device, port)) {
		BUILD_BUG_ON((sizeof(attr.port_cap_flags) +
				sizeof(attr.port_cap_flags2)) > sizeof(u64));
		cap_flags = attr.port_cap_flags |
			((u64)attr.port_cap_flags2 << 32);
		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
				      cap_flags, RDMA_NLDEV_ATTR_PAD))
			return -EMSGSIZE;
		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
				      attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD))
			return -EMSGSIZE;
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid))
			return -EMSGSIZE;
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid))
			return -EMSGSIZE;
		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_LMC, attr.lmc))
			return -EMSGSIZE;
	}
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_STATE, attr.state))
		return -EMSGSIZE;
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state))
		return -EMSGSIZE;

	netdev = ib_device_get_netdev(device, port);
	if (netdev && net_eq(dev_net(netdev), net)) {
		ret = nla_put_u32(msg,
				  RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
		if (ret)
			goto out;
		ret = nla_put_string(msg,
				     RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
	}

out:
	if (netdev)
		dev_put(netdev);
	return ret;
}

static int fill_res_info_entry(struct sk_buff *msg,
			       const char *name, u64 curr)
{
	struct nlattr *entry_attr;

	entry_attr = nla_nest_start_noflag(msg,
					   RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY);
	if (!entry_attr)
		return -EMSGSIZE;

	if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, name))
		goto err;
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, curr,
			      RDMA_NLDEV_ATTR_PAD))
		goto err;

	nla_nest_end(msg, entry_attr);
	return 0;

err:
	nla_nest_cancel(msg, entry_attr);
	return -EMSGSIZE;
}

static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
{
	static const char * const names[RDMA_RESTRACK_MAX] = {
		[RDMA_RESTRACK_PD] = "pd",
		[RDMA_RESTRACK_CQ] = "cq",
		[RDMA_RESTRACK_QP] = "qp",
		[RDMA_RESTRACK_CM_ID] = "cm_id",
		[RDMA_RESTRACK_MR] = "mr",
		[RDMA_RESTRACK_CTX] = "ctx",
		[RDMA_RESTRACK_SRQ] = "srq",
	};

	struct nlattr *table_attr;
	int ret, i, curr;

	if (fill_nldev_handle(msg, device))
		return -EMSGSIZE;

	table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_RES_SUMMARY);
	if (!table_attr)
		return -EMSGSIZE;

	for (i = 0; i < RDMA_RESTRACK_MAX; i++) {
		if (!names[i])
			continue;
		curr = rdma_restrack_count(device, i);
		ret = fill_res_info_entry(msg, names[i], curr);
		if (ret)
			goto err;
	}

	nla_nest_end(msg, table_attr);
	return 0;

err:
	nla_nest_cancel(msg, table_attr);
	return ret;
}

static int fill_res_name_pid(struct sk_buff *msg,
			     struct rdma_restrack_entry *res)
{
	int err = 0;

	/*
	 * For user resources, userspace should read /proc/PID/comm to get
	 * the name of the task.
	 */
	if (rdma_is_kernel_res(res)) {
		err = nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
				     res->kern_name);
	} else {
		pid_t pid;

		pid = task_pid_vnr(res->task);
		/*
		 * A pid of zero means the task is dead and in a zombie
		 * state; there is no need to print the PID anymore.
		 */
		if (pid)
			/*
			 * This part is racy: the task can be killed and the
			 * PID will be zero right here, but that is ok; the
			 * next query won't return a PID. We don't promise
			 * real-time reflection of SW objects.
			 */
			err = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, pid);
	}

	return err ? -EMSGSIZE : 0;
}

static int fill_res_qp_entry_query(struct sk_buff *msg,
				   struct rdma_restrack_entry *res,
				   struct ib_device *dev,
				   struct ib_qp *qp)
{
	struct ib_qp_init_attr qp_init_attr;
	struct ib_qp_attr qp_attr;
	int ret;

	ret = ib_query_qp(qp, &qp_attr, 0, &qp_init_attr);
	if (ret)
		return ret;

	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
				qp_attr.dest_qp_num))
			goto err;
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQ_PSN,
				qp_attr.rq_psn))
			goto err;
	}

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SQ_PSN, qp_attr.sq_psn))
		goto err;

	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC ||
	    qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT) {
		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE,
			       qp_attr.path_mig_state))
			goto err;
	}
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, qp->qp_type))
		goto err;
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
		goto err;

	if (dev->ops.fill_res_qp_entry)
		return dev->ops.fill_res_qp_entry(msg, qp);
	return 0;

err:	return -EMSGSIZE;
}

static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_qp *qp = container_of(res, struct ib_qp, res);
	struct ib_device *dev = qp->device;
	int ret;

	if (port && port != qp->port)
		return -EAGAIN;

	/* In create_qp() port is not set yet */
	if (qp->port && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp->port))
		return -EINVAL;

	ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num);
	if (ret)
		return -EMSGSIZE;

	if (!rdma_is_kernel_res(res) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id))
		return -EMSGSIZE;

	ret = fill_res_name_pid(msg, res);
	if (ret)
		return -EMSGSIZE;

	return fill_res_qp_entry_query(msg, res, dev, qp);
}

static int fill_res_qp_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
				 struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_qp *qp = container_of(res, struct ib_qp, res);
	struct ib_device *dev = qp->device;

	if (port && port != qp->port)
		return -EAGAIN;
	if (!dev->ops.fill_res_qp_entry_raw)
		return -EINVAL;
	return dev->ops.fill_res_qp_entry_raw(msg, qp);
}

static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
				struct rdma_restrack_entry *res, uint32_t port)
{
	struct rdma_id_private *id_priv =
				container_of(res, struct rdma_id_private, res);
	struct ib_device *dev = id_priv->id.device;
	struct rdma_cm_id *cm_id = &id_priv->id;

	if (port && port != cm_id->port_num)
		return 0;

	if (cm_id->port_num &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
		goto err;

	if (id_priv->qp_num) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num))
			goto err;
		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type))
			goto err;
	}

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps))
		goto err;

	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state))
		goto err;

	if (cm_id->route.addr.src_addr.ss_family &&
	    nla_put(msg, RDMA_NLDEV_ATTR_RES_SRC_ADDR,
		    sizeof(cm_id->route.addr.src_addr),
		    &cm_id->route.addr.src_addr))
		goto err;
	if (cm_id->route.addr.dst_addr.ss_family &&
	    nla_put(msg, RDMA_NLDEV_ATTR_RES_DST_ADDR,
		    sizeof(cm_id->route.addr.dst_addr),
		    &cm_id->route.addr.dst_addr))
		goto err;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CM_IDN, res->id))
		goto err;

	if (fill_res_name_pid(msg, res))
		goto err;

	if (dev->ops.fill_res_cm_id_entry)
		return dev->ops.fill_res_cm_id_entry(msg, cm_id);
	return 0;

err: return -EMSGSIZE;
}

static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_cq *cq = container_of(res, struct ib_cq, res);
	struct ib_device *dev = cq->device;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe))
		return -EMSGSIZE;
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
			      atomic_read(&cq->usecnt), RDMA_NLDEV_ATTR_PAD))
		return -EMSGSIZE;

	/* Poll context is only valid for kernel CQs */
	if (rdma_is_kernel_res(res) &&
	    nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx))
		return -EMSGSIZE;

	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, (cq->dim != NULL)))
		return -EMSGSIZE;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id))
		return -EMSGSIZE;
	if (!rdma_is_kernel_res(res) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
			cq->uobject->uevent.uobject.context->res.id))
		return -EMSGSIZE;

	if (fill_res_name_pid(msg, res))
		return -EMSGSIZE;

	return (dev->ops.fill_res_cq_entry) ?
		dev->ops.fill_res_cq_entry(msg, cq) : 0;
}

static int fill_res_cq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
				 struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_cq *cq = container_of(res, struct ib_cq, res);
	struct ib_device *dev = cq->device;

	if (!dev->ops.fill_res_cq_entry_raw)
		return -EINVAL;
	return dev->ops.fill_res_cq_entry_raw(msg, cq);
}

static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_mr *mr = container_of(res, struct ib_mr, res);
	struct ib_device *dev = mr->pd->device;

	if (has_cap_net_admin) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
			return -EMSGSIZE;
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
			return -EMSGSIZE;
	}

	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length,
			      RDMA_NLDEV_ATTR_PAD))
		return -EMSGSIZE;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
		return -EMSGSIZE;

	if (!rdma_is_kernel_res(res) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, mr->pd->res.id))
		return -EMSGSIZE;

	if (fill_res_name_pid(msg, res))
		return -EMSGSIZE;

	return (dev->ops.fill_res_mr_entry) ?
		       dev->ops.fill_res_mr_entry(msg, mr) :
		       0;
}

static int fill_res_mr_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
				 struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_mr *mr = container_of(res, struct ib_mr, res);
	struct ib_device *dev = mr->pd->device;

	if (!dev->ops.fill_res_mr_entry_raw)
		return -EINVAL;
	return dev->ops.fill_res_mr_entry_raw(msg, mr);
}

static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_pd *pd = container_of(res, struct ib_pd, res);

	if (has_cap_net_admin) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY,
				pd->local_dma_lkey))
			goto err;
		if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
		    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
				pd->unsafe_global_rkey))
			goto err;
	}
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
			      atomic_read(&pd->usecnt), RDMA_NLDEV_ATTR_PAD))
		goto err;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, res->id))
		goto err;

	if (!rdma_is_kernel_res(res) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
			pd->uobject->context->res.id))
		goto err;

	return fill_res_name_pid(msg, res);

err:	return -EMSGSIZE;
}

static int fill_res_ctx_entry(struct sk_buff *msg, bool has_cap_net_admin,
			      struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_ucontext *ctx = container_of(res, struct ib_ucontext, res);

	if (rdma_is_kernel_res(res))
		return 0;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN, ctx->res.id))
		return -EMSGSIZE;

	return fill_res_name_pid(msg, res);
}

static int fill_res_range_qp_entry(struct sk_buff *msg, uint32_t min_range,
				   uint32_t max_range)
{
	struct nlattr *entry_attr;

	if (!min_range)
		return 0;

	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
	if (!entry_attr)
		return -EMSGSIZE;

	if (min_range == max_range) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, min_range))
			goto err;
	} else {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_MIN_RANGE, min_range))
			goto err;
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_MAX_RANGE, max_range))
			goto err;
	}
	nla_nest_end(msg, entry_attr);
	return 0;

err:
	nla_nest_cancel(msg, entry_attr);
	return -EMSGSIZE;
}

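/*
 * Dump the QPs attached to an SRQ as a list of [min, max] ranges rather
 * than one entry per QP: runs of consecutive qp_nums are folded into a
 * single range entry by fill_res_range_qp_entry() above.
 */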
static int fill_res_srq_qps(struct sk_buff *msg, struct ib_srq *srq)
{
	uint32_t min_range = 0, prev = 0;
	struct rdma_restrack_entry *res;
	struct rdma_restrack_root *rt;
	struct nlattr *table_attr;
	struct ib_qp *qp = NULL;
	unsigned long id = 0;

	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP);
	if (!table_attr)
		return -EMSGSIZE;

	rt = &srq->device->res[RDMA_RESTRACK_QP];
	xa_lock(&rt->xa);
	xa_for_each(&rt->xa, id, res) {
		if (!rdma_restrack_get(res))
			continue;

		qp = container_of(res, struct ib_qp, res);
		if (!qp->srq || (qp->srq->res.id != srq->res.id)) {
			rdma_restrack_put(res);
			continue;
		}

		if (qp->qp_num < prev)
			/* qp_num should be ascending */
			goto err_loop;

		if (min_range == 0) {
			min_range = qp->qp_num;
		} else if (qp->qp_num > (prev + 1)) {
			if (fill_res_range_qp_entry(msg, min_range, prev))
				goto err_loop;

			min_range = qp->qp_num;
		}
		prev = qp->qp_num;
		rdma_restrack_put(res);
	}

	xa_unlock(&rt->xa);

	if (fill_res_range_qp_entry(msg, min_range, prev))
		goto err;

	nla_nest_end(msg, table_attr);
	return 0;

err_loop:
	rdma_restrack_put(res);
	xa_unlock(&rt->xa);
err:
	nla_nest_cancel(msg, table_attr);
	return -EMSGSIZE;
}

static int fill_res_srq_entry(struct sk_buff *msg, bool has_cap_net_admin,
			      struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_srq *srq = container_of(res, struct ib_srq, res);

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SRQN, srq->res.id))
		goto err;

	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, srq->srq_type))
		goto err;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, srq->pd->res.id))
		goto err;

	if (ib_srq_has_cq(srq->srq_type)) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN,
				srq->ext.cq->res.id))
			goto err;
	}

	if (fill_res_srq_qps(msg, srq))
		goto err;

	return fill_res_name_pid(msg, res);

err:
	return -EMSGSIZE;
}

static int fill_stat_counter_mode(struct sk_buff *msg,
				  struct rdma_counter *counter)
{
	struct rdma_counter_mode *m = &counter->mode;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, m->mode))
		return -EMSGSIZE;

	if (m->mode == RDMA_COUNTER_MODE_AUTO) {
		if ((m->mask & RDMA_COUNTER_MASK_QP_TYPE) &&
		    nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, m->param.qp_type))
			return -EMSGSIZE;

		if ((m->mask & RDMA_COUNTER_MASK_PID) &&
		    fill_res_name_pid(msg, &counter->res))
			return -EMSGSIZE;
	}

	return 0;
}

static int fill_stat_counter_qp_entry(struct sk_buff *msg, u32 qpn)
{
	struct nlattr *entry_attr;

	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
	if (!entry_attr)
		return -EMSGSIZE;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn))
		goto err;

	nla_nest_end(msg, entry_attr);
	return 0;

err:
	nla_nest_cancel(msg, entry_attr);
	return -EMSGSIZE;
}

static int fill_stat_counter_qps(struct sk_buff *msg,
				 struct rdma_counter *counter)
{
	struct rdma_restrack_entry *res;
	struct rdma_restrack_root *rt;
	struct nlattr *table_attr;
	struct ib_qp *qp = NULL;
	unsigned long id = 0;
	int ret = 0;

	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP);

	rt = &counter->device->res[RDMA_RESTRACK_QP];
	xa_lock(&rt->xa);
	xa_for_each(&rt->xa, id, res) {
		qp = container_of(res, struct ib_qp, res);
		if (!qp->counter || (qp->counter->id != counter->id))
			continue;

		ret = fill_stat_counter_qp_entry(msg, qp->qp_num);
		if (ret)
			goto err;
	}

	xa_unlock(&rt->xa);
	nla_nest_end(msg, table_attr);
	return 0;

err:
	xa_unlock(&rt->xa);
	nla_nest_cancel(msg, table_attr);
	return ret;
}

int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name,
				 u64 value)
{
	struct nlattr *entry_attr;

	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY);
	if (!entry_attr)
		return -EMSGSIZE;

	if (nla_put_string(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME,
			   name))
		goto err;
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE,
			      value, RDMA_NLDEV_ATTR_PAD))
		goto err;

	nla_nest_end(msg, entry_attr);
	return 0;

err:
	nla_nest_cancel(msg, entry_attr);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(rdma_nl_stat_hwcounter_entry);

static int fill_stat_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
			      struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_mr *mr = container_of(res, struct ib_mr, res);
	struct ib_device *dev = mr->pd->device;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
		goto err;

	if (dev->ops.fill_stat_mr_entry)
		return dev->ops.fill_stat_mr_entry(msg, mr);
	return 0;

err:
	return -EMSGSIZE;
}

static int fill_stat_counter_hwcounters(struct sk_buff *msg,
					struct rdma_counter *counter)
{
	struct rdma_hw_stats *st = counter->stats;
	struct nlattr *table_attr;
	int i;

	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
	if (!table_attr)
		return -EMSGSIZE;

	mutex_lock(&st->lock);
	for (i = 0; i < st->num_counters; i++) {
		if (test_bit(i, st->is_disabled))
			continue;
		if (rdma_nl_stat_hwcounter_entry(msg, st->descs[i].name,
						 st->value[i]))
			goto err;
	}
	mutex_unlock(&st->lock);

	nla_nest_end(msg, table_attr);
	return 0;

err:
	mutex_unlock(&st->lock);
	nla_nest_cancel(msg, table_attr);
	return -EMSGSIZE;
}

static int fill_res_counter_entry(struct sk_buff *msg, bool has_cap_net_admin,
				  struct rdma_restrack_entry *res,
				  uint32_t port)
{
	struct rdma_counter *counter =
		container_of(res, struct rdma_counter, res);

	if (port && port != counter->port)
		return -EAGAIN;

	/* Dump it even if the query failed */
	rdma_counter_query_stats(counter);

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, counter->port) ||
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, counter->id) ||
	    fill_stat_counter_mode(msg, counter) ||
	    fill_stat_counter_qps(msg, counter) ||
	    fill_stat_counter_hwcounters(msg, counter))
		return -EMSGSIZE;

	return 0;
}

static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
			  struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct ib_device *device;
	struct sk_buff *msg;
	u32 index;
	int err;

	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
				     nldev_policy, extack);
	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);

	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		err = -ENOMEM;
		goto err;
	}

	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
			0, 0);

	err = fill_dev_info(msg, device);
	if (err)
		goto err_free;

	nlmsg_end(msg, nlh);

	ib_device_put(device);
	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);

err_free:
	nlmsg_free(msg);
err:
	ib_device_put(device);
	return err;
}

static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
			  struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct ib_device *device;
	u32 index;
	int err;

	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
				     nldev_policy, extack);
	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	if (tb[RDMA_NLDEV_ATTR_DEV_NAME]) {
		char name[IB_DEVICE_NAME_MAX] = {};

		nla_strscpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
			    IB_DEVICE_NAME_MAX);
		if (strlen(name) == 0) {
			err = -EINVAL;
			goto done;
		}
		err = ib_device_rename(device, name);
		goto done;
	}

	if (tb[RDMA_NLDEV_NET_NS_FD]) {
		u32 ns_fd;

		ns_fd = nla_get_u32(tb[RDMA_NLDEV_NET_NS_FD]);
		err = ib_device_set_netns_put(skb, device, ns_fd);
		goto put_done;
	}

	if (tb[RDMA_NLDEV_ATTR_DEV_DIM]) {
		u8 use_dim;

		use_dim = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_DIM]);
		err = ib_device_set_dim(device,  use_dim);
		goto done;
	}

done:
	ib_device_put(device);
put_done:
	return err;
}

static int _nldev_get_dumpit(struct ib_device *device,
			     struct sk_buff *skb,
			     struct netlink_callback *cb,
			     unsigned int idx)
{
	int start = cb->args[0];
	struct nlmsghdr *nlh;

	if (idx < start)
		return 0;

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
			0, NLM_F_MULTI);

	if (fill_dev_info(skb, device)) {
		nlmsg_cancel(skb, nlh);
		goto out;
	}

	nlmsg_end(skb, nlh);

	idx++;

out:	cb->args[0] = idx;
	return skb->len;
}

static int nldev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
	/*
	 * There is no need to take a lock here, because we rely on
	 * ib_core's locking.
	 */
	return ib_enum_all_devs(_nldev_get_dumpit, skb, cb);
}

static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
			       struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct ib_device *device;
	struct sk_buff *msg;
	u32 index;
	u32 port;
	int err;

	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
				     nldev_policy, extack);
	if (err ||
	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
	if (!rdma_is_port_valid(device, port)) {
		err = -EINVAL;
		goto err;
	}

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		err = -ENOMEM;
		goto err;
	}

	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
			0, 0);

	err = fill_port_info(msg, device, port, sock_net(skb->sk));
	if (err)
		goto err_free;

	nlmsg_end(msg, nlh);
	ib_device_put(device);

	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);

err_free:
	nlmsg_free(msg);
err:
	ib_device_put(device);
	return err;
}

static int nldev_port_get_dumpit(struct sk_buff *skb,
				 struct netlink_callback *cb)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct ib_device *device;
	int start = cb->args[0];
	struct nlmsghdr *nlh;
	u32 idx = 0;
	u32 ifindex;
	int err;
	unsigned int p;

	err = nlmsg_parse_deprecated(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
				     nldev_policy, NULL);
	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), ifindex);
	if (!device)
		return -EINVAL;

	rdma_for_each_port (device, p) {
		/*
		 * The dumpit function returns all information from a
		 * specific index onward. That index is taken from the
		 * netlink request sent by the user and is available in
		 * cb->args[0].
		 *
		 * Usually, the user doesn't fill this field, which causes
		 * everything to be returned.
		 */
		if (idx < start) {
			idx++;
			continue;
		}

		nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
				cb->nlh->nlmsg_seq,
				RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
						 RDMA_NLDEV_CMD_PORT_GET),
				0, NLM_F_MULTI);

		if (fill_port_info(skb, device, p, sock_net(skb->sk))) {
			nlmsg_cancel(skb, nlh);
			goto out;
		}
		idx++;
		nlmsg_end(skb, nlh);
	}

out:
	ib_device_put(device);
	cb->args[0] = idx;
	return skb->len;
}

static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct ib_device *device;
	struct sk_buff *msg;
	u32 index;
	int ret;

	ret = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
				     nldev_policy, extack);
	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		ret = -ENOMEM;
		goto err;
	}

	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
			0, 0);

	ret = fill_res_info(msg, device);
	if (ret)
		goto err_free;

	nlmsg_end(msg, nlh);
	ib_device_put(device);
	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);

err_free:
	nlmsg_free(msg);
err:
	ib_device_put(device);
	return ret;
}

static int _nldev_res_get_dumpit(struct ib_device *device,
				 struct sk_buff *skb,
				 struct netlink_callback *cb,
				 unsigned int idx)
{
	int start = cb->args[0];
	struct nlmsghdr *nlh;

	if (idx < start)
		return 0;

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
			0, NLM_F_MULTI);

	if (fill_res_info(skb, device)) {
		nlmsg_cancel(skb, nlh);
		goto out;
	}
	nlmsg_end(skb, nlh);

	idx++;

out:
	cb->args[0] = idx;
	return skb->len;
}

static int nldev_res_get_dumpit(struct sk_buff *skb,
				struct netlink_callback *cb)
{
	return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb);
}

struct nldev_fill_res_entry {
	enum rdma_nldev_attr nldev_attr;
	u8 flags;
	u32 entry;
	u32 id;
};

enum nldev_res_flags {
	NLDEV_PER_DEV = 1 << 0,
};

static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
	[RDMA_RESTRACK_QP] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
		.entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_LQPN,
	},
	[RDMA_RESTRACK_CM_ID] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
		.entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_CM_IDN,
	},
	[RDMA_RESTRACK_CQ] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_CQ_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_CQN,
	},
	[RDMA_RESTRACK_MR] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_MR_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_MRN,
	},
	[RDMA_RESTRACK_PD] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_PDN,
	},
	[RDMA_RESTRACK_COUNTER] = {
		.nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER,
		.entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY,
		.id = RDMA_NLDEV_ATTR_STAT_COUNTER_ID,
	},
	[RDMA_RESTRACK_CTX] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_CTX,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_CTX_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_CTXN,
	},
	[RDMA_RESTRACK_SRQ] = {
		.nldev_attr = RDMA_NLDEV_ATTR_RES_SRQ,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_SRQ_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_SRQN,
	},

};

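/*
 * Common handler for "get one resource by id" requests: look up the
 * device and the restrack entry named by fe->id, then let the
 * type-specific fill_func format the reply.
 */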
static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
			       struct netlink_ext_ack *extack,
			       enum rdma_restrack_type res_type,
			       res_fill_func_t fill_func)
{
	const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct rdma_restrack_entry *res;
	struct ib_device *device;
	u32 index, id, port = 0;
	bool has_cap_net_admin;
	struct sk_buff *msg;
	int ret;

	ret = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
				     nldev_policy, extack);
	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !fe->id || !tb[fe->id])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
		if (!rdma_is_port_valid(device, port)) {
			ret = -EINVAL;
			goto err;
		}
	}

	if ((port && fe->flags & NLDEV_PER_DEV) ||
	    (!port && ~fe->flags & NLDEV_PER_DEV)) {
		ret = -EINVAL;
		goto err;
	}

	id = nla_get_u32(tb[fe->id]);
	res = rdma_restrack_get_byid(device, res_type, id);
	if (IS_ERR(res)) {
		ret = PTR_ERR(res);
		goto err;
	}

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		ret = -ENOMEM;
		goto err_get;
	}

	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
					 RDMA_NL_GET_OP(nlh->nlmsg_type)),
			0, 0);

	if (fill_nldev_handle(msg, device)) {
		ret = -EMSGSIZE;
		goto err_free;
	}

	has_cap_net_admin = netlink_capable(skb, CAP_NET_ADMIN);

	ret = fill_func(msg, has_cap_net_admin, res, port);
	if (ret)
		goto err_free;

	rdma_restrack_put(res);
	nlmsg_end(msg, nlh);
	ib_device_put(device);
	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);

err_free:
	nlmsg_free(msg);
err_get:
	rdma_restrack_put(res);
err:
	ib_device_put(device);
	return ret;
}

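/*
 * Common handler for resource dump requests: walk the device's restrack
 * xarray for the given type and emit one nested entry per object,
 * resuming from cb->args[0] across multi-part dumps.
 */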
static int res_get_common_dumpit(struct sk_buff *skb,
				 struct netlink_callback *cb,
				 enum rdma_restrack_type res_type,
				 res_fill_func_t fill_func)
{
	const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct rdma_restrack_entry *res;
	struct rdma_restrack_root *rt;
	int err, ret = 0, idx = 0;
	struct nlattr *table_attr;
	struct nlattr *entry_attr;
	struct ib_device *device;
	int start = cb->args[0];
	bool has_cap_net_admin;
	struct nlmsghdr *nlh;
	unsigned long id;
	u32 index, port = 0;
	bool filled = false;

	err = nlmsg_parse_deprecated(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
				     nldev_policy, NULL);
	/*
	 * Right now, we expect the device index in order to get resource
	 * information, but this code could be extended to return all
	 * devices in one shot by checking for the existence of
	 * RDMA_NLDEV_ATTR_DEV_INDEX; if it doesn't exist, iterate over
	 * all devices. That is not needed for now.
	 */
	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	/*
	 * If no PORT_INDEX is supplied, we will return all QPs from that device
	 */
	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
		if (!rdma_is_port_valid(device, port)) {
			ret = -EINVAL;
			goto err_index;
		}
	}

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
					 RDMA_NL_GET_OP(cb->nlh->nlmsg_type)),
			0, NLM_F_MULTI);

	if (fill_nldev_handle(skb, device)) {
		ret = -EMSGSIZE;
		goto err;
	}

	table_attr = nla_nest_start_noflag(skb, fe->nldev_attr);
	if (!table_attr) {
		ret = -EMSGSIZE;
		goto err;
	}

	has_cap_net_admin = netlink_capable(cb->skb, CAP_NET_ADMIN);

	rt = &device->res[res_type];
	xa_lock(&rt->xa);
	/*
	 * FIXME: if skipping ahead is common, this loop should use
	 * xas_for_each & xas_pause to optimize; we can have a lot of
	 * objects.
	 */
	xa_for_each(&rt->xa, id, res) {
		if (idx < start || !rdma_restrack_get(res))
			goto next;

		xa_unlock(&rt->xa);

		filled = true;

		entry_attr = nla_nest_start_noflag(skb, fe->entry);
		if (!entry_attr) {
			ret = -EMSGSIZE;
			rdma_restrack_put(res);
			goto msg_full;
		}

		ret = fill_func(skb, has_cap_net_admin, res, port);

		rdma_restrack_put(res);

		if (ret) {
			nla_nest_cancel(skb, entry_attr);
			if (ret == -EMSGSIZE)
				goto msg_full;
			if (ret == -EAGAIN)
				goto again;
			goto res_err;
		}
		nla_nest_end(skb, entry_attr);
again:		xa_lock(&rt->xa);
next:		idx++;
	}
	xa_unlock(&rt->xa);

msg_full:
	nla_nest_end(skb, table_attr);
	nlmsg_end(skb, nlh);
	cb->args[0] = idx;

	/*
	 * No more entries to fill, cancel the message and
	 * return 0 to mark end of dumpit.
	 */
	if (!filled)
		goto err;

	ib_device_put(device);
	return skb->len;

res_err:
	nla_nest_cancel(skb, table_attr);

err:
	nlmsg_cancel(skb, nlh);

err_index:
	ib_device_put(device);
	return ret;
}

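/*
 * Generate the doit/dumpit handler pair for each resource type from the
 * common helpers above; fill_res_<name>_entry supplies the per-type
 * formatting.
 */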
   1615#define RES_GET_FUNCS(name, type)                                              \
   1616	static int nldev_res_get_##name##_dumpit(struct sk_buff *skb,          \
   1617						 struct netlink_callback *cb)  \
   1618	{                                                                      \
   1619		return res_get_common_dumpit(skb, cb, type,                    \
   1620					     fill_res_##name##_entry);         \
   1621	}                                                                      \
   1622	static int nldev_res_get_##name##_doit(struct sk_buff *skb,            \
   1623					       struct nlmsghdr *nlh,           \
   1624					       struct netlink_ext_ack *extack) \
   1625	{                                                                      \
   1626		return res_get_common_doit(skb, nlh, extack, type,             \
   1627					   fill_res_##name##_entry);           \
   1628	}
   1629
   1630RES_GET_FUNCS(qp, RDMA_RESTRACK_QP);
   1631RES_GET_FUNCS(qp_raw, RDMA_RESTRACK_QP);
   1632RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID);
   1633RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ);
   1634RES_GET_FUNCS(cq_raw, RDMA_RESTRACK_CQ);
   1635RES_GET_FUNCS(pd, RDMA_RESTRACK_PD);
   1636RES_GET_FUNCS(mr, RDMA_RESTRACK_MR);
   1637RES_GET_FUNCS(mr_raw, RDMA_RESTRACK_MR);
   1638RES_GET_FUNCS(counter, RDMA_RESTRACK_COUNTER);
   1639RES_GET_FUNCS(ctx, RDMA_RESTRACK_CTX);
   1640RES_GET_FUNCS(srq, RDMA_RESTRACK_SRQ);
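        /*
         * For reference, RES_GET_FUNCS(qp, RDMA_RESTRACK_QP) above expands to:
         *
         *	static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
         *						 struct netlink_callback *cb)
         *	{
         *		return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_QP,
         *					     fill_res_qp_entry);
         *	}
         *
         * plus the matching nldev_res_get_qp_doit() wrapper; the other
         * invocations follow the same pattern for their restrack type.
         */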
   1641
   1642static LIST_HEAD(link_ops);
   1643static DECLARE_RWSEM(link_ops_rwsem);
   1644
   1645static const struct rdma_link_ops *link_ops_get(const char *type)
   1646{
   1647	const struct rdma_link_ops *ops;
   1648
   1649	list_for_each_entry(ops, &link_ops, list) {
   1650		if (!strcmp(ops->type, type))
   1651			goto out;
   1652	}
   1653	ops = NULL;
   1654out:
   1655	return ops;
   1656}
   1657
   1658void rdma_link_register(struct rdma_link_ops *ops)
   1659{
   1660	down_write(&link_ops_rwsem);
   1661	if (WARN_ON_ONCE(link_ops_get(ops->type)))
   1662		goto out;
   1663	list_add(&ops->list, &link_ops);
   1664out:
   1665	up_write(&link_ops_rwsem);
   1666}
   1667EXPORT_SYMBOL(rdma_link_register);
   1668
   1669void rdma_link_unregister(struct rdma_link_ops *ops)
   1670{
   1671	down_write(&link_ops_rwsem);
   1672	list_del(&ops->list);
   1673	up_write(&link_ops_rwsem);
   1674}
   1675EXPORT_SYMBOL(rdma_link_unregister);
   1676
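        /*
         * RDMA_NLDEV_CMD_NEWLINK: create a soft RDMA device (e.g. rxe or
         * siw) on top of a netdev. With the iproute2 rdma tool, for example:
         *
         *	rdma link add rxe0 type rxe netdev eth0
         *
         * carries DEV_NAME "rxe0", LINK_TYPE "rxe" and NDEV_NAME "eth0";
         * the matching rdma_link_ops is looked up (request_module()ing
         * rdma-link-rxe if necessary) and its newlink() callback runs.
         */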
   1677static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
   1678			  struct netlink_ext_ack *extack)
   1679{
   1680	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
   1681	char ibdev_name[IB_DEVICE_NAME_MAX];
   1682	const struct rdma_link_ops *ops;
   1683	char ndev_name[IFNAMSIZ];
   1684	struct net_device *ndev;
   1685	char type[IFNAMSIZ];
   1686	int err;
   1687
   1688	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
   1689				     nldev_policy, extack);
   1690	if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] ||
   1691	    !tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME])
   1692		return -EINVAL;
   1693
   1694	nla_strscpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
   1695		    sizeof(ibdev_name));
   1696	if (strchr(ibdev_name, '%') || strlen(ibdev_name) == 0)
   1697		return -EINVAL;
   1698
   1699	nla_strscpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
   1700	nla_strscpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME],
   1701		    sizeof(ndev_name));
   1702
   1703	ndev = dev_get_by_name(sock_net(skb->sk), ndev_name);
   1704	if (!ndev)
   1705		return -ENODEV;
   1706
   1707	down_read(&link_ops_rwsem);
   1708	ops = link_ops_get(type);
   1709#ifdef CONFIG_MODULES
   1710	if (!ops) {
   1711		up_read(&link_ops_rwsem);
   1712		request_module("rdma-link-%s", type);
   1713		down_read(&link_ops_rwsem);
   1714		ops = link_ops_get(type);
   1715	}
   1716#endif
   1717	err = ops ? ops->newlink(ibdev_name, ndev) : -EINVAL;
   1718	up_read(&link_ops_rwsem);
   1719	dev_put(ndev);
   1720
   1721	return err;
   1722}
   1723
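        /*
         * RDMA_NLDEV_CMD_DELLINK: remove a device created via NEWLINK,
         * e.g. "rdma link delete rxe0". Only devices whose driver sets
         * IBK_ALLOW_USER_UNREG can be removed this way.
         */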
   1724static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
   1725			  struct netlink_ext_ack *extack)
   1726{
   1727	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
   1728	struct ib_device *device;
   1729	u32 index;
   1730	int err;
   1731
   1732	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
   1733				     nldev_policy, extack);
   1734	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
   1735		return -EINVAL;
   1736
   1737	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
   1738	device = ib_device_get_by_index(sock_net(skb->sk), index);
   1739	if (!device)
   1740		return -EINVAL;
   1741
   1742	if (!(device->attrs.kernel_cap_flags & IBK_ALLOW_USER_UNREG)) {
   1743		ib_device_put(device);
   1744		return -EINVAL;
   1745	}
   1746
   1747	ib_unregister_device_and_put(device);
   1748	return 0;
   1749}
   1750
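        /*
         * RDMA_NLDEV_CMD_GET_CHARDEV: resolve the character device a
         * kernel client (e.g. "uverbs" or "umad") exposes for a given
         * device, returning its dev_t, ABI version and name, so that
         * userspace (e.g. rdma-core) can locate the right /dev node
         * without scanning sysfs.
         */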
   1751static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh,
   1752			     struct netlink_ext_ack *extack)
   1753{
   1754	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
   1755	char client_name[RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE];
   1756	struct ib_client_nl_info data = {};
   1757	struct ib_device *ibdev = NULL;
   1758	struct sk_buff *msg;
   1759	u32 index;
   1760	int err;
   1761
   1762	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
   1763			  extack);
   1764	if (err || !tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE])
   1765		return -EINVAL;
   1766
   1767	nla_strscpy(client_name, tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE],
   1768		    sizeof(client_name));
   1769
   1770	if (tb[RDMA_NLDEV_ATTR_DEV_INDEX]) {
   1771		index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
   1772		ibdev = ib_device_get_by_index(sock_net(skb->sk), index);
   1773		if (!ibdev)
   1774			return -EINVAL;
   1775
   1776		if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
   1777			data.port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
   1778			if (!rdma_is_port_valid(ibdev, data.port)) {
   1779				err = -EINVAL;
   1780				goto out_put;
   1781			}
   1782		} else {
   1783			data.port = -1;
   1784		}
   1785	} else if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
   1786		return -EINVAL;
   1787	}
   1788
   1789	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
   1790	if (!msg) {
   1791		err = -ENOMEM;
   1792		goto out_put;
   1793	}
   1794	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
   1795			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
   1796					 RDMA_NLDEV_CMD_GET_CHARDEV),
   1797			0, 0);
   1798
   1799	data.nl_msg = msg;
   1800	err = ib_get_client_nl_info(ibdev, client_name, &data);
   1801	if (err)
   1802		goto out_nlmsg;
   1803
   1804	err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV,
   1805				huge_encode_dev(data.cdev->devt),
   1806				RDMA_NLDEV_ATTR_PAD);
   1807	if (err)
   1808		goto out_data;
   1809	err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV_ABI, data.abi,
   1810				RDMA_NLDEV_ATTR_PAD);
   1811	if (err)
   1812		goto out_data;
   1813	if (nla_put_string(msg, RDMA_NLDEV_ATTR_CHARDEV_NAME,
   1814			   dev_name(data.cdev))) {
   1815		err = -EMSGSIZE;
   1816		goto out_data;
   1817	}
   1818
   1819	nlmsg_end(msg, nlh);
   1820	put_device(data.cdev);
   1821	if (ibdev)
   1822		ib_device_put(ibdev);
   1823	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
   1824
   1825out_data:
   1826	put_device(data.cdev);
   1827out_nlmsg:
   1828	nlmsg_free(msg);
   1829out_put:
   1830	if (ibdev)
   1831		ib_device_put(ibdev);
   1832	return err;
   1833}
   1834
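        /*
         * RDMA_NLDEV_CMD_SYS_GET: report global subsystem state, namely
         * the network-namespace mode and copy-on-fork support. This is
         * what "rdma system show" from iproute2 prints, typically:
         *
         *	$ rdma system show
         *	netns shared copy-on-fork on
         */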
   1835static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
   1836			      struct netlink_ext_ack *extack)
   1837{
   1838	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
   1839	struct sk_buff *msg;
   1840	int err;
   1841
   1842	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
   1843			  nldev_policy, extack);
   1844	if (err)
   1845		return err;
   1846
   1847	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
   1848	if (!msg)
   1849		return -ENOMEM;
   1850
   1851	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
   1852			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
   1853					 RDMA_NLDEV_CMD_SYS_GET),
   1854			0, 0);
   1855
   1856	err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_NETNS_MODE,
   1857			 (u8)ib_devices_shared_netns);
   1858	if (err) {
   1859		nlmsg_free(msg);
   1860		return err;
   1861	}
   1862
   1863	/*
   1864	 * Copy-on-fork is supported.
   1865	 * See commits:
   1866	 * 70e806e4e645 ("mm: Do early cow for pinned pages during fork() for ptes")
   1867	 * 4eae4efa2c29 ("hugetlb: do early cow when page pinned on src mm")
   1868	 * for more details. Don't backport this without them.
   1869	 *
    1870	 * The return value is ignored on purpose: on failure, userspace
    1871	 * simply assumes copy-on-fork is not supported.
   1872	 */
   1873	nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK, 1);
   1874
   1875	nlmsg_end(msg, nlh);
   1876	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
   1877}
   1878
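        /*
         * RDMA_NLDEV_CMD_SYS_SET: switch between shared (1) and exclusive
         * (0) network-namespace mode, e.g. "rdma system set netns
         * exclusive"; rdma_compatdev_set() rejects the change when it
         * cannot be applied safely.
         */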
   1879static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
   1880				  struct netlink_ext_ack *extack)
   1881{
   1882	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
   1883	u8 enable;
   1884	int err;
   1885
   1886	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
   1887			  nldev_policy, extack);
   1888	if (err || !tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE])
   1889		return -EINVAL;
   1890
   1891	enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]);
   1892	/* Only 0 and 1 are supported */
   1893	if (enable > 1)
   1894		return -EINVAL;
   1895
   1896	err = rdma_compatdev_set(enable);
   1897	return err;
   1898}
   1899
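        /*
         * Select the per-QP counter mode. With the iproute2 rdma tool this
         * corresponds to, for example:
         *
         *	rdma statistic qp set link mlx5_0/1 auto type on
         *	rdma statistic qp bind link mlx5_0/1 lqpn 178
         *
         * The first enables auto mode (QPs are bound to counters by the
         * given criteria mask); the second manually binds one QP,
         * allocating a fresh counter unless STAT_COUNTER_ID names an
         * existing one.
         */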
   1900static int nldev_stat_set_mode_doit(struct sk_buff *msg,
   1901				    struct netlink_ext_ack *extack,
   1902				    struct nlattr *tb[],
   1903				    struct ib_device *device, u32 port)
   1904{
   1905	u32 mode, mask = 0, qpn, cntn = 0;
   1906	int ret;
   1907
    1908	/* Currently only QP counters are supported */
   1909	if (!tb[RDMA_NLDEV_ATTR_STAT_RES] ||
   1910	    nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
   1911		return -EINVAL;
   1912
   1913	mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]);
   1914	if (mode == RDMA_COUNTER_MODE_AUTO) {
   1915		if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK])
   1916			mask = nla_get_u32(
   1917				tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]);
   1918		return rdma_counter_set_auto_mode(device, port, mask, extack);
   1919	}
   1920
   1921	if (!tb[RDMA_NLDEV_ATTR_RES_LQPN])
   1922		return -EINVAL;
   1923
   1924	qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
   1925	if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) {
   1926		cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
   1927		ret = rdma_counter_bind_qpn(device, port, qpn, cntn);
   1928		if (ret)
   1929			return ret;
   1930	} else {
   1931		ret = rdma_counter_bind_qpn_alloc(device, port, qpn, &cntn);
   1932		if (ret)
   1933			return ret;
   1934	}
   1935
   1936	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
   1937	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
   1938		ret = -EMSGSIZE;
   1939		goto err_fill;
   1940	}
   1941
   1942	return 0;
   1943
   1944err_fill:
   1945	rdma_counter_unbind_qpn(device, port, qpn, cntn);
   1946	return ret;
   1947}
   1948
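        /*
         * Enable/disable optional (dynamic) hw counters, e.g.
         *
         *	rdma statistic set link mlx5_0/1 optional-counters cc_rx_ce_pkts
         *
         * The requested set is validated into a temporary bitmap before
         * anything is applied, so an invalid index fails with -EINVAL
         * without modifying any counter.
         */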
   1949static int nldev_stat_set_counter_dynamic_doit(struct nlattr *tb[],
   1950					       struct ib_device *device,
   1951					       u32 port)
   1952{
   1953	struct rdma_hw_stats *stats;
   1954	struct nlattr *entry_attr;
   1955	unsigned long *target;
   1956	int rem, i, ret = 0;
   1957	u32 index;
   1958
   1959	stats = ib_get_hw_stats_port(device, port);
   1960	if (!stats)
   1961		return -EINVAL;
   1962
   1963	target = kcalloc(BITS_TO_LONGS(stats->num_counters),
   1964			 sizeof(*stats->is_disabled), GFP_KERNEL);
   1965	if (!target)
   1966		return -ENOMEM;
   1967
   1968	nla_for_each_nested(entry_attr, tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS],
   1969			    rem) {
   1970		index = nla_get_u32(entry_attr);
   1971		if ((index >= stats->num_counters) ||
   1972		    !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL)) {
   1973			ret = -EINVAL;
   1974			goto out;
   1975		}
   1976
   1977		set_bit(index, target);
   1978	}
   1979
   1980	for (i = 0; i < stats->num_counters; i++) {
   1981		if (!(stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL))
   1982			continue;
   1983
   1984		ret = rdma_counter_modify(device, port, i, test_bit(i, target));
   1985		if (ret)
   1986			goto out;
   1987	}
   1988
   1989out:
   1990	kfree(target);
   1991	return ret;
   1992}
   1993
   1994static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
   1995			       struct netlink_ext_ack *extack)
   1996{
   1997	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
   1998	struct ib_device *device;
   1999	struct sk_buff *msg;
   2000	u32 index, port;
   2001	int ret;
   2002
   2003	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
   2004			  extack);
   2005	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
   2006	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
   2007		return -EINVAL;
   2008
   2009	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
   2010	device = ib_device_get_by_index(sock_net(skb->sk), index);
   2011	if (!device)
   2012		return -EINVAL;
   2013
   2014	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
   2015	if (!rdma_is_port_valid(device, port)) {
   2016		ret = -EINVAL;
   2017		goto err_put_device;
   2018	}
   2019
   2020	if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] &&
   2021	    !tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) {
   2022		ret = -EINVAL;
   2023		goto err_put_device;
   2024	}
   2025
   2026	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
   2027	if (!msg) {
   2028		ret = -ENOMEM;
   2029		goto err_put_device;
   2030	}
   2031	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
   2032			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
   2033					 RDMA_NLDEV_CMD_STAT_SET),
   2034			0, 0);
   2035	if (fill_nldev_handle(msg, device) ||
   2036	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
   2037		ret = -EMSGSIZE;
   2038		goto err_free_msg;
   2039	}
   2040
   2041	if (tb[RDMA_NLDEV_ATTR_STAT_MODE]) {
   2042		ret = nldev_stat_set_mode_doit(msg, extack, tb, device, port);
   2043		if (ret)
   2044			goto err_free_msg;
   2045	}
   2046
   2047	if (tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) {
   2048		ret = nldev_stat_set_counter_dynamic_doit(tb, device, port);
   2049		if (ret)
   2050			goto err_free_msg;
   2051	}
   2052
   2053	nlmsg_end(msg, nlh);
   2054	ib_device_put(device);
   2055	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
   2056
   2057err_free_msg:
   2058	nlmsg_free(msg);
   2059err_put_device:
   2060	ib_device_put(device);
   2061	return ret;
   2062}
   2063
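        /*
         * RDMA_NLDEV_CMD_STAT_DEL: manually unbind a QP from a counter,
         * e.g. "rdma statistic qp unbind link mlx5_0/1 cntn 4 lqpn 178".
         * The reply echoes device, port, counter id and LQPN on success.
         */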
   2064static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
   2065			       struct netlink_ext_ack *extack)
   2066{
   2067	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
   2068	struct ib_device *device;
   2069	struct sk_buff *msg;
   2070	u32 index, port, qpn, cntn;
   2071	int ret;
   2072
   2073	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
   2074			  nldev_policy, extack);
   2075	if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] ||
   2076	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX] ||
   2077	    !tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID] ||
   2078	    !tb[RDMA_NLDEV_ATTR_RES_LQPN])
   2079		return -EINVAL;
   2080
   2081	if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
   2082		return -EINVAL;
   2083
   2084	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
   2085	device = ib_device_get_by_index(sock_net(skb->sk), index);
   2086	if (!device)
   2087		return -EINVAL;
   2088
   2089	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
   2090	if (!rdma_is_port_valid(device, port)) {
   2091		ret = -EINVAL;
   2092		goto err;
   2093	}
   2094
   2095	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
   2096	if (!msg) {
   2097		ret = -ENOMEM;
   2098		goto err;
   2099	}
   2100	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
   2101			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
   2102					 RDMA_NLDEV_CMD_STAT_SET),
   2103			0, 0);
   2104
   2105	cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
   2106	qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
   2107	if (fill_nldev_handle(msg, device) ||
   2108	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
   2109	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
   2110	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
   2111		ret = -EMSGSIZE;
   2112		goto err_fill;
   2113	}
   2114
   2115	ret = rdma_counter_unbind_qpn(device, port, qpn, cntn);
   2116	if (ret)
   2117		goto err_fill;
   2118
   2119	nlmsg_end(msg, nlh);
   2120	ib_device_put(device);
   2121	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
   2122
   2123err_fill:
   2124	nlmsg_free(msg);
   2125err:
   2126	ib_device_put(device);
   2127	return ret;
   2128}
   2129
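        /*
         * Dump the default (per-port) hw counters: each value is the
         * port-wide reading plus whatever the port's dedicated counters
         * have accumulated. This backs "rdma statistic show link mlx5_0/1".
         */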
   2130static int stat_get_doit_default_counter(struct sk_buff *skb,
   2131					 struct nlmsghdr *nlh,
   2132					 struct netlink_ext_ack *extack,
   2133					 struct nlattr *tb[])
   2134{
   2135	struct rdma_hw_stats *stats;
   2136	struct nlattr *table_attr;
   2137	struct ib_device *device;
   2138	int ret, num_cnts, i;
   2139	struct sk_buff *msg;
   2140	u32 index, port;
   2141	u64 v;
   2142
   2143	if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
   2144		return -EINVAL;
   2145
   2146	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
   2147	device = ib_device_get_by_index(sock_net(skb->sk), index);
   2148	if (!device)
   2149		return -EINVAL;
   2150
   2151	if (!device->ops.alloc_hw_port_stats || !device->ops.get_hw_stats) {
   2152		ret = -EINVAL;
   2153		goto err;
   2154	}
   2155
   2156	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
   2157	stats = ib_get_hw_stats_port(device, port);
   2158	if (!stats) {
   2159		ret = -EINVAL;
   2160		goto err;
   2161	}
   2162
   2163	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
   2164	if (!msg) {
   2165		ret = -ENOMEM;
   2166		goto err;
   2167	}
   2168
   2169	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
   2170			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
   2171					 RDMA_NLDEV_CMD_STAT_GET),
   2172			0, 0);
   2173
   2174	if (fill_nldev_handle(msg, device) ||
   2175	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
   2176		ret = -EMSGSIZE;
   2177		goto err_msg;
   2178	}
   2179
   2180	mutex_lock(&stats->lock);
   2181
   2182	num_cnts = device->ops.get_hw_stats(device, stats, port, 0);
   2183	if (num_cnts < 0) {
   2184		ret = -EINVAL;
   2185		goto err_stats;
   2186	}
   2187
   2188	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
   2189	if (!table_attr) {
   2190		ret = -EMSGSIZE;
   2191		goto err_stats;
   2192	}
   2193	for (i = 0; i < num_cnts; i++) {
   2194		if (test_bit(i, stats->is_disabled))
   2195			continue;
   2196
   2197		v = stats->value[i] +
   2198			rdma_counter_get_hwstat_value(device, port, i);
   2199		if (rdma_nl_stat_hwcounter_entry(msg,
   2200						 stats->descs[i].name, v)) {
   2201			ret = -EMSGSIZE;
   2202			goto err_table;
   2203		}
   2204	}
   2205	nla_nest_end(msg, table_attr);
   2206
   2207	mutex_unlock(&stats->lock);
   2208	nlmsg_end(msg, nlh);
   2209	ib_device_put(device);
   2210	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
   2211
   2212err_table:
   2213	nla_nest_cancel(msg, table_attr);
   2214err_stats:
   2215	mutex_unlock(&stats->lock);
   2216err_msg:
   2217	nlmsg_free(msg);
   2218err:
   2219	ib_device_put(device);
   2220	return ret;
   2221}
   2222
   2223static int stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh,
   2224			    struct netlink_ext_ack *extack, struct nlattr *tb[])
   2225
   2226{
    2227	enum rdma_nl_counter_mode mode;
    2228	enum rdma_nl_counter_mask mask;
   2229	struct ib_device *device;
   2230	struct sk_buff *msg;
   2231	u32 index, port;
   2232	int ret;
   2233
   2234	if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID])
   2235		return nldev_res_get_counter_doit(skb, nlh, extack);
   2236
   2237	if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] ||
   2238	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
   2239		return -EINVAL;
   2240
   2241	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
   2242	device = ib_device_get_by_index(sock_net(skb->sk), index);
   2243	if (!device)
   2244		return -EINVAL;
   2245
   2246	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
   2247	if (!rdma_is_port_valid(device, port)) {
   2248		ret = -EINVAL;
   2249		goto err;
   2250	}
   2251
   2252	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
   2253	if (!msg) {
   2254		ret = -ENOMEM;
   2255		goto err;
   2256	}
   2257
   2258	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
   2259			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
   2260					 RDMA_NLDEV_CMD_STAT_GET),
   2261			0, 0);
   2262
   2263	ret = rdma_counter_get_mode(device, port, &mode, &mask);
   2264	if (ret)
   2265		goto err_msg;
   2266
   2267	if (fill_nldev_handle(msg, device) ||
   2268	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
   2269	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, mode)) {
   2270		ret = -EMSGSIZE;
   2271		goto err_msg;
   2272	}
   2273
   2274	if ((mode == RDMA_COUNTER_MODE_AUTO) &&
   2275	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, mask)) {
   2276		ret = -EMSGSIZE;
   2277		goto err_msg;
   2278	}
   2279
   2280	nlmsg_end(msg, nlh);
   2281	ib_device_put(device);
   2282	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
   2283
   2284err_msg:
   2285	nlmsg_free(msg);
   2286err:
   2287	ib_device_put(device);
   2288	return ret;
   2289}
   2290
   2291static int nldev_stat_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
   2292			       struct netlink_ext_ack *extack)
   2293{
   2294	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
   2295	int ret;
   2296
   2297	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
   2298			  nldev_policy, extack);
   2299	if (ret)
   2300		return -EINVAL;
   2301
   2302	if (!tb[RDMA_NLDEV_ATTR_STAT_RES])
   2303		return stat_get_doit_default_counter(skb, nlh, extack, tb);
   2304
   2305	switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
   2306	case RDMA_NLDEV_ATTR_RES_QP:
   2307		ret = stat_get_doit_qp(skb, nlh, extack, tb);
   2308		break;
   2309	case RDMA_NLDEV_ATTR_RES_MR:
   2310		ret = res_get_common_doit(skb, nlh, extack, RDMA_RESTRACK_MR,
   2311					  fill_stat_mr_entry);
   2312		break;
   2313	default:
   2314		ret = -EINVAL;
   2315		break;
   2316	}
   2317
   2318	return ret;
   2319}
   2320
   2321static int nldev_stat_get_dumpit(struct sk_buff *skb,
   2322				 struct netlink_callback *cb)
   2323{
   2324	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
   2325	int ret;
   2326
   2327	ret = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
   2328			  nldev_policy, NULL);
   2329	if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES])
   2330		return -EINVAL;
   2331
   2332	switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
   2333	case RDMA_NLDEV_ATTR_RES_QP:
   2334		ret = nldev_res_get_counter_dumpit(skb, cb);
   2335		break;
   2336	case RDMA_NLDEV_ATTR_RES_MR:
   2337		ret = res_get_common_dumpit(skb, cb, RDMA_RESTRACK_MR,
   2338					    fill_stat_mr_entry);
   2339		break;
   2340	default:
   2341		ret = -EINVAL;
   2342		break;
   2343	}
   2344
   2345	return ret;
   2346}
   2347
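        /*
         * RDMA_NLDEV_CMD_STAT_GET_STATUS: list every hw counter of a port
         * by name and index and, for the optional ones, whether each is
         * currently enabled; userspace can use this to decide which
         * optional-counters may be toggled via STAT_SET.
         */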
   2348static int nldev_stat_get_counter_status_doit(struct sk_buff *skb,
   2349					      struct nlmsghdr *nlh,
   2350					      struct netlink_ext_ack *extack)
   2351{
   2352	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX], *table, *entry;
   2353	struct rdma_hw_stats *stats;
   2354	struct ib_device *device;
   2355	struct sk_buff *msg;
   2356	u32 devid, port;
   2357	int ret, i;
   2358
   2359	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
   2360			  nldev_policy, extack);
   2361	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
   2362	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
   2363		return -EINVAL;
   2364
   2365	devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
   2366	device = ib_device_get_by_index(sock_net(skb->sk), devid);
   2367	if (!device)
   2368		return -EINVAL;
   2369
   2370	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
   2371	if (!rdma_is_port_valid(device, port)) {
   2372		ret = -EINVAL;
   2373		goto err;
   2374	}
   2375
   2376	stats = ib_get_hw_stats_port(device, port);
   2377	if (!stats) {
   2378		ret = -EINVAL;
   2379		goto err;
   2380	}
   2381
   2382	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
   2383	if (!msg) {
   2384		ret = -ENOMEM;
   2385		goto err;
   2386	}
   2387
   2388	nlh = nlmsg_put(
   2389		msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
   2390		RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_GET_STATUS),
   2391		0, 0);
   2392
   2393	ret = -EMSGSIZE;
   2394	if (fill_nldev_handle(msg, device) ||
   2395	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
   2396		goto err_msg;
   2397
   2398	table = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
   2399	if (!table)
   2400		goto err_msg;
   2401
   2402	mutex_lock(&stats->lock);
   2403	for (i = 0; i < stats->num_counters; i++) {
   2404		entry = nla_nest_start(msg,
   2405				       RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY);
   2406		if (!entry)
   2407			goto err_msg_table;
   2408
   2409		if (nla_put_string(msg,
   2410				   RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME,
   2411				   stats->descs[i].name) ||
   2412		    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX, i))
   2413			goto err_msg_entry;
   2414
   2415		if ((stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) &&
   2416		    (nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC,
   2417				!test_bit(i, stats->is_disabled))))
   2418			goto err_msg_entry;
   2419
   2420		nla_nest_end(msg, entry);
   2421	}
   2422	mutex_unlock(&stats->lock);
   2423
   2424	nla_nest_end(msg, table);
   2425	nlmsg_end(msg, nlh);
   2426	ib_device_put(device);
   2427	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
   2428
   2429err_msg_entry:
   2430	nla_nest_cancel(msg, entry);
   2431err_msg_table:
   2432	mutex_unlock(&stats->lock);
   2433	nla_nest_cancel(msg, table);
   2434err_msg:
   2435	nlmsg_free(msg);
   2436err:
   2437	ib_device_put(device);
   2438	return ret;
   2439}
   2440
   2441static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
   2442	[RDMA_NLDEV_CMD_GET] = {
   2443		.doit = nldev_get_doit,
   2444		.dump = nldev_get_dumpit,
   2445	},
   2446	[RDMA_NLDEV_CMD_GET_CHARDEV] = {
   2447		.doit = nldev_get_chardev,
   2448	},
   2449	[RDMA_NLDEV_CMD_SET] = {
   2450		.doit = nldev_set_doit,
   2451		.flags = RDMA_NL_ADMIN_PERM,
   2452	},
   2453	[RDMA_NLDEV_CMD_NEWLINK] = {
   2454		.doit = nldev_newlink,
   2455		.flags = RDMA_NL_ADMIN_PERM,
   2456	},
   2457	[RDMA_NLDEV_CMD_DELLINK] = {
   2458		.doit = nldev_dellink,
   2459		.flags = RDMA_NL_ADMIN_PERM,
   2460	},
   2461	[RDMA_NLDEV_CMD_PORT_GET] = {
   2462		.doit = nldev_port_get_doit,
   2463		.dump = nldev_port_get_dumpit,
   2464	},
   2465	[RDMA_NLDEV_CMD_RES_GET] = {
   2466		.doit = nldev_res_get_doit,
   2467		.dump = nldev_res_get_dumpit,
   2468	},
   2469	[RDMA_NLDEV_CMD_RES_QP_GET] = {
   2470		.doit = nldev_res_get_qp_doit,
   2471		.dump = nldev_res_get_qp_dumpit,
   2472	},
   2473	[RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
   2474		.doit = nldev_res_get_cm_id_doit,
   2475		.dump = nldev_res_get_cm_id_dumpit,
   2476	},
   2477	[RDMA_NLDEV_CMD_RES_CQ_GET] = {
   2478		.doit = nldev_res_get_cq_doit,
   2479		.dump = nldev_res_get_cq_dumpit,
   2480	},
   2481	[RDMA_NLDEV_CMD_RES_MR_GET] = {
   2482		.doit = nldev_res_get_mr_doit,
   2483		.dump = nldev_res_get_mr_dumpit,
   2484	},
   2485	[RDMA_NLDEV_CMD_RES_PD_GET] = {
   2486		.doit = nldev_res_get_pd_doit,
   2487		.dump = nldev_res_get_pd_dumpit,
   2488	},
   2489	[RDMA_NLDEV_CMD_RES_CTX_GET] = {
   2490		.doit = nldev_res_get_ctx_doit,
   2491		.dump = nldev_res_get_ctx_dumpit,
   2492	},
   2493	[RDMA_NLDEV_CMD_RES_SRQ_GET] = {
   2494		.doit = nldev_res_get_srq_doit,
   2495		.dump = nldev_res_get_srq_dumpit,
   2496	},
   2497	[RDMA_NLDEV_CMD_SYS_GET] = {
   2498		.doit = nldev_sys_get_doit,
   2499	},
   2500	[RDMA_NLDEV_CMD_SYS_SET] = {
   2501		.doit = nldev_set_sys_set_doit,
   2502	},
   2503	[RDMA_NLDEV_CMD_STAT_SET] = {
   2504		.doit = nldev_stat_set_doit,
   2505		.flags = RDMA_NL_ADMIN_PERM,
   2506	},
   2507	[RDMA_NLDEV_CMD_STAT_GET] = {
   2508		.doit = nldev_stat_get_doit,
   2509		.dump = nldev_stat_get_dumpit,
   2510	},
   2511	[RDMA_NLDEV_CMD_STAT_DEL] = {
   2512		.doit = nldev_stat_del_doit,
   2513		.flags = RDMA_NL_ADMIN_PERM,
   2514	},
   2515	[RDMA_NLDEV_CMD_RES_QP_GET_RAW] = {
   2516		.doit = nldev_res_get_qp_raw_doit,
   2517		.dump = nldev_res_get_qp_raw_dumpit,
   2518		.flags = RDMA_NL_ADMIN_PERM,
   2519	},
   2520	[RDMA_NLDEV_CMD_RES_CQ_GET_RAW] = {
   2521		.doit = nldev_res_get_cq_raw_doit,
   2522		.dump = nldev_res_get_cq_raw_dumpit,
   2523		.flags = RDMA_NL_ADMIN_PERM,
   2524	},
   2525	[RDMA_NLDEV_CMD_RES_MR_GET_RAW] = {
   2526		.doit = nldev_res_get_mr_raw_doit,
   2527		.dump = nldev_res_get_mr_raw_dumpit,
   2528		.flags = RDMA_NL_ADMIN_PERM,
   2529	},
   2530	[RDMA_NLDEV_CMD_STAT_GET_STATUS] = {
   2531		.doit = nldev_stat_get_counter_status_doit,
   2532	},
   2533};
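        /*
         * The table above is what userspace reaches over a NETLINK_RDMA
         * socket. A minimal sketch of a dump request for RDMA_NLDEV_CMD_GET
         * (uapi headers assumed, error handling omitted):
         *
         *	struct sockaddr_nl sa = { .nl_family = AF_NETLINK };
         *	struct nlmsghdr nlh = {
         *		.nlmsg_len   = NLMSG_LENGTH(0),
         *		.nlmsg_type  = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
         *						RDMA_NLDEV_CMD_GET),
         *		.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
         *	};
         *	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_RDMA);
         *
         *	bind(fd, (struct sockaddr *)&sa, sizeof(sa));
         *	send(fd, &nlh, nlh.nlmsg_len, 0);
         *	... recv() NLM_F_MULTI replies until NLMSG_DONE ...
         */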
   2534
   2535void __init nldev_init(void)
   2536{
   2537	rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table);
   2538}
   2539
   2540void __exit nldev_exit(void)
   2541{
   2542	rdma_nl_unregister(RDMA_NL_NLDEV);
   2543}
   2544
   2545MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_NLDEV, 5);