cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

smc_pnet.c (30886B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
      4 *
      5 *  Generic netlink support functions to configure an SMC-R PNET table
      6 *
      7 *  Copyright IBM Corp. 2016
      8 *
      9 *  Author(s):  Thomas Richter <tmricht@linux.vnet.ibm.com>
     10 */
     11
     12#include <linux/module.h>
     13#include <linux/list.h>
     14#include <linux/ctype.h>
     15#include <linux/mutex.h>
     16#include <net/netlink.h>
     17#include <net/genetlink.h>
     18
     19#include <uapi/linux/if.h>
     20#include <uapi/linux/smc.h>
     21
     22#include <rdma/ib_verbs.h>
     23
     24#include <net/netns/generic.h>
     25#include "smc_netns.h"
     26
     27#include "smc_pnet.h"
     28#include "smc_ib.h"
     29#include "smc_ism.h"
     30#include "smc_core.h"
     31
/* Forward declarations: both helpers are defined further down in this file
 * but are called by functions that appear before their definitions.
 */
static struct net_device *__pnet_find_base_ndev(struct net_device *ndev);
static struct net_device *pnet_find_base_ndev(struct net_device *ndev);
     34
     35static const struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = {
     36	[SMC_PNETID_NAME] = {
     37		.type = NLA_NUL_STRING,
     38		.len = SMC_MAX_PNETID_LEN
     39	},
     40	[SMC_PNETID_ETHNAME] = {
     41		.type = NLA_NUL_STRING,
     42		.len = IFNAMSIZ - 1
     43	},
     44	[SMC_PNETID_IBNAME] = {
     45		.type = NLA_NUL_STRING,
     46		.len = IB_DEVICE_NAME_MAX - 1
     47	},
     48	[SMC_PNETID_IBPORT] = { .type = NLA_U8 }
     49};
     50
/* forward declaration: smc_pnet_dumpinfo() refers to the family before its
 * definition further down
 */
static struct genl_family smc_pnet_nl_family;
     52
/* Kind of device a pnet table entry refers to; stored in
 * smc_pnetentry.type.
 */
enum smc_pnet_nametype {
	SMC_PNET_ETH	= 1,	/* entry names a net device (eth_name) */
	SMC_PNET_IB	= 2,	/* entry names an ib device and port */
};
     57
/* pnet entry stored in pnet table; @type tells which half of the anonymous
 * union is in use
 */
struct smc_pnetentry {
	struct list_head list;	/* link in smc_pnettable.pnetlist */
	char pnet_name[SMC_MAX_PNETID_LEN + 1];	/* pnetid of this entry */
	enum smc_pnet_nametype type;
	union {
		/* type == SMC_PNET_ETH */
		struct {
			char eth_name[IFNAMSIZ + 1];
			/* NULL while no matching net device is attached */
			struct net_device *ndev;
			/* tracker for the reference held on @ndev */
			netdevice_tracker dev_tracker;
		};
		/* type == SMC_PNET_IB */
		struct {
			char ib_name[IB_DEVICE_NAME_MAX + 1];
			/* port number, counted from 1 */
			u8 ib_port;
		};
	};
};
     75
     76/* Check if the pnetid is set */
     77bool smc_pnet_is_pnetid_set(u8 *pnetid)
     78{
     79	if (pnetid[0] == 0 || pnetid[0] == _S)
     80		return false;
     81	return true;
     82}
     83
     84/* Check if two given pnetids match */
     85static bool smc_pnet_match(u8 *pnetid1, u8 *pnetid2)
     86{
     87	int i;
     88
     89	for (i = 0; i < SMC_MAX_PNETID_LEN; i++) {
     90		if ((pnetid1[i] == 0 || pnetid1[i] == _S) &&
     91		    (pnetid2[i] == 0 || pnetid2[i] == _S))
     92			break;
     93		if (pnetid1[i] != pnetid2[i])
     94			return false;
     95	}
     96	return true;
     97}
     98
     99/* Remove a pnetid from the pnet table.
    100 */
    101static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
    102{
    103	struct smc_pnetentry *pnetelem, *tmp_pe;
    104	struct smc_pnettable *pnettable;
    105	struct smc_ib_device *ibdev;
    106	struct smcd_dev *smcd_dev;
    107	struct smc_net *sn;
    108	int rc = -ENOENT;
    109	int ibport;
    110
    111	/* get pnettable for namespace */
    112	sn = net_generic(net, smc_net_id);
    113	pnettable = &sn->pnettable;
    114
    115	/* remove table entry */
    116	mutex_lock(&pnettable->lock);
    117	list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist,
    118				 list) {
    119		if (!pnet_name ||
    120		    smc_pnet_match(pnetelem->pnet_name, pnet_name)) {
    121			list_del(&pnetelem->list);
    122			if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev) {
    123				dev_put_track(pnetelem->ndev, &pnetelem->dev_tracker);
    124				pr_warn_ratelimited("smc: net device %s "
    125						    "erased user defined "
    126						    "pnetid %.16s\n",
    127						    pnetelem->eth_name,
    128						    pnetelem->pnet_name);
    129			}
    130			kfree(pnetelem);
    131			rc = 0;
    132		}
    133	}
    134	mutex_unlock(&pnettable->lock);
    135
    136	/* if this is not the initial namespace, stop here */
    137	if (net != &init_net)
    138		return rc;
    139
    140	/* remove ib devices */
    141	mutex_lock(&smc_ib_devices.mutex);
    142	list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
    143		for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) {
    144			if (ibdev->pnetid_by_user[ibport] &&
    145			    (!pnet_name ||
    146			     smc_pnet_match(pnet_name,
    147					    ibdev->pnetid[ibport]))) {
    148				pr_warn_ratelimited("smc: ib device %s ibport "
    149						    "%d erased user defined "
    150						    "pnetid %.16s\n",
    151						    ibdev->ibdev->name,
    152						    ibport + 1,
    153						    ibdev->pnetid[ibport]);
    154				memset(ibdev->pnetid[ibport], 0,
    155				       SMC_MAX_PNETID_LEN);
    156				ibdev->pnetid_by_user[ibport] = false;
    157				rc = 0;
    158			}
    159		}
    160	}
    161	mutex_unlock(&smc_ib_devices.mutex);
    162	/* remove smcd devices */
    163	mutex_lock(&smcd_dev_list.mutex);
    164	list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) {
    165		if (smcd_dev->pnetid_by_user &&
    166		    (!pnet_name ||
    167		     smc_pnet_match(pnet_name, smcd_dev->pnetid))) {
    168			pr_warn_ratelimited("smc: smcd device %s "
    169					    "erased user defined pnetid "
    170					    "%.16s\n", dev_name(&smcd_dev->dev),
    171					    smcd_dev->pnetid);
    172			memset(smcd_dev->pnetid, 0, SMC_MAX_PNETID_LEN);
    173			smcd_dev->pnetid_by_user = false;
    174			rc = 0;
    175		}
    176	}
    177	mutex_unlock(&smcd_dev_list.mutex);
    178	return rc;
    179}
    180
    181/* Add the reference to a given network device to the pnet table.
    182 */
    183static int smc_pnet_add_by_ndev(struct net_device *ndev)
    184{
    185	struct smc_pnetentry *pnetelem, *tmp_pe;
    186	struct smc_pnettable *pnettable;
    187	struct net *net = dev_net(ndev);
    188	struct smc_net *sn;
    189	int rc = -ENOENT;
    190
    191	/* get pnettable for namespace */
    192	sn = net_generic(net, smc_net_id);
    193	pnettable = &sn->pnettable;
    194
    195	mutex_lock(&pnettable->lock);
    196	list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) {
    197		if (pnetelem->type == SMC_PNET_ETH && !pnetelem->ndev &&
    198		    !strncmp(pnetelem->eth_name, ndev->name, IFNAMSIZ)) {
    199			dev_hold_track(ndev, &pnetelem->dev_tracker, GFP_ATOMIC);
    200			pnetelem->ndev = ndev;
    201			rc = 0;
    202			pr_warn_ratelimited("smc: adding net device %s with "
    203					    "user defined pnetid %.16s\n",
    204					    pnetelem->eth_name,
    205					    pnetelem->pnet_name);
    206			break;
    207		}
    208	}
    209	mutex_unlock(&pnettable->lock);
    210	return rc;
    211}
    212
    213/* Remove the reference to a given network device from the pnet table.
    214 */
    215static int smc_pnet_remove_by_ndev(struct net_device *ndev)
    216{
    217	struct smc_pnetentry *pnetelem, *tmp_pe;
    218	struct smc_pnettable *pnettable;
    219	struct net *net = dev_net(ndev);
    220	struct smc_net *sn;
    221	int rc = -ENOENT;
    222
    223	/* get pnettable for namespace */
    224	sn = net_generic(net, smc_net_id);
    225	pnettable = &sn->pnettable;
    226
    227	mutex_lock(&pnettable->lock);
    228	list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) {
    229		if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev == ndev) {
    230			dev_put_track(pnetelem->ndev, &pnetelem->dev_tracker);
    231			pnetelem->ndev = NULL;
    232			rc = 0;
    233			pr_warn_ratelimited("smc: removing net device %s with "
    234					    "user defined pnetid %.16s\n",
    235					    pnetelem->eth_name,
    236					    pnetelem->pnet_name);
    237			break;
    238		}
    239	}
    240	mutex_unlock(&pnettable->lock);
    241	return rc;
    242}
    243
    244/* Apply pnetid to ib device when no pnetid is set.
    245 */
    246static bool smc_pnet_apply_ib(struct smc_ib_device *ib_dev, u8 ib_port,
    247			      char *pnet_name)
    248{
    249	bool applied = false;
    250
    251	mutex_lock(&smc_ib_devices.mutex);
    252	if (!smc_pnet_is_pnetid_set(ib_dev->pnetid[ib_port - 1])) {
    253		memcpy(ib_dev->pnetid[ib_port - 1], pnet_name,
    254		       SMC_MAX_PNETID_LEN);
    255		ib_dev->pnetid_by_user[ib_port - 1] = true;
    256		applied = true;
    257	}
    258	mutex_unlock(&smc_ib_devices.mutex);
    259	return applied;
    260}
    261
    262/* Apply pnetid to smcd device when no pnetid is set.
    263 */
    264static bool smc_pnet_apply_smcd(struct smcd_dev *smcd_dev, char *pnet_name)
    265{
    266	bool applied = false;
    267
    268	mutex_lock(&smcd_dev_list.mutex);
    269	if (!smc_pnet_is_pnetid_set(smcd_dev->pnetid)) {
    270		memcpy(smcd_dev->pnetid, pnet_name, SMC_MAX_PNETID_LEN);
    271		smcd_dev->pnetid_by_user = true;
    272		applied = true;
    273	}
    274	mutex_unlock(&smcd_dev_list.mutex);
    275	return applied;
    276}
    277
    278/* The limit for pnetid is 16 characters.
    279 * Valid characters should be (single-byte character set) a-z, A-Z, 0-9.
    280 * Lower case letters are converted to upper case.
    281 * Interior blanks should not be used.
    282 */
    283static bool smc_pnetid_valid(const char *pnet_name, char *pnetid)
    284{
    285	char *bf = skip_spaces(pnet_name);
    286	size_t len = strlen(bf);
    287	char *end = bf + len;
    288
    289	if (!len)
    290		return false;
    291	while (--end >= bf && isspace(*end))
    292		;
    293	if (end - bf >= SMC_MAX_PNETID_LEN)
    294		return false;
    295	while (bf <= end) {
    296		if (!isalnum(*bf))
    297			return false;
    298		*pnetid++ = islower(*bf) ? toupper(*bf) : *bf;
    299		bf++;
    300	}
    301	*pnetid = '\0';
    302	return true;
    303}
    304
    305/* Find an infiniband device by a given name. The device might not exist. */
    306static struct smc_ib_device *smc_pnet_find_ib(char *ib_name)
    307{
    308	struct smc_ib_device *ibdev;
    309
    310	mutex_lock(&smc_ib_devices.mutex);
    311	list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
    312		if (!strncmp(ibdev->ibdev->name, ib_name,
    313			     sizeof(ibdev->ibdev->name)) ||
    314		    (ibdev->ibdev->dev.parent &&
    315		     !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name,
    316			     IB_DEVICE_NAME_MAX - 1))) {
    317			goto out;
    318		}
    319	}
    320	ibdev = NULL;
    321out:
    322	mutex_unlock(&smc_ib_devices.mutex);
    323	return ibdev;
    324}
    325
    326/* Find an smcd device by a given name. The device might not exist. */
    327static struct smcd_dev *smc_pnet_find_smcd(char *smcd_name)
    328{
    329	struct smcd_dev *smcd_dev;
    330
    331	mutex_lock(&smcd_dev_list.mutex);
    332	list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) {
    333		if (!strncmp(dev_name(&smcd_dev->dev), smcd_name,
    334			     IB_DEVICE_NAME_MAX - 1))
    335			goto out;
    336	}
    337	smcd_dev = NULL;
    338out:
    339	mutex_unlock(&smcd_dev_list.mutex);
    340	return smcd_dev;
    341}
    342
    343static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net,
    344			    char *eth_name, char *pnet_name)
    345{
    346	struct smc_pnetentry *tmp_pe, *new_pe;
    347	struct net_device *ndev, *base_ndev;
    348	u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
    349	bool new_netdev;
    350	int rc;
    351
    352	/* check if (base) netdev already has a pnetid. If there is one, we do
    353	 * not want to add a pnet table entry
    354	 */
    355	rc = -EEXIST;
    356	ndev = dev_get_by_name(net, eth_name);	/* dev_hold() */
    357	if (ndev) {
    358		base_ndev = pnet_find_base_ndev(ndev);
    359		if (!smc_pnetid_by_dev_port(base_ndev->dev.parent,
    360					    base_ndev->dev_port, ndev_pnetid))
    361			goto out_put;
    362	}
    363
    364	/* add a new netdev entry to the pnet table if there isn't one */
    365	rc = -ENOMEM;
    366	new_pe = kzalloc(sizeof(*new_pe), GFP_KERNEL);
    367	if (!new_pe)
    368		goto out_put;
    369	new_pe->type = SMC_PNET_ETH;
    370	memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN);
    371	strncpy(new_pe->eth_name, eth_name, IFNAMSIZ);
    372	rc = -EEXIST;
    373	new_netdev = true;
    374	mutex_lock(&pnettable->lock);
    375	list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
    376		if (tmp_pe->type == SMC_PNET_ETH &&
    377		    !strncmp(tmp_pe->eth_name, eth_name, IFNAMSIZ)) {
    378			new_netdev = false;
    379			break;
    380		}
    381	}
    382	if (new_netdev) {
    383		if (ndev) {
    384			new_pe->ndev = ndev;
    385			netdev_tracker_alloc(ndev, &new_pe->dev_tracker,
    386					     GFP_ATOMIC);
    387		}
    388		list_add_tail(&new_pe->list, &pnettable->pnetlist);
    389		mutex_unlock(&pnettable->lock);
    390	} else {
    391		mutex_unlock(&pnettable->lock);
    392		kfree(new_pe);
    393		goto out_put;
    394	}
    395	if (ndev)
    396		pr_warn_ratelimited("smc: net device %s "
    397				    "applied user defined pnetid %.16s\n",
    398				    new_pe->eth_name, new_pe->pnet_name);
    399	return 0;
    400
    401out_put:
    402	dev_put(ndev);
    403	return rc;
    404}
    405
    406static int smc_pnet_add_ib(struct smc_pnettable *pnettable, char *ib_name,
    407			   u8 ib_port, char *pnet_name)
    408{
    409	struct smc_pnetentry *tmp_pe, *new_pe;
    410	struct smc_ib_device *ib_dev;
    411	bool smcddev_applied = true;
    412	bool ibdev_applied = true;
    413	struct smcd_dev *smcd_dev;
    414	bool new_ibdev;
    415
    416	/* try to apply the pnetid to active devices */
    417	ib_dev = smc_pnet_find_ib(ib_name);
    418	if (ib_dev) {
    419		ibdev_applied = smc_pnet_apply_ib(ib_dev, ib_port, pnet_name);
    420		if (ibdev_applied)
    421			pr_warn_ratelimited("smc: ib device %s ibport %d "
    422					    "applied user defined pnetid "
    423					    "%.16s\n", ib_dev->ibdev->name,
    424					    ib_port,
    425					    ib_dev->pnetid[ib_port - 1]);
    426	}
    427	smcd_dev = smc_pnet_find_smcd(ib_name);
    428	if (smcd_dev) {
    429		smcddev_applied = smc_pnet_apply_smcd(smcd_dev, pnet_name);
    430		if (smcddev_applied)
    431			pr_warn_ratelimited("smc: smcd device %s "
    432					    "applied user defined pnetid "
    433					    "%.16s\n", dev_name(&smcd_dev->dev),
    434					    smcd_dev->pnetid);
    435	}
    436	/* Apply fails when a device has a hardware-defined pnetid set, do not
    437	 * add a pnet table entry in that case.
    438	 */
    439	if (!ibdev_applied || !smcddev_applied)
    440		return -EEXIST;
    441
    442	/* add a new ib entry to the pnet table if there isn't one */
    443	new_pe = kzalloc(sizeof(*new_pe), GFP_KERNEL);
    444	if (!new_pe)
    445		return -ENOMEM;
    446	new_pe->type = SMC_PNET_IB;
    447	memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN);
    448	strncpy(new_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX);
    449	new_pe->ib_port = ib_port;
    450
    451	new_ibdev = true;
    452	mutex_lock(&pnettable->lock);
    453	list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
    454		if (tmp_pe->type == SMC_PNET_IB &&
    455		    !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX)) {
    456			new_ibdev = false;
    457			break;
    458		}
    459	}
    460	if (new_ibdev) {
    461		list_add_tail(&new_pe->list, &pnettable->pnetlist);
    462		mutex_unlock(&pnettable->lock);
    463	} else {
    464		mutex_unlock(&pnettable->lock);
    465		kfree(new_pe);
    466	}
    467	return (new_ibdev) ? 0 : -EEXIST;
    468}
    469
    470/* Append a pnetid to the end of the pnet table if not already on this list.
    471 */
    472static int smc_pnet_enter(struct net *net, struct nlattr *tb[])
    473{
    474	char pnet_name[SMC_MAX_PNETID_LEN + 1];
    475	struct smc_pnettable *pnettable;
    476	bool new_netdev = false;
    477	bool new_ibdev = false;
    478	struct smc_net *sn;
    479	u8 ibport = 1;
    480	char *string;
    481	int rc;
    482
    483	/* get pnettable for namespace */
    484	sn = net_generic(net, smc_net_id);
    485	pnettable = &sn->pnettable;
    486
    487	rc = -EINVAL;
    488	if (!tb[SMC_PNETID_NAME])
    489		goto error;
    490	string = (char *)nla_data(tb[SMC_PNETID_NAME]);
    491	if (!smc_pnetid_valid(string, pnet_name))
    492		goto error;
    493
    494	if (tb[SMC_PNETID_ETHNAME]) {
    495		string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]);
    496		rc = smc_pnet_add_eth(pnettable, net, string, pnet_name);
    497		if (!rc)
    498			new_netdev = true;
    499		else if (rc != -EEXIST)
    500			goto error;
    501	}
    502
    503	/* if this is not the initial namespace, stop here */
    504	if (net != &init_net)
    505		return new_netdev ? 0 : -EEXIST;
    506
    507	rc = -EINVAL;
    508	if (tb[SMC_PNETID_IBNAME]) {
    509		string = (char *)nla_data(tb[SMC_PNETID_IBNAME]);
    510		string = strim(string);
    511		if (tb[SMC_PNETID_IBPORT]) {
    512			ibport = nla_get_u8(tb[SMC_PNETID_IBPORT]);
    513			if (ibport < 1 || ibport > SMC_MAX_PORTS)
    514				goto error;
    515		}
    516		rc = smc_pnet_add_ib(pnettable, string, ibport, pnet_name);
    517		if (!rc)
    518			new_ibdev = true;
    519		else if (rc != -EEXIST)
    520			goto error;
    521	}
    522	return (new_netdev || new_ibdev) ? 0 : -EEXIST;
    523
    524error:
    525	return rc;
    526}
    527
    528/* Convert an smc_pnetentry to a netlink attribute sequence */
    529static int smc_pnet_set_nla(struct sk_buff *msg,
    530			    struct smc_pnetentry *pnetelem)
    531{
    532	if (nla_put_string(msg, SMC_PNETID_NAME, pnetelem->pnet_name))
    533		return -1;
    534	if (pnetelem->type == SMC_PNET_ETH) {
    535		if (nla_put_string(msg, SMC_PNETID_ETHNAME,
    536				   pnetelem->eth_name))
    537			return -1;
    538	} else {
    539		if (nla_put_string(msg, SMC_PNETID_ETHNAME, "n/a"))
    540			return -1;
    541	}
    542	if (pnetelem->type == SMC_PNET_IB) {
    543		if (nla_put_string(msg, SMC_PNETID_IBNAME, pnetelem->ib_name) ||
    544		    nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port))
    545			return -1;
    546	} else {
    547		if (nla_put_string(msg, SMC_PNETID_IBNAME, "n/a") ||
    548		    nla_put_u8(msg, SMC_PNETID_IBPORT, 0xff))
    549			return -1;
    550	}
    551
    552	return 0;
    553}
    554
    555static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info)
    556{
    557	struct net *net = genl_info_net(info);
    558
    559	return smc_pnet_enter(net, info->attrs);
    560}
    561
    562static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info)
    563{
    564	struct net *net = genl_info_net(info);
    565
    566	if (!info->attrs[SMC_PNETID_NAME])
    567		return -EINVAL;
    568	return smc_pnet_remove_by_pnetid(net,
    569				(char *)nla_data(info->attrs[SMC_PNETID_NAME]));
    570}
    571
    572static int smc_pnet_dump_start(struct netlink_callback *cb)
    573{
    574	cb->args[0] = 0;
    575	return 0;
    576}
    577
    578static int smc_pnet_dumpinfo(struct sk_buff *skb,
    579			     u32 portid, u32 seq, u32 flags,
    580			     struct smc_pnetentry *pnetelem)
    581{
    582	void *hdr;
    583
    584	hdr = genlmsg_put(skb, portid, seq, &smc_pnet_nl_family,
    585			  flags, SMC_PNETID_GET);
    586	if (!hdr)
    587		return -ENOMEM;
    588	if (smc_pnet_set_nla(skb, pnetelem) < 0) {
    589		genlmsg_cancel(skb, hdr);
    590		return -EMSGSIZE;
    591	}
    592	genlmsg_end(skb, hdr);
    593	return 0;
    594}
    595
    596static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid,
    597			  u32 seq, u8 *pnetid, int start_idx)
    598{
    599	struct smc_pnettable *pnettable;
    600	struct smc_pnetentry *pnetelem;
    601	struct smc_net *sn;
    602	int idx = 0;
    603
    604	/* get pnettable for namespace */
    605	sn = net_generic(net, smc_net_id);
    606	pnettable = &sn->pnettable;
    607
    608	/* dump pnettable entries */
    609	mutex_lock(&pnettable->lock);
    610	list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
    611		if (pnetid && !smc_pnet_match(pnetelem->pnet_name, pnetid))
    612			continue;
    613		if (idx++ < start_idx)
    614			continue;
    615		/* if this is not the initial namespace, dump only netdev */
    616		if (net != &init_net && pnetelem->type != SMC_PNET_ETH)
    617			continue;
    618		if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI,
    619				      pnetelem)) {
    620			--idx;
    621			break;
    622		}
    623	}
    624	mutex_unlock(&pnettable->lock);
    625	return idx;
    626}
    627
    628static int smc_pnet_dump(struct sk_buff *skb, struct netlink_callback *cb)
    629{
    630	struct net *net = sock_net(skb->sk);
    631	int idx;
    632
    633	idx = _smc_pnet_dump(net, skb, NETLINK_CB(cb->skb).portid,
    634			     cb->nlh->nlmsg_seq, NULL, cb->args[0]);
    635
    636	cb->args[0] = idx;
    637	return skb->len;
    638}
    639
    640/* Retrieve one PNETID entry */
    641static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info)
    642{
    643	struct net *net = genl_info_net(info);
    644	struct sk_buff *msg;
    645	void *hdr;
    646
    647	if (!info->attrs[SMC_PNETID_NAME])
    648		return -EINVAL;
    649
    650	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
    651	if (!msg)
    652		return -ENOMEM;
    653
    654	_smc_pnet_dump(net, msg, info->snd_portid, info->snd_seq,
    655		       nla_data(info->attrs[SMC_PNETID_NAME]), 0);
    656
    657	/* finish multi part message and send it */
    658	hdr = nlmsg_put(msg, info->snd_portid, info->snd_seq, NLMSG_DONE, 0,
    659			NLM_F_MULTI);
    660	if (!hdr) {
    661		nlmsg_free(msg);
    662		return -EMSGSIZE;
    663	}
    664	return genlmsg_reply(msg, info);
    665}
    666
    667/* Remove and delete all pnetids from pnet table.
    668 */
    669static int smc_pnet_flush(struct sk_buff *skb, struct genl_info *info)
    670{
    671	struct net *net = genl_info_net(info);
    672
    673	smc_pnet_remove_by_pnetid(net, NULL);
    674	return 0;
    675}
    676
    677/* SMC_PNETID generic netlink operation definition */
    678static const struct genl_ops smc_pnet_ops[] = {
    679	{
    680		.cmd = SMC_PNETID_GET,
    681		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
    682		/* can be retrieved by unprivileged users */
    683		.doit = smc_pnet_get,
    684		.dumpit = smc_pnet_dump,
    685		.start = smc_pnet_dump_start
    686	},
    687	{
    688		.cmd = SMC_PNETID_ADD,
    689		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
    690		.flags = GENL_ADMIN_PERM,
    691		.doit = smc_pnet_add
    692	},
    693	{
    694		.cmd = SMC_PNETID_DEL,
    695		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
    696		.flags = GENL_ADMIN_PERM,
    697		.doit = smc_pnet_del
    698	},
    699	{
    700		.cmd = SMC_PNETID_FLUSH,
    701		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
    702		.flags = GENL_ADMIN_PERM,
    703		.doit = smc_pnet_flush
    704	}
    705};
    706
    707/* SMC_PNETID family definition */
    708static struct genl_family smc_pnet_nl_family __ro_after_init = {
    709	.hdrsize = 0,
    710	.name = SMCR_GENL_FAMILY_NAME,
    711	.version = SMCR_GENL_FAMILY_VERSION,
    712	.maxattr = SMC_PNETID_MAX,
    713	.policy = smc_pnet_policy,
    714	.netnsok = true,
    715	.module = THIS_MODULE,
    716	.ops = smc_pnet_ops,
    717	.n_ops =  ARRAY_SIZE(smc_pnet_ops)
    718};
    719
    720bool smc_pnet_is_ndev_pnetid(struct net *net, u8 *pnetid)
    721{
    722	struct smc_net *sn = net_generic(net, smc_net_id);
    723	struct smc_pnetids_ndev_entry *pe;
    724	bool rc = false;
    725
    726	read_lock(&sn->pnetids_ndev.lock);
    727	list_for_each_entry(pe, &sn->pnetids_ndev.list, list) {
    728		if (smc_pnet_match(pnetid, pe->pnetid)) {
    729			rc = true;
    730			goto unlock;
    731		}
    732	}
    733
    734unlock:
    735	read_unlock(&sn->pnetids_ndev.lock);
    736	return rc;
    737}
    738
    739static int smc_pnet_add_pnetid(struct net *net, u8 *pnetid)
    740{
    741	struct smc_net *sn = net_generic(net, smc_net_id);
    742	struct smc_pnetids_ndev_entry *pe, *pi;
    743
    744	pe = kzalloc(sizeof(*pe), GFP_KERNEL);
    745	if (!pe)
    746		return -ENOMEM;
    747
    748	write_lock(&sn->pnetids_ndev.lock);
    749	list_for_each_entry(pi, &sn->pnetids_ndev.list, list) {
    750		if (smc_pnet_match(pnetid, pe->pnetid)) {
    751			refcount_inc(&pi->refcnt);
    752			kfree(pe);
    753			goto unlock;
    754		}
    755	}
    756	refcount_set(&pe->refcnt, 1);
    757	memcpy(pe->pnetid, pnetid, SMC_MAX_PNETID_LEN);
    758	list_add_tail(&pe->list, &sn->pnetids_ndev.list);
    759
    760unlock:
    761	write_unlock(&sn->pnetids_ndev.lock);
    762	return 0;
    763}
    764
    765static void smc_pnet_remove_pnetid(struct net *net, u8 *pnetid)
    766{
    767	struct smc_net *sn = net_generic(net, smc_net_id);
    768	struct smc_pnetids_ndev_entry *pe, *pe2;
    769
    770	write_lock(&sn->pnetids_ndev.lock);
    771	list_for_each_entry_safe(pe, pe2, &sn->pnetids_ndev.list, list) {
    772		if (smc_pnet_match(pnetid, pe->pnetid)) {
    773			if (refcount_dec_and_test(&pe->refcnt)) {
    774				list_del(&pe->list);
    775				kfree(pe);
    776			}
    777			break;
    778		}
    779	}
    780	write_unlock(&sn->pnetids_ndev.lock);
    781}
    782
    783static void smc_pnet_add_base_pnetid(struct net *net, struct net_device *dev,
    784				     u8 *ndev_pnetid)
    785{
    786	struct net_device *base_dev;
    787
    788	base_dev = __pnet_find_base_ndev(dev);
    789	if (base_dev->flags & IFF_UP &&
    790	    !smc_pnetid_by_dev_port(base_dev->dev.parent, base_dev->dev_port,
    791				    ndev_pnetid)) {
    792		/* add to PNETIDs list */
    793		smc_pnet_add_pnetid(net, ndev_pnetid);
    794	}
    795}
    796
    797/* create initial list of netdevice pnetids */
    798static void smc_pnet_create_pnetids_list(struct net *net)
    799{
    800	u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
    801	struct net_device *dev;
    802
    803	rtnl_lock();
    804	for_each_netdev(net, dev)
    805		smc_pnet_add_base_pnetid(net, dev, ndev_pnetid);
    806	rtnl_unlock();
    807}
    808
    809/* clean up list of netdevice pnetids */
    810static void smc_pnet_destroy_pnetids_list(struct net *net)
    811{
    812	struct smc_net *sn = net_generic(net, smc_net_id);
    813	struct smc_pnetids_ndev_entry *pe, *temp_pe;
    814
    815	write_lock(&sn->pnetids_ndev.lock);
    816	list_for_each_entry_safe(pe, temp_pe, &sn->pnetids_ndev.list, list) {
    817		list_del(&pe->list);
    818		kfree(pe);
    819	}
    820	write_unlock(&sn->pnetids_ndev.lock);
    821}
    822
    823static int smc_pnet_netdev_event(struct notifier_block *this,
    824				 unsigned long event, void *ptr)
    825{
    826	struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
    827	struct net *net = dev_net(event_dev);
    828	u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
    829
    830	switch (event) {
    831	case NETDEV_REBOOT:
    832	case NETDEV_UNREGISTER:
    833		smc_pnet_remove_by_ndev(event_dev);
    834		smc_ib_ndev_change(event_dev, event);
    835		return NOTIFY_OK;
    836	case NETDEV_REGISTER:
    837		smc_pnet_add_by_ndev(event_dev);
    838		smc_ib_ndev_change(event_dev, event);
    839		return NOTIFY_OK;
    840	case NETDEV_UP:
    841		smc_pnet_add_base_pnetid(net, event_dev, ndev_pnetid);
    842		return NOTIFY_OK;
    843	case NETDEV_DOWN:
    844		event_dev = __pnet_find_base_ndev(event_dev);
    845		if (!smc_pnetid_by_dev_port(event_dev->dev.parent,
    846					    event_dev->dev_port, ndev_pnetid)) {
    847			/* remove from PNETIDs list */
    848			smc_pnet_remove_pnetid(net, ndev_pnetid);
    849		}
    850		return NOTIFY_OK;
    851	default:
    852		return NOTIFY_DONE;
    853	}
    854}
    855
    856static struct notifier_block smc_netdev_notifier = {
    857	.notifier_call = smc_pnet_netdev_event
    858};
    859
    860/* init network namespace */
    861int smc_pnet_net_init(struct net *net)
    862{
    863	struct smc_net *sn = net_generic(net, smc_net_id);
    864	struct smc_pnettable *pnettable = &sn->pnettable;
    865	struct smc_pnetids_ndev *pnetids_ndev = &sn->pnetids_ndev;
    866
    867	INIT_LIST_HEAD(&pnettable->pnetlist);
    868	mutex_init(&pnettable->lock);
    869	INIT_LIST_HEAD(&pnetids_ndev->list);
    870	rwlock_init(&pnetids_ndev->lock);
    871
    872	smc_pnet_create_pnetids_list(net);
    873
    874	/* disable handshake limitation by default */
    875	net->smc.limit_smc_hs = 0;
    876
    877	return 0;
    878}
    879
    880int __init smc_pnet_init(void)
    881{
    882	int rc;
    883
    884	rc = genl_register_family(&smc_pnet_nl_family);
    885	if (rc)
    886		return rc;
    887	rc = register_netdevice_notifier(&smc_netdev_notifier);
    888	if (rc)
    889		genl_unregister_family(&smc_pnet_nl_family);
    890
    891	return rc;
    892}
    893
    894/* exit network namespace */
    895void smc_pnet_net_exit(struct net *net)
    896{
    897	/* flush pnet table */
    898	smc_pnet_remove_by_pnetid(net, NULL);
    899	smc_pnet_destroy_pnetids_list(net);
    900}
    901
    902void smc_pnet_exit(void)
    903{
    904	unregister_netdevice_notifier(&smc_netdev_notifier);
    905	genl_unregister_family(&smc_pnet_nl_family);
    906}
    907
    908static struct net_device *__pnet_find_base_ndev(struct net_device *ndev)
    909{
    910	int i, nest_lvl;
    911
    912	ASSERT_RTNL();
    913	nest_lvl = ndev->lower_level;
    914	for (i = 0; i < nest_lvl; i++) {
    915		struct list_head *lower = &ndev->adj_list.lower;
    916
    917		if (list_empty(lower))
    918			break;
    919		lower = lower->next;
    920		ndev = netdev_lower_get_next(ndev, &lower);
    921	}
    922	return ndev;
    923}
    924
    925/* Determine one base device for stacked net devices.
    926 * If the lower device level contains more than one devices
    927 * (for instance with bonding slaves), just the first device
    928 * is used to reach a base device.
    929 */
    930static struct net_device *pnet_find_base_ndev(struct net_device *ndev)
    931{
    932	rtnl_lock();
    933	ndev = __pnet_find_base_ndev(ndev);
    934	rtnl_unlock();
    935	return ndev;
    936}
    937
    938static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev,
    939					      u8 *pnetid)
    940{
    941	struct smc_pnettable *pnettable;
    942	struct net *net = dev_net(ndev);
    943	struct smc_pnetentry *pnetelem;
    944	struct smc_net *sn;
    945	int rc = -ENOENT;
    946
    947	/* get pnettable for namespace */
    948	sn = net_generic(net, smc_net_id);
    949	pnettable = &sn->pnettable;
    950
    951	mutex_lock(&pnettable->lock);
    952	list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
    953		if (pnetelem->type == SMC_PNET_ETH && ndev == pnetelem->ndev) {
    954			/* get pnetid of netdev device */
    955			memcpy(pnetid, pnetelem->pnet_name, SMC_MAX_PNETID_LEN);
    956			rc = 0;
    957			break;
    958		}
    959	}
    960	mutex_unlock(&pnettable->lock);
    961	return rc;
    962}
    963
    964static int smc_pnet_determine_gid(struct smc_ib_device *ibdev, int i,
    965				  struct smc_init_info *ini)
    966{
    967	if (!ini->check_smcrv2 &&
    968	    !smc_ib_determine_gid(ibdev, i, ini->vlan_id, ini->ib_gid, NULL,
    969				  NULL)) {
    970		ini->ib_dev = ibdev;
    971		ini->ib_port = i;
    972		return 0;
    973	}
    974	if (ini->check_smcrv2 &&
    975	    !smc_ib_determine_gid(ibdev, i, ini->vlan_id, ini->smcrv2.ib_gid_v2,
    976				  NULL, &ini->smcrv2)) {
    977		ini->smcrv2.ib_dev_v2 = ibdev;
    978		ini->smcrv2.ib_port_v2 = i;
    979		return 0;
    980	}
    981	return -ENODEV;
    982}
    983
    984/* find a roce device for the given pnetid */
    985static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id,
    986					  struct smc_init_info *ini,
    987					  struct smc_ib_device *known_dev,
    988					  struct net *net)
    989{
    990	struct smc_ib_device *ibdev;
    991	int i;
    992
    993	mutex_lock(&smc_ib_devices.mutex);
    994	list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
    995		if (ibdev == known_dev ||
    996		    !rdma_dev_access_netns(ibdev->ibdev, net))
    997			continue;
    998		for (i = 1; i <= SMC_MAX_PORTS; i++) {
    999			if (!rdma_is_port_valid(ibdev->ibdev, i))
   1000				continue;
   1001			if (smc_pnet_match(ibdev->pnetid[i - 1], pnet_id) &&
   1002			    smc_ib_port_active(ibdev, i) &&
   1003			    !test_bit(i - 1, ibdev->ports_going_away)) {
   1004				if (!smc_pnet_determine_gid(ibdev, i, ini))
   1005					goto out;
   1006			}
   1007		}
   1008	}
   1009out:
   1010	mutex_unlock(&smc_ib_devices.mutex);
   1011}
   1012
   1013/* find alternate roce device with same pnet_id, vlan_id and net namespace */
   1014void smc_pnet_find_alt_roce(struct smc_link_group *lgr,
   1015			    struct smc_init_info *ini,
   1016			    struct smc_ib_device *known_dev)
   1017{
   1018	struct net *net = lgr->net;
   1019
   1020	_smc_pnet_find_roce_by_pnetid(lgr->pnet_id, ini, known_dev, net);
   1021}
   1022
   1023/* if handshake network device belongs to a roce device, return its
   1024 * IB device and port
   1025 */
   1026static void smc_pnet_find_rdma_dev(struct net_device *netdev,
   1027				   struct smc_init_info *ini)
   1028{
   1029	struct net *net = dev_net(netdev);
   1030	struct smc_ib_device *ibdev;
   1031
   1032	mutex_lock(&smc_ib_devices.mutex);
   1033	list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
   1034		struct net_device *ndev;
   1035		int i;
   1036
   1037		/* check rdma net namespace */
   1038		if (!rdma_dev_access_netns(ibdev->ibdev, net))
   1039			continue;
   1040
   1041		for (i = 1; i <= SMC_MAX_PORTS; i++) {
   1042			if (!rdma_is_port_valid(ibdev->ibdev, i))
   1043				continue;
   1044			if (!ibdev->ibdev->ops.get_netdev)
   1045				continue;
   1046			ndev = ibdev->ibdev->ops.get_netdev(ibdev->ibdev, i);
   1047			if (!ndev)
   1048				continue;
   1049			dev_put(ndev);
   1050			if (netdev == ndev &&
   1051			    smc_ib_port_active(ibdev, i) &&
   1052			    !test_bit(i - 1, ibdev->ports_going_away)) {
   1053				if (!smc_pnet_determine_gid(ibdev, i, ini))
   1054					break;
   1055			}
   1056		}
   1057	}
   1058	mutex_unlock(&smc_ib_devices.mutex);
   1059}
   1060
   1061/* Determine the corresponding IB device port based on the hardware PNETID.
   1062 * Searching stops at the first matching active IB device port with vlan_id
   1063 * configured.
   1064 * If nothing found, check pnetid table.
   1065 * If nothing found, try to use handshake device
   1066 */
   1067static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
   1068					 struct smc_init_info *ini)
   1069{
   1070	u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
   1071	struct net *net;
   1072
   1073	ndev = pnet_find_base_ndev(ndev);
   1074	net = dev_net(ndev);
   1075	if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
   1076				   ndev_pnetid) &&
   1077	    smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) {
   1078		smc_pnet_find_rdma_dev(ndev, ini);
   1079		return; /* pnetid could not be determined */
   1080	}
   1081	_smc_pnet_find_roce_by_pnetid(ndev_pnetid, ini, NULL, net);
   1082}
   1083
   1084static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
   1085					struct smc_init_info *ini)
   1086{
   1087	u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
   1088	struct smcd_dev *ismdev;
   1089
   1090	ndev = pnet_find_base_ndev(ndev);
   1091	if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
   1092				   ndev_pnetid) &&
   1093	    smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid))
   1094		return; /* pnetid could not be determined */
   1095
   1096	mutex_lock(&smcd_dev_list.mutex);
   1097	list_for_each_entry(ismdev, &smcd_dev_list.list, list) {
   1098		if (smc_pnet_match(ismdev->pnetid, ndev_pnetid) &&
   1099		    !ismdev->going_away &&
   1100		    (!ini->ism_peer_gid[0] ||
   1101		     !smc_ism_cantalk(ini->ism_peer_gid[0], ini->vlan_id,
   1102				      ismdev))) {
   1103			ini->ism_dev[0] = ismdev;
   1104			break;
   1105		}
   1106	}
   1107	mutex_unlock(&smcd_dev_list.mutex);
   1108}
   1109
   1110/* PNET table analysis for a given sock:
   1111 * determine ib_device and port belonging to used internal TCP socket
   1112 * ethernet interface.
   1113 */
   1114void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini)
   1115{
   1116	struct dst_entry *dst = sk_dst_get(sk);
   1117
   1118	if (!dst)
   1119		goto out;
   1120	if (!dst->dev)
   1121		goto out_rel;
   1122
   1123	smc_pnet_find_roce_by_pnetid(dst->dev, ini);
   1124
   1125out_rel:
   1126	dst_release(dst);
   1127out:
   1128	return;
   1129}
   1130
   1131void smc_pnet_find_ism_resource(struct sock *sk, struct smc_init_info *ini)
   1132{
   1133	struct dst_entry *dst = sk_dst_get(sk);
   1134
   1135	ini->ism_dev[0] = NULL;
   1136	if (!dst)
   1137		goto out;
   1138	if (!dst->dev)
   1139		goto out_rel;
   1140
   1141	smc_pnet_find_ism_by_pnetid(dst->dev, ini);
   1142
   1143out_rel:
   1144	dst_release(dst);
   1145out:
   1146	return;
   1147}
   1148
   1149/* Lookup and apply a pnet table entry to the given ib device.
   1150 */
   1151int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port)
   1152{
   1153	char *ib_name = smcibdev->ibdev->name;
   1154	struct smc_pnettable *pnettable;
   1155	struct smc_pnetentry *tmp_pe;
   1156	struct smc_net *sn;
   1157	int rc = -ENOENT;
   1158
   1159	/* get pnettable for init namespace */
   1160	sn = net_generic(&init_net, smc_net_id);
   1161	pnettable = &sn->pnettable;
   1162
   1163	mutex_lock(&pnettable->lock);
   1164	list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
   1165		if (tmp_pe->type == SMC_PNET_IB &&
   1166		    !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX) &&
   1167		    tmp_pe->ib_port == ib_port) {
   1168			smc_pnet_apply_ib(smcibdev, ib_port, tmp_pe->pnet_name);
   1169			rc = 0;
   1170			break;
   1171		}
   1172	}
   1173	mutex_unlock(&pnettable->lock);
   1174
   1175	return rc;
   1176}
   1177
   1178/* Lookup and apply a pnet table entry to the given smcd device.
   1179 */
   1180int smc_pnetid_by_table_smcd(struct smcd_dev *smcddev)
   1181{
   1182	const char *ib_name = dev_name(&smcddev->dev);
   1183	struct smc_pnettable *pnettable;
   1184	struct smc_pnetentry *tmp_pe;
   1185	struct smc_net *sn;
   1186	int rc = -ENOENT;
   1187
   1188	/* get pnettable for init namespace */
   1189	sn = net_generic(&init_net, smc_net_id);
   1190	pnettable = &sn->pnettable;
   1191
   1192	mutex_lock(&pnettable->lock);
   1193	list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
   1194		if (tmp_pe->type == SMC_PNET_IB &&
   1195		    !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX)) {
   1196			smc_pnet_apply_smcd(smcddev, tmp_pe->pnet_name);
   1197			rc = 0;
   1198			break;
   1199		}
   1200	}
   1201	mutex_unlock(&pnettable->lock);
   1202
   1203	return rc;
   1204}