cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

sysfs.c (40545B)


      1/*
      2 * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
      3 * Copyright (c) 2005 Mellanox Technologies Ltd.  All rights reserved.
      4 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
      5 *
      6 * This software is available to you under a choice of one of two
      7 * licenses.  You may choose to be licensed under the terms of the GNU
      8 * General Public License (GPL) Version 2, available from the file
      9 * COPYING in the main directory of this source tree, or the
     10 * OpenIB.org BSD license below:
     11 *
     12 *     Redistribution and use in source and binary forms, with or
     13 *     without modification, are permitted provided that the following
     14 *     conditions are met:
     15 *
     16 *      - Redistributions of source code must retain the above
     17 *        copyright notice, this list of conditions and the following
     18 *        disclaimer.
     19 *
     20 *      - Redistributions in binary form must reproduce the above
     21 *        copyright notice, this list of conditions and the following
     22 *        disclaimer in the documentation and/or other materials
     23 *        provided with the distribution.
     24 *
     25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
     28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
     29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
     30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
     31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     32 * SOFTWARE.
     33 */
     34
     35#include "core_priv.h"
     36
     37#include <linux/slab.h>
     38#include <linux/stat.h>
     39#include <linux/string.h>
     40#include <linux/netdevice.h>
     41#include <linux/ethtool.h>
     42
     43#include <rdma/ib_mad.h>
     44#include <rdma/ib_pma.h>
     45#include <rdma/ib_cache.h>
     46#include <rdma/rdma_counter.h>
     47#include <rdma/ib_sysfs.h>
     48
/*
 * A port attribute that carries an extra index, used for table-like files
 * and for PMA counters. Handlers recover it from the embedded @attr with
 * container_of().
 *
 * @attr:    the embedded port attribute
 * @name:    small buffer for a name string (presumably backs the attribute
 *           name of dynamically created entries - the code filling it is
 *           not in this part of the file, TODO confirm)
 * @index:   GID/P_Key table index (show_port_gid(), show_port_pkey()) or the
 *           packed counter location built by PORT_PMA_ATTR()
 * @attr_id: PMA attribute ID handed to get_perf_mad() by show_pma_counter()
 */
struct port_table_attribute {
	struct ib_port_attribute attr;
	char			name[8];
	int			index;
	__be16			attr_id;
};
     55
/*
 * kobject whose attributes are served by gid_attr_show().
 *
 * @port:        owning port; gid_attr_show() takes ibdev and port_num from it
 * @kobj:        embedded kobject, released by ib_port_gid_attr_release()
 * @groups:      attribute groups; each groups[i].attrs array is kfree()d on
 *               release
 * @groups_list: pointer list over @groups (one slot more than @groups,
 *               presumably for a NULL terminator - TODO confirm)
 * @attrs_list:  trailing storage for the table attributes
 */
struct gid_attr_group {
	struct ib_port *port;
	struct kobject kobj;
	struct attribute_group groups[2];
	const struct attribute_group *groups_list[3];
	struct port_table_attribute attrs_list[];
};
     63
/*
 * sysfs state of one device port.
 *
 * @kobj:           embedded kobject, released by ib_port_release()
 * @ibdev:          device the port belongs to; passed to every show/store
 * @gid_attr_group: companion kobject for the GID attribute files
 * @hw_stats_data:  optional per-port hardware statistics (may be NULL, see
 *                  ib_port_release())
 * @groups:         attribute groups; each groups[i].attrs array is
 *                  kfree()d on release
 * @groups_list:    pointer list of attribute groups for this port
 * @port_num:       port number passed to every show/store handler
 * @attrs_list:     trailing storage for the table attributes
 */
struct ib_port {
	struct kobject kobj;
	struct ib_device *ibdev;
	struct gid_attr_group *gid_attr_group;
	struct hw_stats_port_data *hw_stats_data;

	struct attribute_group groups[3];
	const struct attribute_group *groups_list[5];
	u32 port_num;
	struct port_table_attribute attrs_list[];
};
     75
/*
 * Device-level hardware statistics attribute.
 *
 * hw_stat_device_show()/hw_stat_device_store() recover this structure from
 * the embedded @attr and call @show/@store with the device's stats, the
 * position of this entry inside hw_stats_device_data.attrs[] as @index, and
 * a port_num of 0.
 */
struct hw_stats_device_attribute {
	struct device_attribute attr;
	ssize_t (*show)(struct ib_device *ibdev, struct rdma_hw_stats *stats,
			unsigned int index, unsigned int port_num, char *buf);
	ssize_t (*store)(struct ib_device *ibdev, struct rdma_hw_stats *stats,
			 unsigned int index, unsigned int port_num,
			 const char *buf, size_t count);
};
     84
/*
 * Port-level hardware statistics attribute.
 *
 * hw_stat_port_show()/hw_stat_port_store() recover this structure from the
 * embedded @attr and call @show/@store with the port's stats, the position
 * of this entry inside hw_stats_port_data.attrs[] as @index, and the port's
 * number.
 */
struct hw_stats_port_attribute {
	struct ib_port_attribute attr;
	ssize_t (*show)(struct ib_device *ibdev, struct rdma_hw_stats *stats,
			unsigned int index, unsigned int port_num, char *buf);
	ssize_t (*store)(struct ib_device *ibdev, struct rdma_hw_stats *stats,
			 unsigned int index, unsigned int port_num,
			 const char *buf, size_t count);
};
     93
/*
 * Device-wide hardware statistics state, allocated by
 * alloc_hw_stats_device() and torn down by ib_device_release_hw_stats().
 *
 * @group: attribute group (named "hw_counters" at allocation time); its
 *         attrs pointer array is allocated separately
 * @stats: statistics object obtained from the driver
 * @attrs: trailing array of attributes; an attribute's position in this
 *         array is the index handed to its show/store callback
 */
struct hw_stats_device_data {
	struct attribute_group group;
	struct rdma_hw_stats *stats;
	struct hw_stats_device_attribute attrs[];
};
     99
/*
 * Per-port hardware statistics state, freed from ib_port_release().
 *
 * @stats: statistics object for the port
 * @attrs: trailing array of attributes; an attribute's position in this
 *         array is the index handed to its show/store callback
 */
struct hw_stats_port_data {
	struct rdma_hw_stats *stats;
	struct hw_stats_port_attribute attrs[];
};
    104
    105static ssize_t port_attr_show(struct kobject *kobj,
    106			      struct attribute *attr, char *buf)
    107{
    108	struct ib_port_attribute *port_attr =
    109		container_of(attr, struct ib_port_attribute, attr);
    110	struct ib_port *p = container_of(kobj, struct ib_port, kobj);
    111
    112	if (!port_attr->show)
    113		return -EIO;
    114
    115	return port_attr->show(p->ibdev, p->port_num, port_attr, buf);
    116}
    117
    118static ssize_t port_attr_store(struct kobject *kobj,
    119			       struct attribute *attr,
    120			       const char *buf, size_t count)
    121{
    122	struct ib_port_attribute *port_attr =
    123		container_of(attr, struct ib_port_attribute, attr);
    124	struct ib_port *p = container_of(kobj, struct ib_port, kobj);
    125
    126	if (!port_attr->store)
    127		return -EIO;
    128	return port_attr->store(p->ibdev, p->port_num, port_attr, buf, count);
    129}
    130
    131struct ib_device *ib_port_sysfs_get_ibdev_kobj(struct kobject *kobj,
    132					       u32 *port_num)
    133{
    134	struct ib_port *port = container_of(kobj, struct ib_port, kobj);
    135
    136	*port_num = port->port_num;
    137	return port->ibdev;
    138}
    139EXPORT_SYMBOL(ib_port_sysfs_get_ibdev_kobj);
    140
/*
 * sysfs operations for port kobjects (installed through port_type): reads
 * and writes are dispatched per attribute by port_attr_show() and
 * port_attr_store().
 */
static const struct sysfs_ops port_sysfs_ops = {
	.show	= port_attr_show,
	.store	= port_attr_store
};
    145
    146static ssize_t hw_stat_device_show(struct device *dev,
    147				   struct device_attribute *attr, char *buf)
    148{
    149	struct hw_stats_device_attribute *stat_attr =
    150		container_of(attr, struct hw_stats_device_attribute, attr);
    151	struct ib_device *ibdev = container_of(dev, struct ib_device, dev);
    152
    153	return stat_attr->show(ibdev, ibdev->hw_stats_data->stats,
    154			       stat_attr - ibdev->hw_stats_data->attrs, 0, buf);
    155}
    156
    157static ssize_t hw_stat_device_store(struct device *dev,
    158				    struct device_attribute *attr,
    159				    const char *buf, size_t count)
    160{
    161	struct hw_stats_device_attribute *stat_attr =
    162		container_of(attr, struct hw_stats_device_attribute, attr);
    163	struct ib_device *ibdev = container_of(dev, struct ib_device, dev);
    164
    165	return stat_attr->store(ibdev, ibdev->hw_stats_data->stats,
    166				stat_attr - ibdev->hw_stats_data->attrs, 0, buf,
    167				count);
    168}
    169
    170static ssize_t hw_stat_port_show(struct ib_device *ibdev, u32 port_num,
    171				 struct ib_port_attribute *attr, char *buf)
    172{
    173	struct hw_stats_port_attribute *stat_attr =
    174		container_of(attr, struct hw_stats_port_attribute, attr);
    175	struct ib_port *port = ibdev->port_data[port_num].sysfs;
    176
    177	return stat_attr->show(ibdev, port->hw_stats_data->stats,
    178			       stat_attr - port->hw_stats_data->attrs,
    179			       port->port_num, buf);
    180}
    181
    182static ssize_t hw_stat_port_store(struct ib_device *ibdev, u32 port_num,
    183				  struct ib_port_attribute *attr,
    184				  const char *buf, size_t count)
    185{
    186	struct hw_stats_port_attribute *stat_attr =
    187		container_of(attr, struct hw_stats_port_attribute, attr);
    188	struct ib_port *port = ibdev->port_data[port_num].sysfs;
    189
    190	return stat_attr->store(ibdev, port->hw_stats_data->stats,
    191				stat_attr - port->hw_stats_data->attrs,
    192				port->port_num, buf, count);
    193}
    194
    195static ssize_t gid_attr_show(struct kobject *kobj,
    196			     struct attribute *attr, char *buf)
    197{
    198	struct ib_port_attribute *port_attr =
    199		container_of(attr, struct ib_port_attribute, attr);
    200	struct ib_port *p = container_of(kobj, struct gid_attr_group,
    201					 kobj)->port;
    202
    203	if (!port_attr->show)
    204		return -EIO;
    205
    206	return port_attr->show(p->ibdev, p->port_num, port_attr, buf);
    207}
    208
/*
 * sysfs operations for gid_attr_group kobjects (installed through
 * gid_attr_type). Only .show is provided; no store handler is set.
 */
static const struct sysfs_ops gid_attr_sysfs_ops = {
	.show = gid_attr_show
};
    212
    213static ssize_t state_show(struct ib_device *ibdev, u32 port_num,
    214			  struct ib_port_attribute *unused, char *buf)
    215{
    216	struct ib_port_attr attr;
    217	ssize_t ret;
    218
    219	static const char *state_name[] = {
    220		[IB_PORT_NOP]		= "NOP",
    221		[IB_PORT_DOWN]		= "DOWN",
    222		[IB_PORT_INIT]		= "INIT",
    223		[IB_PORT_ARMED]		= "ARMED",
    224		[IB_PORT_ACTIVE]	= "ACTIVE",
    225		[IB_PORT_ACTIVE_DEFER]	= "ACTIVE_DEFER"
    226	};
    227
    228	ret = ib_query_port(ibdev, port_num, &attr);
    229	if (ret)
    230		return ret;
    231
    232	return sysfs_emit(buf, "%d: %s\n", attr.state,
    233			  attr.state >= 0 &&
    234					  attr.state < ARRAY_SIZE(state_name) ?
    235				  state_name[attr.state] :
    236				  "UNKNOWN");
    237}
    238
    239static ssize_t lid_show(struct ib_device *ibdev, u32 port_num,
    240			struct ib_port_attribute *unused, char *buf)
    241{
    242	struct ib_port_attr attr;
    243	ssize_t ret;
    244
    245	ret = ib_query_port(ibdev, port_num, &attr);
    246	if (ret)
    247		return ret;
    248
    249	return sysfs_emit(buf, "0x%x\n", attr.lid);
    250}
    251
    252static ssize_t lid_mask_count_show(struct ib_device *ibdev, u32 port_num,
    253				   struct ib_port_attribute *unused, char *buf)
    254{
    255	struct ib_port_attr attr;
    256	ssize_t ret;
    257
    258	ret = ib_query_port(ibdev, port_num, &attr);
    259	if (ret)
    260		return ret;
    261
    262	return sysfs_emit(buf, "%u\n", attr.lmc);
    263}
    264
    265static ssize_t sm_lid_show(struct ib_device *ibdev, u32 port_num,
    266			   struct ib_port_attribute *unused, char *buf)
    267{
    268	struct ib_port_attr attr;
    269	ssize_t ret;
    270
    271	ret = ib_query_port(ibdev, port_num, &attr);
    272	if (ret)
    273		return ret;
    274
    275	return sysfs_emit(buf, "0x%x\n", attr.sm_lid);
    276}
    277
    278static ssize_t sm_sl_show(struct ib_device *ibdev, u32 port_num,
    279			  struct ib_port_attribute *unused, char *buf)
    280{
    281	struct ib_port_attr attr;
    282	ssize_t ret;
    283
    284	ret = ib_query_port(ibdev, port_num, &attr);
    285	if (ret)
    286		return ret;
    287
    288	return sysfs_emit(buf, "%u\n", attr.sm_sl);
    289}
    290
    291static ssize_t cap_mask_show(struct ib_device *ibdev, u32 port_num,
    292			     struct ib_port_attribute *unused, char *buf)
    293{
    294	struct ib_port_attr attr;
    295	ssize_t ret;
    296
    297	ret = ib_query_port(ibdev, port_num, &attr);
    298	if (ret)
    299		return ret;
    300
    301	return sysfs_emit(buf, "0x%08x\n", attr.port_cap_flags);
    302}
    303
    304static ssize_t rate_show(struct ib_device *ibdev, u32 port_num,
    305			 struct ib_port_attribute *unused, char *buf)
    306{
    307	struct ib_port_attr attr;
    308	char *speed = "";
    309	int rate;		/* in deci-Gb/sec */
    310	ssize_t ret;
    311
    312	ret = ib_query_port(ibdev, port_num, &attr);
    313	if (ret)
    314		return ret;
    315
    316	switch (attr.active_speed) {
    317	case IB_SPEED_DDR:
    318		speed = " DDR";
    319		rate = 50;
    320		break;
    321	case IB_SPEED_QDR:
    322		speed = " QDR";
    323		rate = 100;
    324		break;
    325	case IB_SPEED_FDR10:
    326		speed = " FDR10";
    327		rate = 100;
    328		break;
    329	case IB_SPEED_FDR:
    330		speed = " FDR";
    331		rate = 140;
    332		break;
    333	case IB_SPEED_EDR:
    334		speed = " EDR";
    335		rate = 250;
    336		break;
    337	case IB_SPEED_HDR:
    338		speed = " HDR";
    339		rate = 500;
    340		break;
    341	case IB_SPEED_NDR:
    342		speed = " NDR";
    343		rate = 1000;
    344		break;
    345	case IB_SPEED_SDR:
    346	default:		/* default to SDR for invalid rates */
    347		speed = " SDR";
    348		rate = 25;
    349		break;
    350	}
    351
    352	rate *= ib_width_enum_to_int(attr.active_width);
    353	if (rate < 0)
    354		return -EINVAL;
    355
    356	return sysfs_emit(buf, "%d%s Gb/sec (%dX%s)\n", rate / 10,
    357			  rate % 10 ? ".5" : "",
    358			  ib_width_enum_to_int(attr.active_width), speed);
    359}
    360
    361static const char *phys_state_to_str(enum ib_port_phys_state phys_state)
    362{
    363	static const char *phys_state_str[] = {
    364		"<unknown>",
    365		"Sleep",
    366		"Polling",
    367		"Disabled",
    368		"PortConfigurationTraining",
    369		"LinkUp",
    370		"LinkErrorRecovery",
    371		"Phy Test",
    372	};
    373
    374	if (phys_state < ARRAY_SIZE(phys_state_str))
    375		return phys_state_str[phys_state];
    376	return "<unknown>";
    377}
    378
    379static ssize_t phys_state_show(struct ib_device *ibdev, u32 port_num,
    380			       struct ib_port_attribute *unused, char *buf)
    381{
    382	struct ib_port_attr attr;
    383
    384	ssize_t ret;
    385
    386	ret = ib_query_port(ibdev, port_num, &attr);
    387	if (ret)
    388		return ret;
    389
    390	return sysfs_emit(buf, "%u: %s\n", attr.phys_state,
    391			  phys_state_to_str(attr.phys_state));
    392}
    393
    394static ssize_t link_layer_show(struct ib_device *ibdev, u32 port_num,
    395			       struct ib_port_attribute *unused, char *buf)
    396{
    397	const char *output;
    398
    399	switch (rdma_port_get_link_layer(ibdev, port_num)) {
    400	case IB_LINK_LAYER_INFINIBAND:
    401		output = "InfiniBand";
    402		break;
    403	case IB_LINK_LAYER_ETHERNET:
    404		output = "Ethernet";
    405		break;
    406	default:
    407		output = "Unknown";
    408		break;
    409	}
    410
    411	return sysfs_emit(buf, "%s\n", output);
    412}
    413
/*
 * Declare one ib_port_attr_<name> object per <name>_show() handler defined
 * in this file (IB_PORT_ATTR_RO() itself is defined elsewhere).
 */
static IB_PORT_ATTR_RO(state);
static IB_PORT_ATTR_RO(lid);
static IB_PORT_ATTR_RO(lid_mask_count);
static IB_PORT_ATTR_RO(sm_lid);
static IB_PORT_ATTR_RO(sm_sl);
static IB_PORT_ATTR_RO(cap_mask);
static IB_PORT_ATTR_RO(rate);
static IB_PORT_ATTR_RO(phys_state);
static IB_PORT_ATTR_RO(link_layer);

/* NULL-terminated list of the attributes every port kobject gets. */
static struct attribute *port_default_attrs[] = {
	&ib_port_attr_state.attr,
	&ib_port_attr_lid.attr,
	&ib_port_attr_lid_mask_count.attr,
	&ib_port_attr_sm_lid.attr,
	&ib_port_attr_sm_sl.attr,
	&ib_port_attr_cap_mask.attr,
	&ib_port_attr_rate.attr,
	&ib_port_attr_phys_state.attr,
	&ib_port_attr_link_layer.attr,
	NULL
};
/* Provides port_default_groups, which port_type uses as default_groups. */
ATTRIBUTE_GROUPS(port_default);
    437
    438static ssize_t print_ndev(const struct ib_gid_attr *gid_attr, char *buf)
    439{
    440	struct net_device *ndev;
    441	int ret = -EINVAL;
    442
    443	rcu_read_lock();
    444	ndev = rcu_dereference(gid_attr->ndev);
    445	if (ndev)
    446		ret = sysfs_emit(buf, "%s\n", ndev->name);
    447	rcu_read_unlock();
    448	return ret;
    449}
    450
    451static ssize_t print_gid_type(const struct ib_gid_attr *gid_attr, char *buf)
    452{
    453	return sysfs_emit(buf, "%s\n",
    454			  ib_cache_gid_type_str(gid_attr->gid_type));
    455}
    456
    457static ssize_t _show_port_gid_attr(
    458	struct ib_device *ibdev, u32 port_num, struct ib_port_attribute *attr,
    459	char *buf,
    460	ssize_t (*print)(const struct ib_gid_attr *gid_attr, char *buf))
    461{
    462	struct port_table_attribute *tab_attr =
    463		container_of(attr, struct port_table_attribute, attr);
    464	const struct ib_gid_attr *gid_attr;
    465	ssize_t ret;
    466
    467	gid_attr = rdma_get_gid_attr(ibdev, port_num, tab_attr->index);
    468	if (IS_ERR(gid_attr))
    469		/* -EINVAL is returned for user space compatibility reasons. */
    470		return -EINVAL;
    471
    472	ret = print(gid_attr, buf);
    473	rdma_put_gid_attr(gid_attr);
    474	return ret;
    475}
    476
    477static ssize_t show_port_gid(struct ib_device *ibdev, u32 port_num,
    478			     struct ib_port_attribute *attr, char *buf)
    479{
    480	struct port_table_attribute *tab_attr =
    481		container_of(attr, struct port_table_attribute, attr);
    482	const struct ib_gid_attr *gid_attr;
    483	int len;
    484
    485	gid_attr = rdma_get_gid_attr(ibdev, port_num, tab_attr->index);
    486	if (IS_ERR(gid_attr)) {
    487		const union ib_gid zgid = {};
    488
    489		/* If reading GID fails, it is likely due to GID entry being
    490		 * empty (invalid) or reserved GID in the table.  User space
    491		 * expects to read GID table entries as long as it given index
    492		 * is within GID table size.  Administrative/debugging tool
    493		 * fails to query rest of the GID entries if it hits error
    494		 * while querying a GID of the given index.  To avoid user
    495		 * space throwing such error on fail to read gid, return zero
    496		 * GID as before. This maintains backward compatibility.
    497		 */
    498		return sysfs_emit(buf, "%pI6\n", zgid.raw);
    499	}
    500
    501	len = sysfs_emit(buf, "%pI6\n", gid_attr->gid.raw);
    502	rdma_put_gid_attr(gid_attr);
    503	return len;
    504}
    505
    506static ssize_t show_port_gid_attr_ndev(struct ib_device *ibdev, u32 port_num,
    507				       struct ib_port_attribute *attr,
    508				       char *buf)
    509{
    510	return _show_port_gid_attr(ibdev, port_num, attr, buf, print_ndev);
    511}
    512
    513static ssize_t show_port_gid_attr_gid_type(struct ib_device *ibdev,
    514					   u32 port_num,
    515					   struct ib_port_attribute *attr,
    516					   char *buf)
    517{
    518	return _show_port_gid_attr(ibdev, port_num, attr, buf, print_gid_type);
    519}
    520
    521static ssize_t show_port_pkey(struct ib_device *ibdev, u32 port_num,
    522			      struct ib_port_attribute *attr, char *buf)
    523{
    524	struct port_table_attribute *tab_attr =
    525		container_of(attr, struct port_table_attribute, attr);
    526	u16 pkey;
    527	int ret;
    528
    529	ret = ib_query_pkey(ibdev, port_num, tab_attr->index, &pkey);
    530	if (ret)
    531		return ret;
    532
    533	return sysfs_emit(buf, "0x%04x\n", pkey);
    534}
    535
/*
 * Define port_pma_attr_<_name>, a read-only table attribute served by
 * show_pma_counter() and fetched with attribute ID IB_PMA_PORT_COUNTERS.
 *
 * .index packs the counter description:
 *   bits  0-15: _offset, the counter's bit offset
 *   bits 16-23: _width, the counter's width in bits
 *   bits 24+  : _counter (not decoded by show_pma_counter())
 */
#define PORT_PMA_ATTR(_name, _counter, _width, _offset)			\
struct port_table_attribute port_pma_attr_##_name = {			\
	.attr  = __ATTR(_name, S_IRUGO, show_pma_counter, NULL),	\
	.index = (_offset) | ((_width) << 16) | ((_counter) << 24),	\
	.attr_id = IB_PMA_PORT_COUNTERS,				\
}

/*
 * Same as PORT_PMA_ATTR() but defines port_pma_attr_ext_<_name>, fetched
 * with attribute ID IB_PMA_PORT_COUNTERS_EXT; .index holds only the offset
 * and width.
 */
#define PORT_PMA_ATTR_EXT(_name, _width, _offset)			\
struct port_table_attribute port_pma_attr_ext_##_name = {		\
	.attr  = __ATTR(_name, S_IRUGO, show_pma_counter, NULL),	\
	.index = (_offset) | ((_width) << 16),				\
	.attr_id = IB_PMA_PORT_COUNTERS_EXT,				\
}
    549
    550/*
    551 * Get a Perfmgmt MAD block of data.
    552 * Returns error code or the number of bytes retrieved.
    553 */
    554static int get_perf_mad(struct ib_device *dev, int port_num, __be16 attr,
    555		void *data, int offset, size_t size)
    556{
    557	struct ib_mad *in_mad;
    558	struct ib_mad *out_mad;
    559	size_t mad_size = sizeof(*out_mad);
    560	u16 out_mad_pkey_index = 0;
    561	ssize_t ret;
    562
    563	if (!dev->ops.process_mad)
    564		return -ENOSYS;
    565
    566	in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
    567	out_mad = kzalloc(sizeof(*out_mad), GFP_KERNEL);
    568	if (!in_mad || !out_mad) {
    569		ret = -ENOMEM;
    570		goto out;
    571	}
    572
    573	in_mad->mad_hdr.base_version  = 1;
    574	in_mad->mad_hdr.mgmt_class    = IB_MGMT_CLASS_PERF_MGMT;
    575	in_mad->mad_hdr.class_version = 1;
    576	in_mad->mad_hdr.method        = IB_MGMT_METHOD_GET;
    577	in_mad->mad_hdr.attr_id       = attr;
    578
    579	if (attr != IB_PMA_CLASS_PORT_INFO)
    580		in_mad->data[41] = port_num;	/* PortSelect field */
    581
    582	if ((dev->ops.process_mad(dev, IB_MAD_IGNORE_MKEY, port_num, NULL, NULL,
    583				  in_mad, out_mad, &mad_size,
    584				  &out_mad_pkey_index) &
    585	     (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) !=
    586	    (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) {
    587		ret = -EINVAL;
    588		goto out;
    589	}
    590	memcpy(data, out_mad->data + offset, size);
    591	ret = size;
    592out:
    593	kfree(in_mad);
    594	kfree(out_mad);
    595	return ret;
    596}
    597
    598static ssize_t show_pma_counter(struct ib_device *ibdev, u32 port_num,
    599				struct ib_port_attribute *attr, char *buf)
    600{
    601	struct port_table_attribute *tab_attr =
    602		container_of(attr, struct port_table_attribute, attr);
    603	int offset = tab_attr->index & 0xffff;
    604	int width  = (tab_attr->index >> 16) & 0xff;
    605	int ret;
    606	u8 data[8];
    607	int len;
    608
    609	ret = get_perf_mad(ibdev, port_num, tab_attr->attr_id, &data,
    610			40 + offset / 8, sizeof(data));
    611	if (ret < 0)
    612		return ret;
    613
    614	switch (width) {
    615	case 4:
    616		len = sysfs_emit(buf, "%d\n",
    617				 (*data >> (4 - (offset % 8))) & 0xf);
    618		break;
    619	case 8:
    620		len = sysfs_emit(buf, "%u\n", *data);
    621		break;
    622	case 16:
    623		len = sysfs_emit(buf, "%u\n", be16_to_cpup((__be16 *)data));
    624		break;
    625	case 32:
    626		len = sysfs_emit(buf, "%u\n", be32_to_cpup((__be32 *)data));
    627		break;
    628	case 64:
    629		len = sysfs_emit(buf, "%llu\n", be64_to_cpup((__be64 *)data));
    630		break;
    631	default:
    632		len = 0;
    633		break;
    634	}
    635
    636	return len;
    637}
    638
/*
 * Counters read with IB_PMA_PORT_COUNTERS.
 * Arguments: name, counter number, width in bits, offset in bits.
 */
static PORT_PMA_ATTR(symbol_error		    ,  0, 16,  32);
static PORT_PMA_ATTR(link_error_recovery	    ,  1,  8,  48);
static PORT_PMA_ATTR(link_downed		    ,  2,  8,  56);
static PORT_PMA_ATTR(port_rcv_errors		    ,  3, 16,  64);
static PORT_PMA_ATTR(port_rcv_remote_physical_errors,  4, 16,  80);
static PORT_PMA_ATTR(port_rcv_switch_relay_errors   ,  5, 16,  96);
static PORT_PMA_ATTR(port_xmit_discards		    ,  6, 16, 112);
static PORT_PMA_ATTR(port_xmit_constraint_errors    ,  7,  8, 128);
static PORT_PMA_ATTR(port_rcv_constraint_errors	    ,  8,  8, 136);
static PORT_PMA_ATTR(local_link_integrity_errors    ,  9,  4, 152);
static PORT_PMA_ATTR(excessive_buffer_overrun_errors, 10,  4, 156);
static PORT_PMA_ATTR(VL15_dropped		    , 11, 16, 176);
static PORT_PMA_ATTR(port_xmit_data		    , 12, 32, 192);
static PORT_PMA_ATTR(port_rcv_data		    , 13, 32, 224);
static PORT_PMA_ATTR(port_xmit_packets		    , 14, 32, 256);
static PORT_PMA_ATTR(port_rcv_packets		    , 15, 32, 288);
static PORT_PMA_ATTR(port_xmit_wait		    ,  0, 32, 320);

/*
 * Counters added by extended set (read with IB_PMA_PORT_COUNTERS_EXT).
 * Arguments: name, width in bits, offset in bits.
 */
static PORT_PMA_ATTR_EXT(port_xmit_data		    , 64,  64);
static PORT_PMA_ATTR_EXT(port_rcv_data		    , 64, 128);
static PORT_PMA_ATTR_EXT(port_xmit_packets	    , 64, 192);
static PORT_PMA_ATTR_EXT(port_rcv_packets	    , 64, 256);
static PORT_PMA_ATTR_EXT(unicast_xmit_packets	    , 64, 320);
static PORT_PMA_ATTR_EXT(unicast_rcv_packets	    , 64, 384);
static PORT_PMA_ATTR_EXT(multicast_xmit_packets	    , 64, 448);
static PORT_PMA_ATTR_EXT(multicast_rcv_packets	    , 64, 512);
    668
/*
 * Attribute list of pma_group, the table get_counter_table() falls back to:
 * only the counters defined with PORT_PMA_ATTR(). NULL-terminated.
 */
static struct attribute *pma_attrs[] = {
	&port_pma_attr_symbol_error.attr.attr,
	&port_pma_attr_link_error_recovery.attr.attr,
	&port_pma_attr_link_downed.attr.attr,
	&port_pma_attr_port_rcv_errors.attr.attr,
	&port_pma_attr_port_rcv_remote_physical_errors.attr.attr,
	&port_pma_attr_port_rcv_switch_relay_errors.attr.attr,
	&port_pma_attr_port_xmit_discards.attr.attr,
	&port_pma_attr_port_xmit_constraint_errors.attr.attr,
	&port_pma_attr_port_rcv_constraint_errors.attr.attr,
	&port_pma_attr_local_link_integrity_errors.attr.attr,
	&port_pma_attr_excessive_buffer_overrun_errors.attr.attr,
	&port_pma_attr_VL15_dropped.attr.attr,
	&port_pma_attr_port_xmit_data.attr.attr,
	&port_pma_attr_port_rcv_data.attr.attr,
	&port_pma_attr_port_xmit_packets.attr.attr,
	&port_pma_attr_port_rcv_packets.attr.attr,
	&port_pma_attr_port_xmit_wait.attr.attr,
	NULL
};
    689
/*
 * Attribute list of pma_group_ext, selected by get_counter_table() when
 * IB_PMA_CLASS_CAP_EXT_WIDTH is set. The data and packet counters are the
 * PORT_PMA_ATTR_EXT() variants and the unicast/multicast packet counters
 * are added. NULL-terminated.
 */
static struct attribute *pma_attrs_ext[] = {
	&port_pma_attr_symbol_error.attr.attr,
	&port_pma_attr_link_error_recovery.attr.attr,
	&port_pma_attr_link_downed.attr.attr,
	&port_pma_attr_port_rcv_errors.attr.attr,
	&port_pma_attr_port_rcv_remote_physical_errors.attr.attr,
	&port_pma_attr_port_rcv_switch_relay_errors.attr.attr,
	&port_pma_attr_port_xmit_discards.attr.attr,
	&port_pma_attr_port_xmit_constraint_errors.attr.attr,
	&port_pma_attr_port_rcv_constraint_errors.attr.attr,
	&port_pma_attr_local_link_integrity_errors.attr.attr,
	&port_pma_attr_excessive_buffer_overrun_errors.attr.attr,
	&port_pma_attr_VL15_dropped.attr.attr,
	&port_pma_attr_ext_port_xmit_data.attr.attr,
	&port_pma_attr_ext_port_rcv_data.attr.attr,
	&port_pma_attr_ext_port_xmit_packets.attr.attr,
	&port_pma_attr_port_xmit_wait.attr.attr,
	&port_pma_attr_ext_port_rcv_packets.attr.attr,
	&port_pma_attr_ext_unicast_rcv_packets.attr.attr,
	&port_pma_attr_ext_unicast_xmit_packets.attr.attr,
	&port_pma_attr_ext_multicast_rcv_packets.attr.attr,
	&port_pma_attr_ext_multicast_xmit_packets.attr.attr,
	NULL
};
    714
/*
 * Attribute list of pma_group_noietf, selected by get_counter_table() when
 * only IB_PMA_CLASS_CAP_EXT_WIDTH_NOIETF is set. The data and packet
 * counters are the PORT_PMA_ATTR_EXT() variants, without the
 * unicast/multicast packet counters. NULL-terminated.
 */
static struct attribute *pma_attrs_noietf[] = {
	&port_pma_attr_symbol_error.attr.attr,
	&port_pma_attr_link_error_recovery.attr.attr,
	&port_pma_attr_link_downed.attr.attr,
	&port_pma_attr_port_rcv_errors.attr.attr,
	&port_pma_attr_port_rcv_remote_physical_errors.attr.attr,
	&port_pma_attr_port_rcv_switch_relay_errors.attr.attr,
	&port_pma_attr_port_xmit_discards.attr.attr,
	&port_pma_attr_port_xmit_constraint_errors.attr.attr,
	&port_pma_attr_port_rcv_constraint_errors.attr.attr,
	&port_pma_attr_local_link_integrity_errors.attr.attr,
	&port_pma_attr_excessive_buffer_overrun_errors.attr.attr,
	&port_pma_attr_VL15_dropped.attr.attr,
	&port_pma_attr_ext_port_xmit_data.attr.attr,
	&port_pma_attr_ext_port_rcv_data.attr.attr,
	&port_pma_attr_ext_port_xmit_packets.attr.attr,
	&port_pma_attr_ext_port_rcv_packets.attr.attr,
	&port_pma_attr_port_xmit_wait.attr.attr,
	NULL
};
    735
/*
 * The three alternative "counters" attribute groups. They share the same
 * group name and get_counter_table() returns exactly one of them.
 */

/* Counters from pma_attrs. */
static const struct attribute_group pma_group = {
	.name  = "counters",
	.attrs  = pma_attrs
};

/* Counters from pma_attrs_ext. */
static const struct attribute_group pma_group_ext = {
	.name  = "counters",
	.attrs  = pma_attrs_ext
};

/* Counters from pma_attrs_noietf. */
static const struct attribute_group pma_group_noietf = {
	.name  = "counters",
	.attrs  = pma_attrs_noietf
};
    750
    751static void ib_port_release(struct kobject *kobj)
    752{
    753	struct ib_port *port = container_of(kobj, struct ib_port, kobj);
    754	int i;
    755
    756	for (i = 0; i != ARRAY_SIZE(port->groups); i++)
    757		kfree(port->groups[i].attrs);
    758	if (port->hw_stats_data)
    759		rdma_free_hw_stats_struct(port->hw_stats_data->stats);
    760	kfree(port->hw_stats_data);
    761	kvfree(port);
    762}
    763
    764static void ib_port_gid_attr_release(struct kobject *kobj)
    765{
    766	struct gid_attr_group *gid_attr_group =
    767		container_of(kobj, struct gid_attr_group, kobj);
    768	int i;
    769
    770	for (i = 0; i != ARRAY_SIZE(gid_attr_group->groups); i++)
    771		kfree(gid_attr_group->groups[i].attrs);
    772	kfree(gid_attr_group);
    773}
    774
/*
 * kobject type of a port: released by ib_port_release(), attributes
 * dispatched through port_sysfs_ops, and created with the attributes listed
 * in port_default_attrs.
 */
static struct kobj_type port_type = {
	.release       = ib_port_release,
	.sysfs_ops     = &port_sysfs_ops,
	.default_groups = port_default_groups,
};

/*
 * kobject type of a gid_attr_group: released by ib_port_gid_attr_release(),
 * attributes dispatched through gid_attr_sysfs_ops (show only).
 */
static struct kobj_type gid_attr_type = {
	.sysfs_ops      = &gid_attr_sysfs_ops,
	.release        = ib_port_gid_attr_release
};
    785
    786/*
    787 * Figure out which counter table to use depending on
    788 * the device capabilities.
    789 */
    790static const struct attribute_group *get_counter_table(struct ib_device *dev,
    791						       int port_num)
    792{
    793	struct ib_class_port_info cpi;
    794
    795	if (get_perf_mad(dev, port_num, IB_PMA_CLASS_PORT_INFO,
    796				&cpi, 40, sizeof(cpi)) >= 0) {
    797		if (cpi.capability_mask & IB_PMA_CLASS_CAP_EXT_WIDTH)
    798			/* We have extended counters */
    799			return &pma_group_ext;
    800
    801		if (cpi.capability_mask & IB_PMA_CLASS_CAP_EXT_WIDTH_NOIETF)
    802			/* But not the IETF ones */
    803			return &pma_group_noietf;
    804	}
    805
    806	/* Fall back to normal counters */
    807	return &pma_group;
    808}
    809
    810static int update_hw_stats(struct ib_device *dev, struct rdma_hw_stats *stats,
    811			   u32 port_num, int index)
    812{
    813	int ret;
    814
    815	if (time_is_after_eq_jiffies(stats->timestamp + stats->lifespan))
    816		return 0;
    817	ret = dev->ops.get_hw_stats(dev, stats, port_num, index);
    818	if (ret < 0)
    819		return ret;
    820	if (ret == stats->num_counters)
    821		stats->timestamp = jiffies;
    822
    823	return 0;
    824}
    825
    826static int print_hw_stat(struct ib_device *dev, int port_num,
    827			 struct rdma_hw_stats *stats, int index, char *buf)
    828{
    829	u64 v = rdma_counter_get_hwstat_value(dev, port_num, index);
    830
    831	return sysfs_emit(buf, "%llu\n", stats->value[index] + v);
    832}
    833
    834static ssize_t show_hw_stats(struct ib_device *ibdev,
    835			     struct rdma_hw_stats *stats, unsigned int index,
    836			     unsigned int port_num, char *buf)
    837{
    838	int ret;
    839
    840	mutex_lock(&stats->lock);
    841	ret = update_hw_stats(ibdev, stats, port_num, index);
    842	if (ret)
    843		goto unlock;
    844	ret = print_hw_stat(ibdev, port_num, stats, index, buf);
    845unlock:
    846	mutex_unlock(&stats->lock);
    847
    848	return ret;
    849}
    850
    851static ssize_t show_stats_lifespan(struct ib_device *ibdev,
    852				   struct rdma_hw_stats *stats,
    853				   unsigned int index, unsigned int port_num,
    854				   char *buf)
    855{
    856	int msecs;
    857
    858	mutex_lock(&stats->lock);
    859	msecs = jiffies_to_msecs(stats->lifespan);
    860	mutex_unlock(&stats->lock);
    861
    862	return sysfs_emit(buf, "%d\n", msecs);
    863}
    864
    865static ssize_t set_stats_lifespan(struct ib_device *ibdev,
    866				   struct rdma_hw_stats *stats,
    867				   unsigned int index, unsigned int port_num,
    868				   const char *buf, size_t count)
    869{
    870	int msecs;
    871	int jiffies;
    872	int ret;
    873
    874	ret = kstrtoint(buf, 10, &msecs);
    875	if (ret)
    876		return ret;
    877	if (msecs < 0 || msecs > 10000)
    878		return -EINVAL;
    879	jiffies = msecs_to_jiffies(msecs);
    880
    881	mutex_lock(&stats->lock);
    882	stats->lifespan = jiffies;
    883	mutex_unlock(&stats->lock);
    884
    885	return count;
    886}
    887
    888static struct hw_stats_device_data *
    889alloc_hw_stats_device(struct ib_device *ibdev)
    890{
    891	struct hw_stats_device_data *data;
    892	struct rdma_hw_stats *stats;
    893
    894	if (!ibdev->ops.alloc_hw_device_stats)
    895		return ERR_PTR(-EOPNOTSUPP);
    896	stats = ibdev->ops.alloc_hw_device_stats(ibdev);
    897	if (!stats)
    898		return ERR_PTR(-ENOMEM);
    899	if (!stats->descs || stats->num_counters <= 0)
    900		goto err_free_stats;
    901
    902	/*
    903	 * Two extra attribue elements here, one for the lifespan entry and
    904	 * one to NULL terminate the list for the sysfs core code
    905	 */
    906	data = kzalloc(struct_size(data, attrs, stats->num_counters + 1),
    907		       GFP_KERNEL);
    908	if (!data)
    909		goto err_free_stats;
    910	data->group.attrs = kcalloc(stats->num_counters + 2,
    911				    sizeof(*data->group.attrs), GFP_KERNEL);
    912	if (!data->group.attrs)
    913		goto err_free_data;
    914
    915	data->group.name = "hw_counters";
    916	data->stats = stats;
    917	return data;
    918
    919err_free_data:
    920	kfree(data);
    921err_free_stats:
    922	rdma_free_hw_stats_struct(stats);
    923	return ERR_PTR(-ENOMEM);
    924}
    925
    926void ib_device_release_hw_stats(struct hw_stats_device_data *data)
    927{
    928	kfree(data->group.attrs);
    929	rdma_free_hw_stats_struct(data->stats);
    930	kfree(data);
    931}
    932
    933int ib_setup_device_attrs(struct ib_device *ibdev)
    934{
    935	struct hw_stats_device_attribute *attr;
    936	struct hw_stats_device_data *data;
    937	bool opstat_skipped = false;
    938	int i, ret, pos = 0;
    939
    940	data = alloc_hw_stats_device(ibdev);
    941	if (IS_ERR(data)) {
    942		if (PTR_ERR(data) == -EOPNOTSUPP)
    943			return 0;
    944		return PTR_ERR(data);
    945	}
    946	ibdev->hw_stats_data = data;
    947
    948	ret = ibdev->ops.get_hw_stats(ibdev, data->stats, 0,
    949				      data->stats->num_counters);
    950	if (ret != data->stats->num_counters) {
    951		if (WARN_ON(ret >= 0))
    952			return -EINVAL;
    953		return ret;
    954	}
    955
    956	data->stats->timestamp = jiffies;
    957
    958	for (i = 0; i < data->stats->num_counters; i++) {
    959		if (data->stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) {
    960			opstat_skipped = true;
    961			continue;
    962		}
    963
    964		WARN_ON(opstat_skipped);
    965		attr = &data->attrs[pos];
    966		sysfs_attr_init(&attr->attr.attr);
    967		attr->attr.attr.name = data->stats->descs[i].name;
    968		attr->attr.attr.mode = 0444;
    969		attr->attr.show = hw_stat_device_show;
    970		attr->show = show_hw_stats;
    971		data->group.attrs[pos] = &attr->attr.attr;
    972		pos++;
    973	}
    974
    975	attr = &data->attrs[pos];
    976	sysfs_attr_init(&attr->attr.attr);
    977	attr->attr.attr.name = "lifespan";
    978	attr->attr.attr.mode = 0644;
    979	attr->attr.show = hw_stat_device_show;
    980	attr->show = show_stats_lifespan;
    981	attr->attr.store = hw_stat_device_store;
    982	attr->store = set_stats_lifespan;
    983	data->group.attrs[pos] = &attr->attr.attr;
    984	for (i = 0; i != ARRAY_SIZE(ibdev->groups); i++)
    985		if (!ibdev->groups[i]) {
    986			ibdev->groups[i] = &data->group;
    987			return 0;
    988		}
    989	WARN(true, "struct ib_device->groups is too small");
    990	return -EINVAL;
    991}
    992
    993static struct hw_stats_port_data *
    994alloc_hw_stats_port(struct ib_port *port, struct attribute_group *group)
    995{
    996	struct ib_device *ibdev = port->ibdev;
    997	struct hw_stats_port_data *data;
    998	struct rdma_hw_stats *stats;
    999
   1000	if (!ibdev->ops.alloc_hw_port_stats)
   1001		return ERR_PTR(-EOPNOTSUPP);
   1002	stats = ibdev->ops.alloc_hw_port_stats(port->ibdev, port->port_num);
   1003	if (!stats)
   1004		return ERR_PTR(-ENOMEM);
   1005	if (!stats->descs || stats->num_counters <= 0)
   1006		goto err_free_stats;
   1007
   1008	/*
   1009	 * Two extra attribue elements here, one for the lifespan entry and
   1010	 * one to NULL terminate the list for the sysfs core code
   1011	 */
   1012	data = kzalloc(struct_size(data, attrs, stats->num_counters + 1),
   1013		       GFP_KERNEL);
   1014	if (!data)
   1015		goto err_free_stats;
   1016	group->attrs = kcalloc(stats->num_counters + 2,
   1017				    sizeof(*group->attrs), GFP_KERNEL);
   1018	if (!group->attrs)
   1019		goto err_free_data;
   1020
   1021	group->name = "hw_counters";
   1022	data->stats = stats;
   1023	return data;
   1024
   1025err_free_data:
   1026	kfree(data);
   1027err_free_stats:
   1028	rdma_free_hw_stats_struct(stats);
   1029	return ERR_PTR(-ENOMEM);
   1030}
   1031
   1032static int setup_hw_port_stats(struct ib_port *port,
   1033			       struct attribute_group *group)
   1034{
   1035	struct hw_stats_port_attribute *attr;
   1036	struct hw_stats_port_data *data;
   1037	bool opstat_skipped = false;
   1038	int i, ret, pos = 0;
   1039
   1040	data = alloc_hw_stats_port(port, group);
   1041	if (IS_ERR(data))
   1042		return PTR_ERR(data);
   1043
   1044	ret = port->ibdev->ops.get_hw_stats(port->ibdev, data->stats,
   1045					    port->port_num,
   1046					    data->stats->num_counters);
   1047	if (ret != data->stats->num_counters) {
   1048		if (WARN_ON(ret >= 0))
   1049			return -EINVAL;
   1050		return ret;
   1051	}
   1052
   1053	data->stats->timestamp = jiffies;
   1054
   1055	for (i = 0; i < data->stats->num_counters; i++) {
   1056		if (data->stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) {
   1057			opstat_skipped = true;
   1058			continue;
   1059		}
   1060
   1061		WARN_ON(opstat_skipped);
   1062		attr = &data->attrs[pos];
   1063		sysfs_attr_init(&attr->attr.attr);
   1064		attr->attr.attr.name = data->stats->descs[i].name;
   1065		attr->attr.attr.mode = 0444;
   1066		attr->attr.show = hw_stat_port_show;
   1067		attr->show = show_hw_stats;
   1068		group->attrs[pos] = &attr->attr.attr;
   1069		pos++;
   1070	}
   1071
   1072	attr = &data->attrs[pos];
   1073	sysfs_attr_init(&attr->attr.attr);
   1074	attr->attr.attr.name = "lifespan";
   1075	attr->attr.attr.mode = 0644;
   1076	attr->attr.show = hw_stat_port_show;
   1077	attr->show = show_stats_lifespan;
   1078	attr->attr.store = hw_stat_port_store;
   1079	attr->store = set_stats_lifespan;
   1080	group->attrs[pos] = &attr->attr.attr;
   1081
   1082	port->hw_stats_data = data;
   1083	return 0;
   1084}
   1085
   1086struct rdma_hw_stats *ib_get_hw_stats_port(struct ib_device *ibdev,
   1087					   u32 port_num)
   1088{
   1089	if (!ibdev->port_data || !rdma_is_port_valid(ibdev, port_num) ||
   1090	    !ibdev->port_data[port_num].sysfs->hw_stats_data)
   1091		return NULL;
   1092	return ibdev->port_data[port_num].sysfs->hw_stats_data->stats;
   1093}
   1094
   1095static int
   1096alloc_port_table_group(const char *name, struct attribute_group *group,
   1097		       struct port_table_attribute *attrs, size_t num,
   1098		       ssize_t (*show)(struct ib_device *ibdev, u32 port_num,
   1099				       struct ib_port_attribute *, char *buf))
   1100{
   1101	struct attribute **attr_list;
   1102	int i;
   1103
   1104	attr_list = kcalloc(num + 1, sizeof(*attr_list), GFP_KERNEL);
   1105	if (!attr_list)
   1106		return -ENOMEM;
   1107
   1108	for (i = 0; i < num; i++) {
   1109		struct port_table_attribute *element = &attrs[i];
   1110
   1111		if (snprintf(element->name, sizeof(element->name), "%d", i) >=
   1112		    sizeof(element->name))
   1113			goto err;
   1114
   1115		sysfs_attr_init(&element->attr.attr);
   1116		element->attr.attr.name = element->name;
   1117		element->attr.attr.mode = 0444;
   1118		element->attr.show = show;
   1119		element->index = i;
   1120
   1121		attr_list[i] = &element->attr.attr;
   1122	}
   1123	group->name = name;
   1124	group->attrs = attr_list;
   1125	return 0;
   1126err:
   1127	kfree(attr_list);
   1128	return -EINVAL;
   1129}
   1130
   1131/*
   1132 * Create the sysfs:
   1133 *  ibp0s9/ports/XX/gid_attrs/{ndevs,types}/YYY
   1134 * YYY is the gid table index in decimal
   1135 */
   1136static int setup_gid_attrs(struct ib_port *port,
   1137			   const struct ib_port_attr *attr)
   1138{
   1139	struct gid_attr_group *gid_attr_group;
   1140	int ret;
   1141
   1142	gid_attr_group = kzalloc(struct_size(gid_attr_group, attrs_list,
   1143					     attr->gid_tbl_len * 2),
   1144				 GFP_KERNEL);
   1145	if (!gid_attr_group)
   1146		return -ENOMEM;
   1147	gid_attr_group->port = port;
   1148	kobject_init(&gid_attr_group->kobj, &gid_attr_type);
   1149
   1150	ret = alloc_port_table_group("ndevs", &gid_attr_group->groups[0],
   1151				     gid_attr_group->attrs_list,
   1152				     attr->gid_tbl_len,
   1153				     show_port_gid_attr_ndev);
   1154	if (ret)
   1155		goto err_put;
   1156	gid_attr_group->groups_list[0] = &gid_attr_group->groups[0];
   1157
   1158	ret = alloc_port_table_group(
   1159		"types", &gid_attr_group->groups[1],
   1160		gid_attr_group->attrs_list + attr->gid_tbl_len,
   1161		attr->gid_tbl_len, show_port_gid_attr_gid_type);
   1162	if (ret)
   1163		goto err_put;
   1164	gid_attr_group->groups_list[1] = &gid_attr_group->groups[1];
   1165
   1166	ret = kobject_add(&gid_attr_group->kobj, &port->kobj, "gid_attrs");
   1167	if (ret)
   1168		goto err_put;
   1169	ret = sysfs_create_groups(&gid_attr_group->kobj,
   1170				  gid_attr_group->groups_list);
   1171	if (ret)
   1172		goto err_del;
   1173	port->gid_attr_group = gid_attr_group;
   1174	return 0;
   1175
   1176err_del:
   1177	kobject_del(&gid_attr_group->kobj);
   1178err_put:
   1179	kobject_put(&gid_attr_group->kobj);
   1180	return ret;
   1181}
   1182
   1183static void destroy_gid_attrs(struct ib_port *port)
   1184{
   1185	struct gid_attr_group *gid_attr_group = port->gid_attr_group;
   1186
   1187	if (!gid_attr_group)
   1188		return;
   1189	sysfs_remove_groups(&gid_attr_group->kobj, gid_attr_group->groups_list);
   1190	kobject_del(&gid_attr_group->kobj);
   1191	kobject_put(&gid_attr_group->kobj);
   1192}
   1193
   1194/*
   1195 * Create the sysfs:
   1196 *  ibp0s9/ports/XX/{gids,pkeys,counters}/YYY
   1197 */
   1198static struct ib_port *setup_port(struct ib_core_device *coredev, int port_num,
   1199				  const struct ib_port_attr *attr)
   1200{
   1201	struct ib_device *device = rdma_device_to_ibdev(&coredev->dev);
   1202	bool is_full_dev = &device->coredev == coredev;
   1203	const struct attribute_group **cur_group;
   1204	struct ib_port *p;
   1205	int ret;
   1206
   1207	p = kvzalloc(struct_size(p, attrs_list,
   1208				attr->gid_tbl_len + attr->pkey_tbl_len),
   1209		    GFP_KERNEL);
   1210	if (!p)
   1211		return ERR_PTR(-ENOMEM);
   1212	p->ibdev = device;
   1213	p->port_num = port_num;
   1214	kobject_init(&p->kobj, &port_type);
   1215
   1216	cur_group = p->groups_list;
   1217	ret = alloc_port_table_group("gids", &p->groups[0], p->attrs_list,
   1218				     attr->gid_tbl_len, show_port_gid);
   1219	if (ret)
   1220		goto err_put;
   1221	*cur_group++ = &p->groups[0];
   1222
   1223	if (attr->pkey_tbl_len) {
   1224		ret = alloc_port_table_group("pkeys", &p->groups[1],
   1225					     p->attrs_list + attr->gid_tbl_len,
   1226					     attr->pkey_tbl_len, show_port_pkey);
   1227		if (ret)
   1228			goto err_put;
   1229		*cur_group++ = &p->groups[1];
   1230	}
   1231
   1232	/*
   1233	 * If port == 0, it means hw_counters are per device and not per
   1234	 * port, so holder should be device. Therefore skip per port
   1235	 * counter initialization.
   1236	 */
   1237	if (port_num && is_full_dev) {
   1238		ret = setup_hw_port_stats(p, &p->groups[2]);
   1239		if (ret && ret != -EOPNOTSUPP)
   1240			goto err_put;
   1241		if (!ret)
   1242			*cur_group++ = &p->groups[2];
   1243	}
   1244
   1245	if (device->ops.process_mad && is_full_dev)
   1246		*cur_group++ = get_counter_table(device, port_num);
   1247
   1248	ret = kobject_add(&p->kobj, coredev->ports_kobj, "%d", port_num);
   1249	if (ret)
   1250		goto err_put;
   1251	ret = sysfs_create_groups(&p->kobj, p->groups_list);
   1252	if (ret)
   1253		goto err_del;
   1254	if (is_full_dev) {
   1255		ret = sysfs_create_groups(&p->kobj, device->ops.port_groups);
   1256		if (ret)
   1257			goto err_groups;
   1258	}
   1259
   1260	list_add_tail(&p->kobj.entry, &coredev->port_list);
   1261	if (device->port_data && is_full_dev)
   1262		device->port_data[port_num].sysfs = p;
   1263
   1264	return p;
   1265
   1266err_groups:
   1267	sysfs_remove_groups(&p->kobj, p->groups_list);
   1268err_del:
   1269	kobject_del(&p->kobj);
   1270err_put:
   1271	kobject_put(&p->kobj);
   1272	return ERR_PTR(ret);
   1273}
   1274
   1275static void destroy_port(struct ib_core_device *coredev, struct ib_port *port)
   1276{
   1277	bool is_full_dev = &port->ibdev->coredev == coredev;
   1278
   1279	if (port->ibdev->port_data &&
   1280	    port->ibdev->port_data[port->port_num].sysfs == port)
   1281		port->ibdev->port_data[port->port_num].sysfs = NULL;
   1282	list_del(&port->kobj.entry);
   1283	if (is_full_dev)
   1284		sysfs_remove_groups(&port->kobj, port->ibdev->ops.port_groups);
   1285	sysfs_remove_groups(&port->kobj, port->groups_list);
   1286	kobject_del(&port->kobj);
   1287	kobject_put(&port->kobj);
   1288}
   1289
   1290static const char *node_type_string(int node_type)
   1291{
   1292	switch (node_type) {
   1293	case RDMA_NODE_IB_CA:
   1294		return "CA";
   1295	case RDMA_NODE_IB_SWITCH:
   1296		return "switch";
   1297	case RDMA_NODE_IB_ROUTER:
   1298		return "router";
   1299	case RDMA_NODE_RNIC:
   1300		return "RNIC";
   1301	case RDMA_NODE_USNIC:
   1302		return "usNIC";
   1303	case RDMA_NODE_USNIC_UDP:
   1304		return "usNIC UDP";
   1305	case RDMA_NODE_UNSPECIFIED:
   1306		return "unspecified";
   1307	}
   1308	return "<unknown>";
   1309}
   1310
   1311static ssize_t node_type_show(struct device *device,
   1312			      struct device_attribute *attr, char *buf)
   1313{
   1314	struct ib_device *dev = rdma_device_to_ibdev(device);
   1315
   1316	return sysfs_emit(buf, "%u: %s\n", dev->node_type,
   1317			  node_type_string(dev->node_type));
   1318}
   1319static DEVICE_ATTR_RO(node_type);
   1320
   1321static ssize_t sys_image_guid_show(struct device *device,
   1322				   struct device_attribute *dev_attr, char *buf)
   1323{
   1324	struct ib_device *dev = rdma_device_to_ibdev(device);
   1325	__be16 *guid = (__be16 *)&dev->attrs.sys_image_guid;
   1326
   1327	return sysfs_emit(buf, "%04x:%04x:%04x:%04x\n",
   1328			  be16_to_cpu(guid[0]),
   1329			  be16_to_cpu(guid[1]),
   1330			  be16_to_cpu(guid[2]),
   1331			  be16_to_cpu(guid[3]));
   1332}
   1333static DEVICE_ATTR_RO(sys_image_guid);
   1334
   1335static ssize_t node_guid_show(struct device *device,
   1336			      struct device_attribute *attr, char *buf)
   1337{
   1338	struct ib_device *dev = rdma_device_to_ibdev(device);
   1339	__be16 *node_guid = (__be16 *)&dev->node_guid;
   1340
   1341	return sysfs_emit(buf, "%04x:%04x:%04x:%04x\n",
   1342			  be16_to_cpu(node_guid[0]),
   1343			  be16_to_cpu(node_guid[1]),
   1344			  be16_to_cpu(node_guid[2]),
   1345			  be16_to_cpu(node_guid[3]));
   1346}
   1347static DEVICE_ATTR_RO(node_guid);
   1348
   1349static ssize_t node_desc_show(struct device *device,
   1350			      struct device_attribute *attr, char *buf)
   1351{
   1352	struct ib_device *dev = rdma_device_to_ibdev(device);
   1353
   1354	return sysfs_emit(buf, "%.64s\n", dev->node_desc);
   1355}
   1356
   1357static ssize_t node_desc_store(struct device *device,
   1358			       struct device_attribute *attr,
   1359			       const char *buf, size_t count)
   1360{
   1361	struct ib_device *dev = rdma_device_to_ibdev(device);
   1362	struct ib_device_modify desc = {};
   1363	int ret;
   1364
   1365	if (!dev->ops.modify_device)
   1366		return -EOPNOTSUPP;
   1367
   1368	memcpy(desc.node_desc, buf, min_t(int, count, IB_DEVICE_NODE_DESC_MAX));
   1369	ret = ib_modify_device(dev, IB_DEVICE_MODIFY_NODE_DESC, &desc);
   1370	if (ret)
   1371		return ret;
   1372
   1373	return count;
   1374}
   1375static DEVICE_ATTR_RW(node_desc);
   1376
   1377static ssize_t fw_ver_show(struct device *device, struct device_attribute *attr,
   1378			   char *buf)
   1379{
   1380	struct ib_device *dev = rdma_device_to_ibdev(device);
   1381	char version[IB_FW_VERSION_NAME_MAX] = {};
   1382
   1383	ib_get_device_fw_str(dev, version);
   1384
   1385	return sysfs_emit(buf, "%s\n", version);
   1386}
   1387static DEVICE_ATTR_RO(fw_ver);
   1388
   1389static struct attribute *ib_dev_attrs[] = {
   1390	&dev_attr_node_type.attr,
   1391	&dev_attr_node_guid.attr,
   1392	&dev_attr_sys_image_guid.attr,
   1393	&dev_attr_fw_ver.attr,
   1394	&dev_attr_node_desc.attr,
   1395	NULL,
   1396};
   1397
   1398const struct attribute_group ib_dev_attr_group = {
   1399	.attrs = ib_dev_attrs,
   1400};
   1401
   1402void ib_free_port_attrs(struct ib_core_device *coredev)
   1403{
   1404	struct kobject *p, *t;
   1405
   1406	list_for_each_entry_safe(p, t, &coredev->port_list, entry) {
   1407		struct ib_port *port = container_of(p, struct ib_port, kobj);
   1408
   1409		destroy_gid_attrs(port);
   1410		destroy_port(coredev, port);
   1411	}
   1412
   1413	kobject_put(coredev->ports_kobj);
   1414}
   1415
   1416int ib_setup_port_attrs(struct ib_core_device *coredev)
   1417{
   1418	struct ib_device *device = rdma_device_to_ibdev(&coredev->dev);
   1419	u32 port_num;
   1420	int ret;
   1421
   1422	coredev->ports_kobj = kobject_create_and_add("ports",
   1423						     &coredev->dev.kobj);
   1424	if (!coredev->ports_kobj)
   1425		return -ENOMEM;
   1426
   1427	rdma_for_each_port (device, port_num) {
   1428		struct ib_port_attr attr;
   1429		struct ib_port *port;
   1430
   1431		ret = ib_query_port(device, port_num, &attr);
   1432		if (ret)
   1433			goto err_put;
   1434
   1435		port = setup_port(coredev, port_num, &attr);
   1436		if (IS_ERR(port)) {
   1437			ret = PTR_ERR(port);
   1438			goto err_put;
   1439		}
   1440
   1441		ret = setup_gid_attrs(port, &attr);
   1442		if (ret)
   1443			goto err_put;
   1444	}
   1445	return 0;
   1446
   1447err_put:
   1448	ib_free_port_attrs(coredev);
   1449	return ret;
   1450}
   1451
   1452/**
   1453 * ib_port_register_client_groups - Add an ib_client's attributes to the port
   1454 *
   1455 * @ibdev: IB device to add counters
   1456 * @port_num: valid port number
   1457 * @groups: Group list of attributes
   1458 *
   1459 * Do not use. Only for legacy sysfs compatibility.
   1460 */
   1461int ib_port_register_client_groups(struct ib_device *ibdev, u32 port_num,
   1462				   const struct attribute_group **groups)
   1463{
   1464	return sysfs_create_groups(&ibdev->port_data[port_num].sysfs->kobj,
   1465				   groups);
   1466}
   1467EXPORT_SYMBOL(ib_port_register_client_groups);
   1468
   1469void ib_port_unregister_client_groups(struct ib_device *ibdev, u32 port_num,
   1470				      const struct attribute_group **groups)
   1471{
   1472	return sysfs_remove_groups(&ibdev->port_data[port_num].sysfs->kobj,
   1473				   groups);
   1474}
   1475EXPORT_SYMBOL(ib_port_unregister_client_groups);