cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

mad.c (140770B)


      1// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
      2/*
      3 * Copyright(c) 2015-2018 Intel Corporation.
      4 */
      5
      6#include <linux/net.h>
      7#include <rdma/opa_addr.h>
      8#define OPA_NUM_PKEY_BLOCKS_PER_SMP (OPA_SMP_DR_DATA_SIZE \
      9			/ (OPA_PARTITION_TABLE_BLK_SIZE * sizeof(u16)))
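        /*
         * The macro above is just the number of whole pkey-table blocks that
         * fit in one directed-route SMP payload: OPA_SMP_DR_DATA_SIZE bytes
         * divided by the size of a block (OPA_PARTITION_TABLE_BLK_SIZE
         * entries of sizeof(u16) each).  The PKeyTable handlers further down
         * use it to bound how many blocks a single Get/Set request may touch.
         */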
     10
     11#include "hfi.h"
     12#include "mad.h"
     13#include "trace.h"
     14#include "qp.h"
     15#include "vnic.h"
     16
     17/* the reset value from the FM is supposed to be 0xffff, handle both */
     18#define OPA_LINK_WIDTH_RESET_OLD 0x0fff
     19#define OPA_LINK_WIDTH_RESET 0xffff
     20
     21struct trap_node {
     22	struct list_head list;
     23	struct opa_mad_notice_attr data;
     24	__be64 tid;
     25	int len;
     26	u32 retry;
     27	u8 in_use;
     28	u8 repress;
     29};
     30
     31static int smp_length_check(u32 data_size, u32 request_len)
     32{
     33	if (unlikely(request_len < data_size))
     34		return -EINVAL;
     35
     36	return 0;
     37}
     38
     39static int reply(struct ib_mad_hdr *smp)
     40{
     41	/*
     42	 * The verbs framework will handle the directed/LID route
     43	 * packet changes.
     44	 */
     45	smp->method = IB_MGMT_METHOD_GET_RESP;
     46	if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
     47		smp->status |= IB_SMP_DIRECTION;
     48	return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
     49}
     50
     51static inline void clear_opa_smp_data(struct opa_smp *smp)
     52{
     53	void *data = opa_get_smp_data(smp);
     54	size_t size = opa_get_smp_data_size(smp);
     55
     56	memset(data, 0, size);
     57}
     58
     59static u16 hfi1_lookup_pkey_value(struct hfi1_ibport *ibp, int pkey_idx)
     60{
     61	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
     62
     63	if (pkey_idx < ARRAY_SIZE(ppd->pkeys))
     64		return ppd->pkeys[pkey_idx];
     65
     66	return 0;
     67}
     68
     69void hfi1_event_pkey_change(struct hfi1_devdata *dd, u32 port)
     70{
     71	struct ib_event event;
     72
     73	event.event = IB_EVENT_PKEY_CHANGE;
     74	event.device = &dd->verbs_dev.rdi.ibdev;
     75	event.element.port_num = port;
     76	ib_dispatch_event(&event);
     77}
     78
     79/*
     80 * If the port is down, clean up all pending traps.  We need to be careful
     81 * with the given trap, because it may be queued.
     82 */
     83static void cleanup_traps(struct hfi1_ibport *ibp, struct trap_node *trap)
     84{
     85	struct trap_node *node, *q;
     86	unsigned long flags;
     87	struct list_head trap_list;
     88	int i;
     89
     90	for (i = 0; i < RVT_MAX_TRAP_LISTS; i++) {
     91		spin_lock_irqsave(&ibp->rvp.lock, flags);
     92		list_replace_init(&ibp->rvp.trap_lists[i].list, &trap_list);
     93		ibp->rvp.trap_lists[i].list_len = 0;
     94		spin_unlock_irqrestore(&ibp->rvp.lock, flags);
     95
     96		/*
     97		 * Remove all items from the list, freeing all the non-given
     98		 * traps.
     99		 */
    100		list_for_each_entry_safe(node, q, &trap_list, list) {
    101			list_del(&node->list);
    102			if (node != trap)
    103				kfree(node);
    104		}
    105	}
    106
    107	/*
     108	 * The loop above never frees the given trap.  Whether or not it was
     109	 * on one of the lists, it is now safe to free it here.
    110	 */
    111	kfree(trap);
    112}
    113
    114static struct trap_node *check_and_add_trap(struct hfi1_ibport *ibp,
    115					    struct trap_node *trap)
    116{
    117	struct trap_node *node;
    118	struct trap_list *trap_list;
    119	unsigned long flags;
    120	unsigned long timeout;
    121	int found = 0;
    122	unsigned int queue_id;
    123	static int trap_count;
    124
    125	queue_id = trap->data.generic_type & 0x0F;
    126	if (queue_id >= RVT_MAX_TRAP_LISTS) {
    127		trap_count++;
    128		pr_err_ratelimited("hfi1: Invalid trap 0x%0x dropped. Total dropped: %d\n",
    129				   trap->data.generic_type, trap_count);
    130		kfree(trap);
    131		return NULL;
    132	}
    133
    134	/*
    135	 * Since the retry (handle timeout) does not remove a trap request
    136	 * from the list, all we have to do is compare the node.
    137	 */
    138	spin_lock_irqsave(&ibp->rvp.lock, flags);
    139	trap_list = &ibp->rvp.trap_lists[queue_id];
    140
    141	list_for_each_entry(node, &trap_list->list, list) {
    142		if (node == trap) {
    143			node->retry++;
    144			found = 1;
    145			break;
    146		}
    147	}
    148
     149	/* If it is not on the list, add it, limited to RVT_MAX_TRAP_LEN. */
    150	if (!found) {
    151		if (trap_list->list_len < RVT_MAX_TRAP_LEN) {
    152			trap_list->list_len++;
    153			list_add_tail(&trap->list, &trap_list->list);
    154		} else {
    155			pr_warn_ratelimited("hfi1: Maximum trap limit reached for 0x%0x traps\n",
    156					    trap->data.generic_type);
    157			kfree(trap);
    158		}
    159	}
    160
    161	/*
    162	 * Next check to see if there is a timer pending.  If not, set it up
    163	 * and get the first trap from the list.
    164	 */
    165	node = NULL;
    166	if (!timer_pending(&ibp->rvp.trap_timer)) {
    167		/*
    168		 * o14-2
     169		 * If the timeout is set we have to wait until it expires
    170		 * before the trap can be sent.
    171		 * This should be > RVT_TRAP_TIMEOUT
    172		 */
    173		timeout = (RVT_TRAP_TIMEOUT *
    174			   (1UL << ibp->rvp.subnet_timeout)) / 1000;
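        		/*
        		 * Illustrative numbers only: the timeout computed above is
        		 * in microseconds (RVT_TRAP_TIMEOUT scaled by
        		 * 2^subnet_timeout, then divided by 1000).  Assuming
        		 * rdmavt's RVT_TRAP_TIMEOUT of 4096 (4.096 us expressed in
        		 * ns), a subnet_timeout of 18 would give
        		 * 4096 * 2^18 / 1000 ~= 1073742 us, i.e. roughly a one
        		 * second window before the trap timer fires.
        		 */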
    175		mod_timer(&ibp->rvp.trap_timer,
    176			  jiffies + usecs_to_jiffies(timeout));
    177		node = list_first_entry(&trap_list->list, struct trap_node,
    178					list);
    179		node->in_use = 1;
    180	}
    181	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
    182
    183	return node;
    184}
    185
    186static void subn_handle_opa_trap_repress(struct hfi1_ibport *ibp,
    187					 struct opa_smp *smp)
    188{
    189	struct trap_list *trap_list;
    190	struct trap_node *trap;
    191	unsigned long flags;
    192	int i;
    193
    194	if (smp->attr_id != IB_SMP_ATTR_NOTICE)
    195		return;
    196
    197	spin_lock_irqsave(&ibp->rvp.lock, flags);
    198	for (i = 0; i < RVT_MAX_TRAP_LISTS; i++) {
    199		trap_list = &ibp->rvp.trap_lists[i];
    200		trap = list_first_entry_or_null(&trap_list->list,
    201						struct trap_node, list);
    202		if (trap && trap->tid == smp->tid) {
    203			if (trap->in_use) {
    204				trap->repress = 1;
    205			} else {
    206				trap_list->list_len--;
    207				list_del(&trap->list);
    208				kfree(trap);
    209			}
    210			break;
    211		}
    212	}
    213	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
    214}
    215
    216static void hfi1_update_sm_ah_attr(struct hfi1_ibport *ibp,
    217				   struct rdma_ah_attr *attr, u32 dlid)
    218{
    219	rdma_ah_set_dlid(attr, dlid);
    220	rdma_ah_set_port_num(attr, ppd_from_ibp(ibp)->port);
    221	if (dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
    222		struct ib_global_route *grh = rdma_ah_retrieve_grh(attr);
    223
    224		rdma_ah_set_ah_flags(attr, IB_AH_GRH);
    225		grh->sgid_index = 0;
    226		grh->hop_limit = 1;
    227		grh->dgid.global.subnet_prefix =
    228			ibp->rvp.gid_prefix;
    229		grh->dgid.global.interface_id = OPA_MAKE_ID(dlid);
    230	}
    231}
    232
    233static int hfi1_modify_qp0_ah(struct hfi1_ibport *ibp,
    234			      struct rvt_ah *ah, u32 dlid)
    235{
    236	struct rdma_ah_attr attr;
    237	struct rvt_qp *qp0;
    238	int ret = -EINVAL;
    239
    240	memset(&attr, 0, sizeof(attr));
    241	attr.type = ah->ibah.type;
    242	hfi1_update_sm_ah_attr(ibp, &attr, dlid);
    243	rcu_read_lock();
    244	qp0 = rcu_dereference(ibp->rvp.qp[0]);
    245	if (qp0)
    246		ret = rdma_modify_ah(&ah->ibah, &attr);
    247	rcu_read_unlock();
    248	return ret;
    249}
    250
    251static struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u32 dlid)
    252{
    253	struct rdma_ah_attr attr;
    254	struct ib_ah *ah = ERR_PTR(-EINVAL);
    255	struct rvt_qp *qp0;
    256	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
    257	struct hfi1_devdata *dd = dd_from_ppd(ppd);
    258	u32 port_num = ppd->port;
    259
    260	memset(&attr, 0, sizeof(attr));
    261	attr.type = rdma_ah_find_type(&dd->verbs_dev.rdi.ibdev, port_num);
    262	hfi1_update_sm_ah_attr(ibp, &attr, dlid);
    263	rcu_read_lock();
    264	qp0 = rcu_dereference(ibp->rvp.qp[0]);
    265	if (qp0)
    266		ah = rdma_create_ah(qp0->ibqp.pd, &attr, 0);
    267	rcu_read_unlock();
    268	return ah;
    269}
    270
    271static void send_trap(struct hfi1_ibport *ibp, struct trap_node *trap)
    272{
    273	struct ib_mad_send_buf *send_buf;
    274	struct ib_mad_agent *agent;
    275	struct opa_smp *smp;
    276	unsigned long flags;
    277	int pkey_idx;
    278	u32 qpn = ppd_from_ibp(ibp)->sm_trap_qp;
    279
    280	agent = ibp->rvp.send_agent;
    281	if (!agent) {
    282		cleanup_traps(ibp, trap);
    283		return;
    284	}
    285
    286	/* o14-3.2.1 */
    287	if (driver_lstate(ppd_from_ibp(ibp)) != IB_PORT_ACTIVE) {
    288		cleanup_traps(ibp, trap);
    289		return;
    290	}
    291
    292	/* Add the trap to the list if necessary and see if we can send it */
    293	trap = check_and_add_trap(ibp, trap);
    294	if (!trap)
    295		return;
    296
    297	pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
    298	if (pkey_idx < 0) {
    299		pr_warn("%s: failed to find limited mgmt pkey, defaulting 0x%x\n",
    300			__func__, hfi1_get_pkey(ibp, 1));
    301		pkey_idx = 1;
    302	}
    303
    304	send_buf = ib_create_send_mad(agent, qpn, pkey_idx, 0,
    305				      IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
    306				      GFP_ATOMIC, IB_MGMT_BASE_VERSION);
    307	if (IS_ERR(send_buf))
    308		return;
    309
    310	smp = send_buf->mad;
    311	smp->base_version = OPA_MGMT_BASE_VERSION;
    312	smp->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
    313	smp->class_version = OPA_SM_CLASS_VERSION;
    314	smp->method = IB_MGMT_METHOD_TRAP;
    315
    316	/* Only update the transaction ID for new traps (o13-5). */
    317	if (trap->tid == 0) {
    318		ibp->rvp.tid++;
    319		/* make sure that tid != 0 */
    320		if (ibp->rvp.tid == 0)
    321			ibp->rvp.tid++;
    322		trap->tid = cpu_to_be64(ibp->rvp.tid);
    323	}
    324	smp->tid = trap->tid;
    325
    326	smp->attr_id = IB_SMP_ATTR_NOTICE;
    327	/* o14-1: smp->mkey = 0; */
    328
    329	memcpy(smp->route.lid.data, &trap->data, trap->len);
    330
    331	spin_lock_irqsave(&ibp->rvp.lock, flags);
    332	if (!ibp->rvp.sm_ah) {
    333		if (ibp->rvp.sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) {
    334			struct ib_ah *ah;
    335
    336			ah = hfi1_create_qp0_ah(ibp, ibp->rvp.sm_lid);
    337			if (IS_ERR(ah)) {
    338				spin_unlock_irqrestore(&ibp->rvp.lock, flags);
    339				return;
    340			}
    341			send_buf->ah = ah;
    342			ibp->rvp.sm_ah = ibah_to_rvtah(ah);
    343		} else {
    344			spin_unlock_irqrestore(&ibp->rvp.lock, flags);
    345			return;
    346		}
    347	} else {
    348		send_buf->ah = &ibp->rvp.sm_ah->ibah;
    349	}
    350
    351	/*
    352	 * If the trap was repressed while things were getting set up, don't
    353	 * bother sending it. This could happen for a retry.
    354	 */
    355	if (trap->repress) {
    356		list_del(&trap->list);
    357		spin_unlock_irqrestore(&ibp->rvp.lock, flags);
    358		kfree(trap);
    359		ib_free_send_mad(send_buf);
    360		return;
    361	}
    362
    363	trap->in_use = 0;
    364	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
    365
    366	if (ib_post_send_mad(send_buf, NULL))
    367		ib_free_send_mad(send_buf);
    368}
    369
    370void hfi1_handle_trap_timer(struct timer_list *t)
    371{
    372	struct hfi1_ibport *ibp = from_timer(ibp, t, rvp.trap_timer);
    373	struct trap_node *trap = NULL;
    374	unsigned long flags;
    375	int i;
    376
    377	/* Find the trap with the highest priority */
    378	spin_lock_irqsave(&ibp->rvp.lock, flags);
    379	for (i = 0; !trap && i < RVT_MAX_TRAP_LISTS; i++) {
    380		trap = list_first_entry_or_null(&ibp->rvp.trap_lists[i].list,
    381						struct trap_node, list);
    382	}
    383	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
    384
    385	if (trap)
    386		send_trap(ibp, trap);
    387}
    388
    389static struct trap_node *create_trap_node(u8 type, __be16 trap_num, u32 lid)
    390{
    391	struct trap_node *trap;
    392
    393	trap = kzalloc(sizeof(*trap), GFP_ATOMIC);
    394	if (!trap)
    395		return NULL;
    396
    397	INIT_LIST_HEAD(&trap->list);
    398	trap->data.generic_type = type;
    399	trap->data.prod_type_lsb = IB_NOTICE_PROD_CA;
    400	trap->data.trap_num = trap_num;
    401	trap->data.issuer_lid = cpu_to_be32(lid);
    402
    403	return trap;
    404}
    405
    406/*
    407 * Send a bad P_Key trap (ch. 14.3.8).
    408 */
    409void hfi1_bad_pkey(struct hfi1_ibport *ibp, u32 key, u32 sl,
    410		   u32 qp1, u32 qp2, u32 lid1, u32 lid2)
    411{
    412	struct trap_node *trap;
    413	u32 lid = ppd_from_ibp(ibp)->lid;
    414
    415	ibp->rvp.n_pkt_drops++;
    416	ibp->rvp.pkey_violations++;
    417
    418	trap = create_trap_node(IB_NOTICE_TYPE_SECURITY, OPA_TRAP_BAD_P_KEY,
    419				lid);
    420	if (!trap)
    421		return;
    422
    423	/* Send violation trap */
    424	trap->data.ntc_257_258.lid1 = cpu_to_be32(lid1);
    425	trap->data.ntc_257_258.lid2 = cpu_to_be32(lid2);
    426	trap->data.ntc_257_258.key = cpu_to_be32(key);
    427	trap->data.ntc_257_258.sl = sl << 3;
    428	trap->data.ntc_257_258.qp1 = cpu_to_be32(qp1);
    429	trap->data.ntc_257_258.qp2 = cpu_to_be32(qp2);
    430
    431	trap->len = sizeof(trap->data);
    432	send_trap(ibp, trap);
    433}
    434
    435/*
    436 * Send a bad M_Key trap (ch. 14.3.9).
    437 */
    438static void bad_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
    439		     __be64 mkey, __be32 dr_slid, u8 return_path[], u8 hop_cnt)
    440{
    441	struct trap_node *trap;
    442	u32 lid = ppd_from_ibp(ibp)->lid;
    443
    444	trap = create_trap_node(IB_NOTICE_TYPE_SECURITY, OPA_TRAP_BAD_M_KEY,
    445				lid);
    446	if (!trap)
    447		return;
    448
    449	/* Send violation trap */
    450	trap->data.ntc_256.lid = trap->data.issuer_lid;
    451	trap->data.ntc_256.method = mad->method;
    452	trap->data.ntc_256.attr_id = mad->attr_id;
    453	trap->data.ntc_256.attr_mod = mad->attr_mod;
    454	trap->data.ntc_256.mkey = mkey;
    455	if (mad->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
    456		trap->data.ntc_256.dr_slid = dr_slid;
    457		trap->data.ntc_256.dr_trunc_hop = IB_NOTICE_TRAP_DR_NOTICE;
    458		if (hop_cnt > ARRAY_SIZE(trap->data.ntc_256.dr_rtn_path)) {
    459			trap->data.ntc_256.dr_trunc_hop |=
    460				IB_NOTICE_TRAP_DR_TRUNC;
    461			hop_cnt = ARRAY_SIZE(trap->data.ntc_256.dr_rtn_path);
    462		}
    463		trap->data.ntc_256.dr_trunc_hop |= hop_cnt;
    464		memcpy(trap->data.ntc_256.dr_rtn_path, return_path,
    465		       hop_cnt);
    466	}
    467
    468	trap->len = sizeof(trap->data);
    469
    470	send_trap(ibp, trap);
    471}
    472
    473/*
    474 * Send a Port Capability Mask Changed trap (ch. 14.3.11).
    475 */
    476void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u32 port_num)
    477{
    478	struct trap_node *trap;
    479	struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
    480	struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
    481	struct hfi1_ibport *ibp = &dd->pport[port_num - 1].ibport_data;
    482	u32 lid = ppd_from_ibp(ibp)->lid;
    483
    484	trap = create_trap_node(IB_NOTICE_TYPE_INFO,
    485				OPA_TRAP_CHANGE_CAPABILITY,
    486				lid);
    487	if (!trap)
    488		return;
    489
    490	trap->data.ntc_144.lid = trap->data.issuer_lid;
    491	trap->data.ntc_144.new_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
    492	trap->data.ntc_144.cap_mask3 = cpu_to_be16(ibp->rvp.port_cap3_flags);
    493
    494	trap->len = sizeof(trap->data);
    495	send_trap(ibp, trap);
    496}
    497
    498/*
    499 * Send a System Image GUID Changed trap (ch. 14.3.12).
    500 */
    501void hfi1_sys_guid_chg(struct hfi1_ibport *ibp)
    502{
    503	struct trap_node *trap;
    504	u32 lid = ppd_from_ibp(ibp)->lid;
    505
    506	trap = create_trap_node(IB_NOTICE_TYPE_INFO, OPA_TRAP_CHANGE_SYSGUID,
    507				lid);
    508	if (!trap)
    509		return;
    510
    511	trap->data.ntc_145.new_sys_guid = ib_hfi1_sys_image_guid;
    512	trap->data.ntc_145.lid = trap->data.issuer_lid;
    513
    514	trap->len = sizeof(trap->data);
    515	send_trap(ibp, trap);
    516}
    517
    518/*
    519 * Send a Node Description Changed trap (ch. 14.3.13).
    520 */
    521void hfi1_node_desc_chg(struct hfi1_ibport *ibp)
    522{
    523	struct trap_node *trap;
    524	u32 lid = ppd_from_ibp(ibp)->lid;
    525
    526	trap = create_trap_node(IB_NOTICE_TYPE_INFO,
    527				OPA_TRAP_CHANGE_CAPABILITY,
    528				lid);
    529	if (!trap)
    530		return;
    531
    532	trap->data.ntc_144.lid = trap->data.issuer_lid;
    533	trap->data.ntc_144.change_flags =
    534		cpu_to_be16(OPA_NOTICE_TRAP_NODE_DESC_CHG);
    535
    536	trap->len = sizeof(trap->data);
    537	send_trap(ibp, trap);
    538}
    539
    540static int __subn_get_opa_nodedesc(struct opa_smp *smp, u32 am,
    541				   u8 *data, struct ib_device *ibdev,
    542				   u32 port, u32 *resp_len, u32 max_len)
    543{
    544	struct opa_node_description *nd;
    545
    546	if (am || smp_length_check(sizeof(*nd), max_len)) {
    547		smp->status |= IB_SMP_INVALID_FIELD;
    548		return reply((struct ib_mad_hdr *)smp);
    549	}
    550
    551	nd = (struct opa_node_description *)data;
    552
    553	memcpy(nd->data, ibdev->node_desc, sizeof(nd->data));
    554
    555	if (resp_len)
    556		*resp_len += sizeof(*nd);
    557
    558	return reply((struct ib_mad_hdr *)smp);
    559}
    560
    561static int __subn_get_opa_nodeinfo(struct opa_smp *smp, u32 am, u8 *data,
    562				   struct ib_device *ibdev, u32 port,
    563				   u32 *resp_len, u32 max_len)
    564{
    565	struct opa_node_info *ni;
    566	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
    567	u32 pidx = port - 1; /* IB number port from 1, hw from 0 */
    568
    569	ni = (struct opa_node_info *)data;
    570
    571	/* GUID 0 is illegal */
    572	if (am || pidx >= dd->num_pports || ibdev->node_guid == 0 ||
    573	    smp_length_check(sizeof(*ni), max_len) ||
    574	    get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) {
    575		smp->status |= IB_SMP_INVALID_FIELD;
    576		return reply((struct ib_mad_hdr *)smp);
    577	}
    578
    579	ni->port_guid = get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX);
    580	ni->base_version = OPA_MGMT_BASE_VERSION;
    581	ni->class_version = OPA_SM_CLASS_VERSION;
    582	ni->node_type = 1;     /* channel adapter */
    583	ni->num_ports = ibdev->phys_port_cnt;
    584	/* This is already in network order */
    585	ni->system_image_guid = ib_hfi1_sys_image_guid;
    586	ni->node_guid = ibdev->node_guid;
    587	ni->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd));
    588	ni->device_id = cpu_to_be16(dd->pcidev->device);
    589	ni->revision = cpu_to_be32(dd->minrev);
    590	ni->local_port_num = port;
    591	ni->vendor_id[0] = dd->oui1;
    592	ni->vendor_id[1] = dd->oui2;
    593	ni->vendor_id[2] = dd->oui3;
    594
    595	if (resp_len)
    596		*resp_len += sizeof(*ni);
    597
    598	return reply((struct ib_mad_hdr *)smp);
    599}
    600
    601static int subn_get_nodeinfo(struct ib_smp *smp, struct ib_device *ibdev,
    602			     u32 port)
    603{
    604	struct ib_node_info *nip = (struct ib_node_info *)&smp->data;
    605	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
    606	u32 pidx = port - 1; /* IB number port from 1, hw from 0 */
    607
    608	/* GUID 0 is illegal */
    609	if (smp->attr_mod || pidx >= dd->num_pports ||
    610	    ibdev->node_guid == 0 ||
    611	    get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) {
    612		smp->status |= IB_SMP_INVALID_FIELD;
    613		return reply((struct ib_mad_hdr *)smp);
    614	}
    615
    616	nip->port_guid = get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX);
    617	nip->base_version = OPA_MGMT_BASE_VERSION;
    618	nip->class_version = OPA_SM_CLASS_VERSION;
    619	nip->node_type = 1;     /* channel adapter */
    620	nip->num_ports = ibdev->phys_port_cnt;
    621	/* This is already in network order */
    622	nip->sys_guid = ib_hfi1_sys_image_guid;
    623	nip->node_guid = ibdev->node_guid;
    624	nip->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd));
    625	nip->device_id = cpu_to_be16(dd->pcidev->device);
    626	nip->revision = cpu_to_be32(dd->minrev);
    627	nip->local_port_num = port;
    628	nip->vendor_id[0] = dd->oui1;
    629	nip->vendor_id[1] = dd->oui2;
    630	nip->vendor_id[2] = dd->oui3;
    631
    632	return reply((struct ib_mad_hdr *)smp);
    633}
    634
    635static void set_link_width_enabled(struct hfi1_pportdata *ppd, u32 w)
    636{
    637	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_LWID_ENB, w);
    638}
    639
    640static void set_link_width_downgrade_enabled(struct hfi1_pportdata *ppd, u32 w)
    641{
    642	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_LWID_DG_ENB, w);
    643}
    644
    645static void set_link_speed_enabled(struct hfi1_pportdata *ppd, u32 s)
    646{
    647	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_SPD_ENB, s);
    648}
    649
    650static int check_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
    651		      int mad_flags, __be64 mkey, __be32 dr_slid,
    652		      u8 return_path[], u8 hop_cnt)
    653{
    654	int valid_mkey = 0;
    655	int ret = 0;
    656
    657	/* Is the mkey in the process of expiring? */
    658	if (ibp->rvp.mkey_lease_timeout &&
    659	    time_after_eq(jiffies, ibp->rvp.mkey_lease_timeout)) {
    660		/* Clear timeout and mkey protection field. */
    661		ibp->rvp.mkey_lease_timeout = 0;
    662		ibp->rvp.mkeyprot = 0;
    663	}
    664
    665	if ((mad_flags & IB_MAD_IGNORE_MKEY) ||  ibp->rvp.mkey == 0 ||
    666	    ibp->rvp.mkey == mkey)
    667		valid_mkey = 1;
    668
    669	/* Unset lease timeout on any valid Get/Set/TrapRepress */
    670	if (valid_mkey && ibp->rvp.mkey_lease_timeout &&
    671	    (mad->method == IB_MGMT_METHOD_GET ||
    672	     mad->method == IB_MGMT_METHOD_SET ||
    673	     mad->method == IB_MGMT_METHOD_TRAP_REPRESS))
    674		ibp->rvp.mkey_lease_timeout = 0;
    675
    676	if (!valid_mkey) {
    677		switch (mad->method) {
    678		case IB_MGMT_METHOD_GET:
    679			/* Bad mkey not a violation below level 2 */
    680			if (ibp->rvp.mkeyprot < 2)
    681				break;
    682			fallthrough;
    683		case IB_MGMT_METHOD_SET:
    684		case IB_MGMT_METHOD_TRAP_REPRESS:
    685			if (ibp->rvp.mkey_violations != 0xFFFF)
    686				++ibp->rvp.mkey_violations;
    687			if (!ibp->rvp.mkey_lease_timeout &&
    688			    ibp->rvp.mkey_lease_period)
    689				ibp->rvp.mkey_lease_timeout = jiffies +
    690					ibp->rvp.mkey_lease_period * HZ;
    691			/* Generate a trap notice. */
    692			bad_mkey(ibp, mad, mkey, dr_slid, return_path,
    693				 hop_cnt);
    694			ret = 1;
    695		}
    696	}
    697
    698	return ret;
    699}
    700
    701/*
    702 * The SMA caches reads from LCB registers in case the LCB is unavailable.
    703 * (The LCB is unavailable in certain link states, for example.)
    704 */
    705struct lcb_datum {
    706	u32 off;
    707	u64 val;
    708};
    709
    710static struct lcb_datum lcb_cache[] = {
    711	{ DC_LCB_STS_ROUND_TRIP_LTP_CNT, 0 },
    712};
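        /*
         * Only offsets present in lcb_cache[] are cacheable: write_lcb_cache()
         * and read_lcb_cache() below warn and return -1 for anything else.  At
         * present the single cached register is DC_LCB_STS_ROUND_TRIP_LTP_CNT,
         * refreshed by read_ltp_rtt() and consumed when replay_depth.wire is
         * filled in by __subn_get_opa_portinfo().
         */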
    713
    714static int write_lcb_cache(u32 off, u64 val)
    715{
    716	int i;
    717
    718	for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
    719		if (lcb_cache[i].off == off) {
    720			lcb_cache[i].val = val;
    721			return 0;
    722		}
    723	}
    724
    725	pr_warn("%s bad offset 0x%x\n", __func__, off);
    726	return -1;
    727}
    728
    729static int read_lcb_cache(u32 off, u64 *val)
    730{
    731	int i;
    732
    733	for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
    734		if (lcb_cache[i].off == off) {
    735			*val = lcb_cache[i].val;
    736			return 0;
    737		}
    738	}
    739
    740	pr_warn("%s bad offset 0x%x\n", __func__, off);
    741	return -1;
    742}
    743
    744void read_ltp_rtt(struct hfi1_devdata *dd)
    745{
    746	u64 reg;
    747
    748	if (read_lcb_csr(dd, DC_LCB_STS_ROUND_TRIP_LTP_CNT, &reg))
    749		dd_dev_err(dd, "%s: unable to read LTP RTT\n", __func__);
    750	else
    751		write_lcb_cache(DC_LCB_STS_ROUND_TRIP_LTP_CNT, reg);
    752}
    753
    754static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
    755				   struct ib_device *ibdev, u32 port,
    756				   u32 *resp_len, u32 max_len)
    757{
    758	int i;
    759	struct hfi1_devdata *dd;
    760	struct hfi1_pportdata *ppd;
    761	struct hfi1_ibport *ibp;
    762	struct opa_port_info *pi = (struct opa_port_info *)data;
    763	u8 mtu;
    764	u8 credit_rate;
    765	u8 is_beaconing_active;
    766	u32 state;
    767	u32 num_ports = OPA_AM_NPORT(am);
    768	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
    769	u32 buffer_units;
    770	u64 tmp = 0;
    771
    772	if (num_ports != 1 || smp_length_check(sizeof(*pi), max_len)) {
    773		smp->status |= IB_SMP_INVALID_FIELD;
    774		return reply((struct ib_mad_hdr *)smp);
    775	}
    776
    777	dd = dd_from_ibdev(ibdev);
    778	/* IB numbers ports from 1, hw from 0 */
    779	ppd = dd->pport + (port - 1);
    780	ibp = &ppd->ibport_data;
    781
    782	if (ppd->vls_supported / 2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
    783	    ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
    784		smp->status |= IB_SMP_INVALID_FIELD;
    785		return reply((struct ib_mad_hdr *)smp);
    786	}
    787
    788	pi->lid = cpu_to_be32(ppd->lid);
    789
    790	/* Only return the mkey if the protection field allows it. */
    791	if (!(smp->method == IB_MGMT_METHOD_GET &&
    792	      ibp->rvp.mkey != smp->mkey &&
    793	      ibp->rvp.mkeyprot == 1))
    794		pi->mkey = ibp->rvp.mkey;
    795
    796	pi->subnet_prefix = ibp->rvp.gid_prefix;
    797	pi->sm_lid = cpu_to_be32(ibp->rvp.sm_lid);
    798	pi->ib_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
    799	pi->mkey_lease_period = cpu_to_be16(ibp->rvp.mkey_lease_period);
    800	pi->sm_trap_qp = cpu_to_be32(ppd->sm_trap_qp);
    801	pi->sa_qp = cpu_to_be32(ppd->sa_qp);
    802
    803	pi->link_width.enabled = cpu_to_be16(ppd->link_width_enabled);
    804	pi->link_width.supported = cpu_to_be16(ppd->link_width_supported);
    805	pi->link_width.active = cpu_to_be16(ppd->link_width_active);
    806
    807	pi->link_width_downgrade.supported =
    808			cpu_to_be16(ppd->link_width_downgrade_supported);
    809	pi->link_width_downgrade.enabled =
    810			cpu_to_be16(ppd->link_width_downgrade_enabled);
    811	pi->link_width_downgrade.tx_active =
    812			cpu_to_be16(ppd->link_width_downgrade_tx_active);
    813	pi->link_width_downgrade.rx_active =
    814			cpu_to_be16(ppd->link_width_downgrade_rx_active);
    815
    816	pi->link_speed.supported = cpu_to_be16(ppd->link_speed_supported);
    817	pi->link_speed.active = cpu_to_be16(ppd->link_speed_active);
    818	pi->link_speed.enabled = cpu_to_be16(ppd->link_speed_enabled);
    819
    820	state = driver_lstate(ppd);
    821
    822	if (start_of_sm_config && (state == IB_PORT_INIT))
    823		ppd->is_sm_config_started = 1;
    824
    825	pi->port_phys_conf = (ppd->port_type & 0xf);
    826
    827	pi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4;
    828	pi->port_states.ledenable_offlinereason |=
    829		ppd->is_sm_config_started << 5;
    830	/*
    831	 * This pairs with the memory barrier in hfi1_start_led_override to
    832	 * ensure that we read the correct state of LED beaconing represented
    833	 * by led_override_timer_active
    834	 */
    835	smp_rmb();
    836	is_beaconing_active = !!atomic_read(&ppd->led_override_timer_active);
    837	pi->port_states.ledenable_offlinereason |= is_beaconing_active << 6;
    838	pi->port_states.ledenable_offlinereason |=
    839		ppd->offline_disabled_reason;
    840
    841	pi->port_states.portphysstate_portstate =
    842		(driver_pstate(ppd) << 4) | state;
    843
    844	pi->mkeyprotect_lmc = (ibp->rvp.mkeyprot << 6) | ppd->lmc;
    845
    846	memset(pi->neigh_mtu.pvlx_to_mtu, 0, sizeof(pi->neigh_mtu.pvlx_to_mtu));
    847	for (i = 0; i < ppd->vls_supported; i++) {
    848		mtu = mtu_to_enum(dd->vld[i].mtu, HFI1_DEFAULT_ACTIVE_MTU);
    849		if ((i % 2) == 0)
    850			pi->neigh_mtu.pvlx_to_mtu[i / 2] |= (mtu << 4);
    851		else
    852			pi->neigh_mtu.pvlx_to_mtu[i / 2] |= mtu;
    853	}
    854	/* don't forget VL 15 */
    855	mtu = mtu_to_enum(dd->vld[15].mtu, 2048);
    856	pi->neigh_mtu.pvlx_to_mtu[15 / 2] |= mtu;
    857	pi->smsl = ibp->rvp.sm_sl & OPA_PI_MASK_SMSL;
    858	pi->operational_vls = hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_OP_VLS);
    859	pi->partenforce_filterraw |=
    860		(ppd->linkinit_reason & OPA_PI_MASK_LINKINIT_REASON);
    861	if (ppd->part_enforce & HFI1_PART_ENFORCE_IN)
    862		pi->partenforce_filterraw |= OPA_PI_MASK_PARTITION_ENFORCE_IN;
    863	if (ppd->part_enforce & HFI1_PART_ENFORCE_OUT)
    864		pi->partenforce_filterraw |= OPA_PI_MASK_PARTITION_ENFORCE_OUT;
    865	pi->mkey_violations = cpu_to_be16(ibp->rvp.mkey_violations);
    866	/* P_KeyViolations are counted by hardware. */
    867	pi->pkey_violations = cpu_to_be16(ibp->rvp.pkey_violations);
    868	pi->qkey_violations = cpu_to_be16(ibp->rvp.qkey_violations);
    869
    870	pi->vl.cap = ppd->vls_supported;
    871	pi->vl.high_limit = cpu_to_be16(ibp->rvp.vl_high_limit);
    872	pi->vl.arb_high_cap = (u8)hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_VL_HIGH_CAP);
    873	pi->vl.arb_low_cap = (u8)hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_VL_LOW_CAP);
    874
    875	pi->clientrereg_subnettimeout = ibp->rvp.subnet_timeout;
    876
    877	pi->port_link_mode  = cpu_to_be16(OPA_PORT_LINK_MODE_OPA << 10 |
    878					  OPA_PORT_LINK_MODE_OPA << 5 |
    879					  OPA_PORT_LINK_MODE_OPA);
    880
    881	pi->port_ltp_crc_mode = cpu_to_be16(ppd->port_ltp_crc_mode);
    882
    883	pi->port_mode = cpu_to_be16(
    884				ppd->is_active_optimize_enabled ?
    885					OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE : 0);
    886
    887	pi->port_packet_format.supported =
    888		cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B |
    889			    OPA_PORT_PACKET_FORMAT_16B);
    890	pi->port_packet_format.enabled =
    891		cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B |
    892			    OPA_PORT_PACKET_FORMAT_16B);
    893
    894	/* flit_control.interleave is (OPA V1, version .76):
    895	 * bits		use
    896	 * ----		---
    897	 * 2		res
    898	 * 2		DistanceSupported
    899	 * 2		DistanceEnabled
    900	 * 5		MaxNextLevelTxEnabled
     901	 * 5		MaxNextLevelRxSupported
    902	 *
    903	 * HFI supports only "distance mode 1" (see OPA V1, version .76,
    904	 * section 9.6.2), so set DistanceSupported, DistanceEnabled
    905	 * to 0x1.
    906	 */
    907	pi->flit_control.interleave = cpu_to_be16(0x1400);
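        	/*
        	 * Decoding 0x1400 against the layout in the comment above, and
        	 * assuming the fields are packed most-significant-bit first in
        	 * the order listed: 0x1400 = 0b0001 0100 0000 0000, so
        	 * DistanceSupported = 0x1 and DistanceEnabled = 0x1 while every
        	 * other field is 0 -- i.e. "distance mode 1" only.
        	 */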
    908
    909	pi->link_down_reason = ppd->local_link_down_reason.sma;
    910	pi->neigh_link_down_reason = ppd->neigh_link_down_reason.sma;
    911	pi->port_error_action = cpu_to_be32(ppd->port_error_action);
    912	pi->mtucap = mtu_to_enum(hfi1_max_mtu, IB_MTU_4096);
    913
    914	/* 32.768 usec. response time (guessing) */
    915	pi->resptimevalue = 3;
    916
    917	pi->local_port_num = port;
    918
    919	/* buffer info for FM */
    920	pi->overall_buffer_space = cpu_to_be16(dd->link_credits);
    921
    922	pi->neigh_node_guid = cpu_to_be64(ppd->neighbor_guid);
    923	pi->neigh_port_num = ppd->neighbor_port_number;
    924	pi->port_neigh_mode =
    925		(ppd->neighbor_type & OPA_PI_MASK_NEIGH_NODE_TYPE) |
    926		(ppd->mgmt_allowed ? OPA_PI_MASK_NEIGH_MGMT_ALLOWED : 0) |
    927		(ppd->neighbor_fm_security ?
    928			OPA_PI_MASK_NEIGH_FW_AUTH_BYPASS : 0);
    929
    930	/* HFIs shall always return VL15 credits to their
    931	 * neighbor in a timely manner, without any credit return pacing.
    932	 */
    933	credit_rate = 0;
    934	buffer_units  = (dd->vau) & OPA_PI_MASK_BUF_UNIT_BUF_ALLOC;
    935	buffer_units |= (dd->vcu << 3) & OPA_PI_MASK_BUF_UNIT_CREDIT_ACK;
    936	buffer_units |= (credit_rate << 6) &
    937				OPA_PI_MASK_BUF_UNIT_VL15_CREDIT_RATE;
    938	buffer_units |= (dd->vl15_init << 11) & OPA_PI_MASK_BUF_UNIT_VL15_INIT;
    939	pi->buffer_units = cpu_to_be32(buffer_units);
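        	/*
        	 * Packing used above: dd->vau occupies the low bits
        	 * (BUF_UNIT_BUF_ALLOC), dd->vcu is placed at bit 3
        	 * (BUF_UNIT_CREDIT_ACK), the always-zero VL15 credit rate at
        	 * bit 6 and dd->vl15_init at bit 11, each masked to its
        	 * OPA_PI_MASK_BUF_UNIT_* field before the word is byte-swapped.
        	 */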
    940
    941	pi->opa_cap_mask = cpu_to_be16(ibp->rvp.port_cap3_flags);
    942	pi->collectivemask_multicastmask = ((OPA_COLLECTIVE_NR & 0x7)
    943					    << 3 | (OPA_MCAST_NR & 0x7));
    944
    945	/* HFI supports a replay buffer 128 LTPs in size */
    946	pi->replay_depth.buffer = 0x80;
    947	/* read the cached value of DC_LCB_STS_ROUND_TRIP_LTP_CNT */
    948	read_lcb_cache(DC_LCB_STS_ROUND_TRIP_LTP_CNT, &tmp);
    949
    950	/*
    951	 * this counter is 16 bits wide, but the replay_depth.wire
    952	 * variable is only 8 bits
    953	 */
    954	if (tmp > 0xff)
    955		tmp = 0xff;
    956	pi->replay_depth.wire = tmp;
    957
    958	if (resp_len)
    959		*resp_len += sizeof(struct opa_port_info);
    960
    961	return reply((struct ib_mad_hdr *)smp);
    962}
    963
    964/**
    965 * get_pkeys - return the PKEY table
    966 * @dd: the hfi1_ib device
    967 * @port: the IB port number
    968 * @pkeys: the pkey table is placed here
    969 */
    970static int get_pkeys(struct hfi1_devdata *dd, u32 port, u16 *pkeys)
    971{
    972	struct hfi1_pportdata *ppd = dd->pport + port - 1;
    973
    974	memcpy(pkeys, ppd->pkeys, sizeof(ppd->pkeys));
    975
    976	return 0;
    977}
    978
    979static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
    980				    struct ib_device *ibdev, u32 port,
    981				    u32 *resp_len, u32 max_len)
    982{
    983	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
    984	u32 n_blocks_req = OPA_AM_NBLK(am);
    985	u32 start_block = am & 0x7ff;
    986	__be16 *p;
    987	u16 *q;
    988	int i;
    989	u16 n_blocks_avail;
    990	unsigned npkeys = hfi1_get_npkeys(dd);
    991	size_t size;
    992
    993	if (n_blocks_req == 0) {
     994		pr_warn("OPA Get PKey AM Invalid : P = %u; B = 0x%x; N = 0x%x\n",
    995			port, start_block, n_blocks_req);
    996		smp->status |= IB_SMP_INVALID_FIELD;
    997		return reply((struct ib_mad_hdr *)smp);
    998	}
    999
   1000	n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1;
   1001
   1002	size = (n_blocks_req * OPA_PARTITION_TABLE_BLK_SIZE) * sizeof(u16);
   1003
   1004	if (smp_length_check(size, max_len)) {
   1005		smp->status |= IB_SMP_INVALID_FIELD;
   1006		return reply((struct ib_mad_hdr *)smp);
   1007	}
   1008
   1009	if (start_block + n_blocks_req > n_blocks_avail ||
   1010	    n_blocks_req > OPA_NUM_PKEY_BLOCKS_PER_SMP) {
   1011		pr_warn("OPA Get PKey AM Invalid : s 0x%x; req 0x%x; "
   1012			"avail 0x%x; blk/smp 0x%lx\n",
   1013			start_block, n_blocks_req, n_blocks_avail,
   1014			OPA_NUM_PKEY_BLOCKS_PER_SMP);
   1015		smp->status |= IB_SMP_INVALID_FIELD;
   1016		return reply((struct ib_mad_hdr *)smp);
   1017	}
   1018
   1019	p = (__be16 *)data;
   1020	q = (u16 *)data;
   1021	/* get the real pkeys if we are requesting the first block */
   1022	if (start_block == 0) {
   1023		get_pkeys(dd, port, q);
   1024		for (i = 0; i < npkeys; i++)
   1025			p[i] = cpu_to_be16(q[i]);
   1026		if (resp_len)
   1027			*resp_len += size;
   1028	} else {
   1029		smp->status |= IB_SMP_INVALID_FIELD;
   1030	}
   1031	return reply((struct ib_mad_hdr *)smp);
   1032}
   1033
   1034enum {
   1035	HFI_TRANSITION_DISALLOWED,
   1036	HFI_TRANSITION_IGNORED,
   1037	HFI_TRANSITION_ALLOWED,
   1038	HFI_TRANSITION_UNDEFINED,
   1039};
   1040
   1041/*
   1042 * Use shortened names to improve readability of
   1043 * {logical,physical}_state_transitions
   1044 */
   1045enum {
   1046	__D = HFI_TRANSITION_DISALLOWED,
   1047	__I = HFI_TRANSITION_IGNORED,
   1048	__A = HFI_TRANSITION_ALLOWED,
   1049	__U = HFI_TRANSITION_UNDEFINED,
   1050};
   1051
   1052/*
   1053 * IB_PORTPHYSSTATE_POLLING (2) through OPA_PORTPHYSSTATE_MAX (11) are
   1054 * represented in physical_state_transitions.
   1055 */
   1056#define __N_PHYSTATES (OPA_PORTPHYSSTATE_MAX - IB_PORTPHYSSTATE_POLLING + 1)
   1057
   1058/*
   1059 * Within physical_state_transitions, rows represent "old" states,
   1060 * columns "new" states, and physical_state_transitions.allowed[old][new]
   1061 * indicates if the transition from old state to new state is legal (see
   1062 * OPAg1v1, Table 6-4).
   1063 */
   1064static const struct {
   1065	u8 allowed[__N_PHYSTATES][__N_PHYSTATES];
   1066} physical_state_transitions = {
   1067	{
   1068		/* 2    3    4    5    6    7    8    9   10   11 */
   1069	/* 2 */	{ __A, __A, __D, __D, __D, __D, __D, __D, __D, __D },
   1070	/* 3 */	{ __A, __I, __D, __D, __D, __D, __D, __D, __D, __A },
   1071	/* 4 */	{ __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
   1072	/* 5 */	{ __A, __A, __D, __I, __D, __D, __D, __D, __D, __D },
   1073	/* 6 */	{ __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
   1074	/* 7 */	{ __D, __A, __D, __D, __D, __I, __D, __D, __D, __D },
   1075	/* 8 */	{ __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
   1076	/* 9 */	{ __I, __A, __D, __D, __D, __D, __D, __I, __D, __D },
   1077	/*10 */	{ __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
   1078	/*11 */	{ __D, __A, __D, __D, __D, __D, __D, __D, __D, __I },
   1079	}
   1080};
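        /*
         * Example lookup, mirroring physical_transition_allowed() below: a
         * request to move from physical state 2 (IB_PORTPHYSSTATE_POLLING) to
         * state 3 indexes allowed[2 - 2][3 - 2] = allowed[0][1], which is __A
         * in the table above, so the transition is permitted.  Rows 4, 6, 8
         * and 10 consist entirely of __U (undefined) entries.
         */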
   1081
   1082/*
   1083 * IB_PORT_DOWN (1) through IB_PORT_ACTIVE_DEFER (5) are represented
   1084 * logical_state_transitions
   1085 */
   1086
   1087#define __N_LOGICAL_STATES (IB_PORT_ACTIVE_DEFER - IB_PORT_DOWN + 1)
   1088
   1089/*
   1090 * Within logical_state_transitions rows represent "old" states,
   1091 * columns "new" states, and logical_state_transitions.allowed[old][new]
   1092 * indicates if the transition from old state to new state is legal (see
   1093 * OPAg1v1, Table 9-12).
   1094 */
   1095static const struct {
   1096	u8 allowed[__N_LOGICAL_STATES][__N_LOGICAL_STATES];
   1097} logical_state_transitions = {
   1098	{
   1099		/* 1    2    3    4    5 */
   1100	/* 1 */	{ __I, __D, __D, __D, __U},
   1101	/* 2 */	{ __D, __I, __A, __D, __U},
   1102	/* 3 */	{ __D, __D, __I, __A, __U},
   1103	/* 4 */	{ __D, __D, __I, __I, __U},
   1104	/* 5 */	{ __U, __U, __U, __U, __U},
   1105	}
   1106};
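        /*
         * Example lookup, mirroring logical_transition_allowed() below: moving
         * from IB_PORT_INIT (2) to IB_PORT_ARMED (3) indexes
         * allowed[2 - 1][3 - 1] = allowed[1][2], which is __A above and thus
         * allowed, whereas Init -> Down is allowed[1][0] = __D and is
         * rejected.
         */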
   1107
   1108static int logical_transition_allowed(int old, int new)
   1109{
   1110	if (old < IB_PORT_NOP || old > IB_PORT_ACTIVE_DEFER ||
   1111	    new < IB_PORT_NOP || new > IB_PORT_ACTIVE_DEFER) {
   1112		pr_warn("invalid logical state(s) (old %d new %d)\n",
   1113			old, new);
   1114		return HFI_TRANSITION_UNDEFINED;
   1115	}
   1116
   1117	if (new == IB_PORT_NOP)
   1118		return HFI_TRANSITION_ALLOWED; /* always allowed */
   1119
   1120	/* adjust states for indexing into logical_state_transitions */
   1121	old -= IB_PORT_DOWN;
   1122	new -= IB_PORT_DOWN;
   1123
   1124	if (old < 0 || new < 0)
   1125		return HFI_TRANSITION_UNDEFINED;
   1126	return logical_state_transitions.allowed[old][new];
   1127}
   1128
   1129static int physical_transition_allowed(int old, int new)
   1130{
   1131	if (old < IB_PORTPHYSSTATE_NOP || old > OPA_PORTPHYSSTATE_MAX ||
   1132	    new < IB_PORTPHYSSTATE_NOP || new > OPA_PORTPHYSSTATE_MAX) {
   1133		pr_warn("invalid physical state(s) (old %d new %d)\n",
   1134			old, new);
   1135		return HFI_TRANSITION_UNDEFINED;
   1136	}
   1137
   1138	if (new == IB_PORTPHYSSTATE_NOP)
   1139		return HFI_TRANSITION_ALLOWED; /* always allowed */
   1140
   1141	/* adjust states for indexing into physical_state_transitions */
   1142	old -= IB_PORTPHYSSTATE_POLLING;
   1143	new -= IB_PORTPHYSSTATE_POLLING;
   1144
   1145	if (old < 0 || new < 0)
   1146		return HFI_TRANSITION_UNDEFINED;
   1147	return physical_state_transitions.allowed[old][new];
   1148}
   1149
   1150static int port_states_transition_allowed(struct hfi1_pportdata *ppd,
   1151					  u32 logical_new, u32 physical_new)
   1152{
   1153	u32 physical_old = driver_pstate(ppd);
   1154	u32 logical_old = driver_lstate(ppd);
   1155	int ret, logical_allowed, physical_allowed;
   1156
   1157	ret = logical_transition_allowed(logical_old, logical_new);
   1158	logical_allowed = ret;
   1159
   1160	if (ret == HFI_TRANSITION_DISALLOWED ||
   1161	    ret == HFI_TRANSITION_UNDEFINED) {
   1162		pr_warn("invalid logical state transition %s -> %s\n",
   1163			opa_lstate_name(logical_old),
   1164			opa_lstate_name(logical_new));
   1165		return ret;
   1166	}
   1167
   1168	ret = physical_transition_allowed(physical_old, physical_new);
   1169	physical_allowed = ret;
   1170
   1171	if (ret == HFI_TRANSITION_DISALLOWED ||
   1172	    ret == HFI_TRANSITION_UNDEFINED) {
   1173		pr_warn("invalid physical state transition %s -> %s\n",
   1174			opa_pstate_name(physical_old),
   1175			opa_pstate_name(physical_new));
   1176		return ret;
   1177	}
   1178
   1179	if (logical_allowed == HFI_TRANSITION_IGNORED &&
   1180	    physical_allowed == HFI_TRANSITION_IGNORED)
   1181		return HFI_TRANSITION_IGNORED;
   1182
   1183	/*
   1184	 * A change request of Physical Port State from
   1185	 * 'Offline' to 'Polling' should be ignored.
   1186	 */
   1187	if ((physical_old == OPA_PORTPHYSSTATE_OFFLINE) &&
   1188	    (physical_new == IB_PORTPHYSSTATE_POLLING))
   1189		return HFI_TRANSITION_IGNORED;
   1190
   1191	/*
   1192	 * Either physical_allowed or logical_allowed is
   1193	 * HFI_TRANSITION_ALLOWED.
   1194	 */
   1195	return HFI_TRANSITION_ALLOWED;
   1196}
   1197
   1198static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
   1199			   u32 logical_state, u32 phys_state, int local_mad)
   1200{
   1201	struct hfi1_devdata *dd = ppd->dd;
   1202	u32 link_state;
   1203	int ret;
   1204
   1205	ret = port_states_transition_allowed(ppd, logical_state, phys_state);
   1206	if (ret == HFI_TRANSITION_DISALLOWED ||
   1207	    ret == HFI_TRANSITION_UNDEFINED) {
   1208		/* error message emitted above */
   1209		smp->status |= IB_SMP_INVALID_FIELD;
   1210		return 0;
   1211	}
   1212
   1213	if (ret == HFI_TRANSITION_IGNORED)
   1214		return 0;
   1215
   1216	if ((phys_state != IB_PORTPHYSSTATE_NOP) &&
   1217	    !(logical_state == IB_PORT_DOWN ||
   1218	      logical_state == IB_PORT_NOP)){
   1219		pr_warn("SubnSet(OPA_PortInfo) port state invalid: logical_state 0x%x physical_state 0x%x\n",
   1220			logical_state, phys_state);
   1221		smp->status |= IB_SMP_INVALID_FIELD;
   1222	}
   1223
   1224	/*
   1225	 * Logical state changes are summarized in OPAv1g1 spec.,
   1226	 * Table 9-12; physical state changes are summarized in
   1227	 * OPAv1g1 spec., Table 6.4.
   1228	 */
   1229	switch (logical_state) {
   1230	case IB_PORT_NOP:
   1231		if (phys_state == IB_PORTPHYSSTATE_NOP)
   1232			break;
   1233		fallthrough;
   1234	case IB_PORT_DOWN:
   1235		if (phys_state == IB_PORTPHYSSTATE_NOP) {
   1236			link_state = HLS_DN_DOWNDEF;
   1237		} else if (phys_state == IB_PORTPHYSSTATE_POLLING) {
   1238			link_state = HLS_DN_POLL;
   1239			set_link_down_reason(ppd, OPA_LINKDOWN_REASON_FM_BOUNCE,
   1240					     0, OPA_LINKDOWN_REASON_FM_BOUNCE);
   1241		} else if (phys_state == IB_PORTPHYSSTATE_DISABLED) {
   1242			link_state = HLS_DN_DISABLE;
   1243		} else {
   1244			pr_warn("SubnSet(OPA_PortInfo) invalid physical state 0x%x\n",
   1245				phys_state);
   1246			smp->status |= IB_SMP_INVALID_FIELD;
   1247			break;
   1248		}
   1249
   1250		if ((link_state == HLS_DN_POLL ||
   1251		     link_state == HLS_DN_DOWNDEF)) {
   1252			/*
   1253			 * Going to poll.  No matter what the current state,
   1254			 * always move offline first, then tune and start the
   1255			 * link.  This correctly handles a FM link bounce and
   1256			 * a link enable.  Going offline is a no-op if already
   1257			 * offline.
   1258			 */
   1259			set_link_state(ppd, HLS_DN_OFFLINE);
   1260			start_link(ppd);
   1261		} else {
   1262			set_link_state(ppd, link_state);
   1263		}
   1264		if (link_state == HLS_DN_DISABLE &&
   1265		    (ppd->offline_disabled_reason >
   1266		     HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED) ||
   1267		     ppd->offline_disabled_reason ==
   1268		     HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE)))
   1269			ppd->offline_disabled_reason =
   1270			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED);
   1271		/*
   1272		 * Don't send a reply if the response would be sent
   1273		 * through the disabled port.
   1274		 */
   1275		if (link_state == HLS_DN_DISABLE && !local_mad)
   1276			return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
   1277		break;
   1278	case IB_PORT_ARMED:
   1279		ret = set_link_state(ppd, HLS_UP_ARMED);
   1280		if (!ret)
   1281			send_idle_sma(dd, SMA_IDLE_ARM);
   1282		break;
   1283	case IB_PORT_ACTIVE:
   1284		if (ppd->neighbor_normal) {
   1285			ret = set_link_state(ppd, HLS_UP_ACTIVE);
   1286			if (ret == 0)
   1287				send_idle_sma(dd, SMA_IDLE_ACTIVE);
   1288		} else {
   1289			pr_warn("SubnSet(OPA_PortInfo) Cannot move to Active with NeighborNormal 0\n");
   1290			smp->status |= IB_SMP_INVALID_FIELD;
   1291		}
   1292		break;
   1293	default:
   1294		pr_warn("SubnSet(OPA_PortInfo) invalid logical state 0x%x\n",
   1295			logical_state);
   1296		smp->status |= IB_SMP_INVALID_FIELD;
   1297	}
   1298
   1299	return 0;
   1300}
   1301
   1302/*
   1303 * subn_set_opa_portinfo - set port information
   1304 * @smp: the incoming SM packet
   1305 * @ibdev: the infiniband device
   1306 * @port: the port on the device
   1307 *
   1308 */
   1309static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
   1310				   struct ib_device *ibdev, u32 port,
   1311				   u32 *resp_len, u32 max_len, int local_mad)
   1312{
   1313	struct opa_port_info *pi = (struct opa_port_info *)data;
   1314	struct ib_event event;
   1315	struct hfi1_devdata *dd;
   1316	struct hfi1_pportdata *ppd;
   1317	struct hfi1_ibport *ibp;
   1318	u8 clientrereg;
   1319	unsigned long flags;
   1320	u32 smlid;
   1321	u32 lid;
   1322	u8 ls_old, ls_new, ps_new;
   1323	u8 vls;
   1324	u8 msl;
   1325	u8 crc_enabled;
   1326	u16 lse, lwe, mtu;
   1327	u32 num_ports = OPA_AM_NPORT(am);
   1328	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
   1329	int ret, i, invalid = 0, call_set_mtu = 0;
   1330	int call_link_downgrade_policy = 0;
   1331
   1332	if (num_ports != 1 ||
   1333	    smp_length_check(sizeof(*pi), max_len)) {
   1334		smp->status |= IB_SMP_INVALID_FIELD;
   1335		return reply((struct ib_mad_hdr *)smp);
   1336	}
   1337
   1338	lid = be32_to_cpu(pi->lid);
   1339	if (lid & 0xFF000000) {
   1340		pr_warn("OPA_PortInfo lid out of range: %X\n", lid);
   1341		smp->status |= IB_SMP_INVALID_FIELD;
   1342		goto get_only;
   1343	}
   1344
   1345
   1346	smlid = be32_to_cpu(pi->sm_lid);
   1347	if (smlid & 0xFF000000) {
   1348		pr_warn("OPA_PortInfo SM lid out of range: %X\n", smlid);
   1349		smp->status |= IB_SMP_INVALID_FIELD;
   1350		goto get_only;
   1351	}
   1352
   1353	clientrereg = (pi->clientrereg_subnettimeout &
   1354			OPA_PI_MASK_CLIENT_REREGISTER);
   1355
   1356	dd = dd_from_ibdev(ibdev);
   1357	/* IB numbers ports from 1, hw from 0 */
   1358	ppd = dd->pport + (port - 1);
   1359	ibp = &ppd->ibport_data;
   1360	event.device = ibdev;
   1361	event.element.port_num = port;
   1362
   1363	ls_old = driver_lstate(ppd);
   1364
   1365	ibp->rvp.mkey = pi->mkey;
   1366	if (ibp->rvp.gid_prefix != pi->subnet_prefix) {
   1367		ibp->rvp.gid_prefix = pi->subnet_prefix;
   1368		event.event = IB_EVENT_GID_CHANGE;
   1369		ib_dispatch_event(&event);
   1370	}
   1371	ibp->rvp.mkey_lease_period = be16_to_cpu(pi->mkey_lease_period);
   1372
   1373	/* Must be a valid unicast LID address. */
   1374	if ((lid == 0 && ls_old > IB_PORT_INIT) ||
   1375	     (hfi1_is_16B_mcast(lid))) {
   1376		smp->status |= IB_SMP_INVALID_FIELD;
   1377		pr_warn("SubnSet(OPA_PortInfo) lid invalid 0x%x\n",
   1378			lid);
   1379	} else if (ppd->lid != lid ||
   1380		 ppd->lmc != (pi->mkeyprotect_lmc & OPA_PI_MASK_LMC)) {
   1381		if (ppd->lid != lid)
   1382			hfi1_set_uevent_bits(ppd, _HFI1_EVENT_LID_CHANGE_BIT);
   1383		if (ppd->lmc != (pi->mkeyprotect_lmc & OPA_PI_MASK_LMC))
   1384			hfi1_set_uevent_bits(ppd, _HFI1_EVENT_LMC_CHANGE_BIT);
   1385		hfi1_set_lid(ppd, lid, pi->mkeyprotect_lmc & OPA_PI_MASK_LMC);
   1386		event.event = IB_EVENT_LID_CHANGE;
   1387		ib_dispatch_event(&event);
   1388
   1389		if (HFI1_PORT_GUID_INDEX + 1 < HFI1_GUIDS_PER_PORT) {
   1390			/* Manufacture GID from LID to support extended
   1391			 * addresses
   1392			 */
   1393			ppd->guids[HFI1_PORT_GUID_INDEX + 1] =
   1394				be64_to_cpu(OPA_MAKE_ID(lid));
   1395			event.event = IB_EVENT_GID_CHANGE;
   1396			ib_dispatch_event(&event);
   1397		}
   1398	}
   1399
   1400	msl = pi->smsl & OPA_PI_MASK_SMSL;
   1401	if (pi->partenforce_filterraw & OPA_PI_MASK_LINKINIT_REASON)
   1402		ppd->linkinit_reason =
   1403			(pi->partenforce_filterraw &
   1404			 OPA_PI_MASK_LINKINIT_REASON);
   1405
   1406	/* Must be a valid unicast LID address. */
   1407	if ((smlid == 0 && ls_old > IB_PORT_INIT) ||
   1408	     (hfi1_is_16B_mcast(smlid))) {
   1409		smp->status |= IB_SMP_INVALID_FIELD;
   1410		pr_warn("SubnSet(OPA_PortInfo) smlid invalid 0x%x\n", smlid);
   1411	} else if (smlid != ibp->rvp.sm_lid || msl != ibp->rvp.sm_sl) {
   1412		pr_warn("SubnSet(OPA_PortInfo) smlid 0x%x\n", smlid);
   1413		spin_lock_irqsave(&ibp->rvp.lock, flags);
   1414		if (ibp->rvp.sm_ah) {
   1415			if (smlid != ibp->rvp.sm_lid)
   1416				hfi1_modify_qp0_ah(ibp, ibp->rvp.sm_ah, smlid);
   1417			if (msl != ibp->rvp.sm_sl)
   1418				rdma_ah_set_sl(&ibp->rvp.sm_ah->attr, msl);
   1419		}
   1420		spin_unlock_irqrestore(&ibp->rvp.lock, flags);
   1421		if (smlid != ibp->rvp.sm_lid)
   1422			ibp->rvp.sm_lid = smlid;
   1423		if (msl != ibp->rvp.sm_sl)
   1424			ibp->rvp.sm_sl = msl;
   1425		event.event = IB_EVENT_SM_CHANGE;
   1426		ib_dispatch_event(&event);
   1427	}
   1428
   1429	if (pi->link_down_reason == 0) {
   1430		ppd->local_link_down_reason.sma = 0;
   1431		ppd->local_link_down_reason.latest = 0;
   1432	}
   1433
   1434	if (pi->neigh_link_down_reason == 0) {
   1435		ppd->neigh_link_down_reason.sma = 0;
   1436		ppd->neigh_link_down_reason.latest = 0;
   1437	}
   1438
   1439	ppd->sm_trap_qp = be32_to_cpu(pi->sm_trap_qp);
   1440	ppd->sa_qp = be32_to_cpu(pi->sa_qp);
   1441
   1442	ppd->port_error_action = be32_to_cpu(pi->port_error_action);
   1443	lwe = be16_to_cpu(pi->link_width.enabled);
   1444	if (lwe) {
   1445		if (lwe == OPA_LINK_WIDTH_RESET ||
   1446		    lwe == OPA_LINK_WIDTH_RESET_OLD)
   1447			set_link_width_enabled(ppd, ppd->link_width_supported);
   1448		else if ((lwe & ~ppd->link_width_supported) == 0)
   1449			set_link_width_enabled(ppd, lwe);
   1450		else
   1451			smp->status |= IB_SMP_INVALID_FIELD;
   1452	}
   1453	lwe = be16_to_cpu(pi->link_width_downgrade.enabled);
   1454	/* LWD.E is always applied - 0 means "disabled" */
   1455	if (lwe == OPA_LINK_WIDTH_RESET ||
   1456	    lwe == OPA_LINK_WIDTH_RESET_OLD) {
   1457		set_link_width_downgrade_enabled(ppd,
   1458						 ppd->
   1459						 link_width_downgrade_supported
   1460						 );
   1461	} else if ((lwe & ~ppd->link_width_downgrade_supported) == 0) {
   1462		/* only set and apply if something changed */
   1463		if (lwe != ppd->link_width_downgrade_enabled) {
   1464			set_link_width_downgrade_enabled(ppd, lwe);
   1465			call_link_downgrade_policy = 1;
   1466		}
   1467	} else {
   1468		smp->status |= IB_SMP_INVALID_FIELD;
   1469	}
   1470	lse = be16_to_cpu(pi->link_speed.enabled);
   1471	if (lse) {
   1472		if (lse & be16_to_cpu(pi->link_speed.supported))
   1473			set_link_speed_enabled(ppd, lse);
   1474		else
   1475			smp->status |= IB_SMP_INVALID_FIELD;
   1476	}
   1477
   1478	ibp->rvp.mkeyprot =
   1479		(pi->mkeyprotect_lmc & OPA_PI_MASK_MKEY_PROT_BIT) >> 6;
   1480	ibp->rvp.vl_high_limit = be16_to_cpu(pi->vl.high_limit) & 0xFF;
   1481	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_VL_HIGH_LIMIT,
   1482				    ibp->rvp.vl_high_limit);
   1483
   1484	if (ppd->vls_supported / 2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
   1485	    ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
   1486		smp->status |= IB_SMP_INVALID_FIELD;
   1487		return reply((struct ib_mad_hdr *)smp);
   1488	}
   1489	for (i = 0; i < ppd->vls_supported; i++) {
   1490		if ((i % 2) == 0)
   1491			mtu = enum_to_mtu((pi->neigh_mtu.pvlx_to_mtu[i / 2] >>
   1492					   4) & 0xF);
   1493		else
   1494			mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[i / 2] &
   1495					  0xF);
   1496		if (mtu == 0xffff) {
   1497			pr_warn("SubnSet(OPA_PortInfo) mtu invalid %d (0x%x)\n",
   1498				mtu,
   1499				(pi->neigh_mtu.pvlx_to_mtu[0] >> 4) & 0xF);
   1500			smp->status |= IB_SMP_INVALID_FIELD;
   1501			mtu = hfi1_max_mtu; /* use a valid MTU */
   1502		}
   1503		if (dd->vld[i].mtu != mtu) {
   1504			dd_dev_info(dd,
   1505				    "MTU change on vl %d from %d to %d\n",
   1506				    i, dd->vld[i].mtu, mtu);
   1507			dd->vld[i].mtu = mtu;
   1508			call_set_mtu++;
   1509		}
   1510	}
   1511	/* As per OPAV1 spec: VL15 must support and be configured
   1512	 * for operation with a 2048 or larger MTU.
   1513	 */
   1514	mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[15 / 2] & 0xF);
   1515	if (mtu < 2048 || mtu == 0xffff)
   1516		mtu = 2048;
   1517	if (dd->vld[15].mtu != mtu) {
   1518		dd_dev_info(dd,
   1519			    "MTU change on vl 15 from %d to %d\n",
   1520			    dd->vld[15].mtu, mtu);
   1521		dd->vld[15].mtu = mtu;
   1522		call_set_mtu++;
   1523	}
   1524	if (call_set_mtu)
   1525		set_mtu(ppd);
   1526
   1527	/* Set operational VLs */
   1528	vls = pi->operational_vls & OPA_PI_MASK_OPERATIONAL_VL;
   1529	if (vls) {
   1530		if (vls > ppd->vls_supported) {
   1531			pr_warn("SubnSet(OPA_PortInfo) VL's supported invalid %d\n",
   1532				pi->operational_vls);
   1533			smp->status |= IB_SMP_INVALID_FIELD;
   1534		} else {
   1535			if (hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_OP_VLS,
   1536					    vls) == -EINVAL)
   1537				smp->status |= IB_SMP_INVALID_FIELD;
   1538		}
   1539	}
   1540
   1541	if (pi->mkey_violations == 0)
   1542		ibp->rvp.mkey_violations = 0;
   1543
   1544	if (pi->pkey_violations == 0)
   1545		ibp->rvp.pkey_violations = 0;
   1546
   1547	if (pi->qkey_violations == 0)
   1548		ibp->rvp.qkey_violations = 0;
   1549
   1550	ibp->rvp.subnet_timeout =
   1551		pi->clientrereg_subnettimeout & OPA_PI_MASK_SUBNET_TIMEOUT;
   1552
   1553	crc_enabled = be16_to_cpu(pi->port_ltp_crc_mode);
   1554	crc_enabled >>= 4;
   1555	crc_enabled &= 0xf;
   1556
   1557	if (crc_enabled != 0)
   1558		ppd->port_crc_mode_enabled = port_ltp_to_cap(crc_enabled);
   1559
   1560	ppd->is_active_optimize_enabled =
   1561			!!(be16_to_cpu(pi->port_mode)
   1562					& OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE);
   1563
   1564	ls_new = pi->port_states.portphysstate_portstate &
   1565			OPA_PI_MASK_PORT_STATE;
   1566	ps_new = (pi->port_states.portphysstate_portstate &
   1567			OPA_PI_MASK_PORT_PHYSICAL_STATE) >> 4;
   1568
   1569	if (ls_old == IB_PORT_INIT) {
   1570		if (start_of_sm_config) {
   1571			if (ls_new == ls_old || (ls_new == IB_PORT_ARMED))
   1572				ppd->is_sm_config_started = 1;
   1573		} else if (ls_new == IB_PORT_ARMED) {
   1574			if (ppd->is_sm_config_started == 0) {
   1575				invalid = 1;
   1576				smp->status |= IB_SMP_INVALID_FIELD;
   1577			}
   1578		}
   1579	}
   1580
   1581	/* Handle CLIENT_REREGISTER event b/c SM asked us for it */
   1582	if (clientrereg) {
   1583		event.event = IB_EVENT_CLIENT_REREGISTER;
   1584		ib_dispatch_event(&event);
   1585	}
   1586
   1587	/*
   1588	 * Do the port state change now that the other link parameters
   1589	 * have been set.
   1590	 * Changing the port physical state only makes sense if the link
   1591	 * is down or is being set to down.
   1592	 */
   1593
   1594	if (!invalid) {
   1595		ret = set_port_states(ppd, smp, ls_new, ps_new, local_mad);
   1596		if (ret)
   1597			return ret;
   1598	}
   1599
   1600	ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len,
   1601				      max_len);
   1602
   1603	/* restore re-reg bit per o14-12.2.1 */
   1604	pi->clientrereg_subnettimeout |= clientrereg;
   1605
   1606	/*
   1607	 * Apply the new link downgrade policy.  This may result in a link
   1608	 * bounce.  Do this after everything else so things are settled.
   1609	 * Possible problem: if setting the port state above fails, then
   1610	 * the policy change is not applied.
   1611	 */
   1612	if (call_link_downgrade_policy)
   1613		apply_link_downgrade_policy(ppd, 0);
   1614
   1615	return ret;
   1616
   1617get_only:
   1618	return __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len,
   1619				       max_len);
   1620}
   1621
   1622/**
   1623 * set_pkeys - set the PKEY table for ctxt 0
   1624 * @dd: the hfi1_ib device
   1625 * @port: the IB port number
   1626 * @pkeys: the PKEY table
   1627 */
   1628static int set_pkeys(struct hfi1_devdata *dd, u32 port, u16 *pkeys)
   1629{
   1630	struct hfi1_pportdata *ppd;
   1631	int i;
   1632	int changed = 0;
   1633	int update_includes_mgmt_partition = 0;
   1634
   1635	/*
   1636	 * IB port one/two always maps to context zero/one,
    1637	 * always a kernel context, so no locking is needed.
    1638	 * If we get here with ppd set up, there is no need to check
   1639	 * that rcd is valid.
   1640	 */
   1641	ppd = dd->pport + (port - 1);
   1642	/*
   1643	 * If the update does not include the management pkey, don't do it.
   1644	 */
   1645	for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
   1646		if (pkeys[i] == LIM_MGMT_P_KEY) {
   1647			update_includes_mgmt_partition = 1;
   1648			break;
   1649		}
   1650	}
   1651
   1652	if (!update_includes_mgmt_partition)
   1653		return 1;
   1654
   1655	for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
   1656		u16 key = pkeys[i];
   1657		u16 okey = ppd->pkeys[i];
   1658
   1659		if (key == okey)
   1660			continue;
   1661		/*
   1662		 * The SM gives us the complete PKey table. We have
   1663		 * to ensure that we put the PKeys in the matching
   1664		 * slots.
   1665		 */
   1666		ppd->pkeys[i] = key;
   1667		changed = 1;
   1668	}
   1669
   1670	if (changed) {
   1671		(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
   1672		hfi1_event_pkey_change(dd, port);
   1673	}
   1674
   1675	return 0;
   1676}
   1677
   1678static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
   1679				    struct ib_device *ibdev, u32 port,
   1680				    u32 *resp_len, u32 max_len)
   1681{
   1682	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
   1683	u32 n_blocks_sent = OPA_AM_NBLK(am);
   1684	u32 start_block = am & 0x7ff;
   1685	u16 *p = (u16 *)data;
   1686	__be16 *q = (__be16 *)data;
   1687	int i;
   1688	u16 n_blocks_avail;
   1689	unsigned npkeys = hfi1_get_npkeys(dd);
   1690	u32 size = 0;
   1691
   1692	if (n_blocks_sent == 0) {
    1693		pr_warn("OPA Set PKey AM Invalid : P = %u; B = 0x%x; N = 0x%x\n",
   1694			port, start_block, n_blocks_sent);
   1695		smp->status |= IB_SMP_INVALID_FIELD;
   1696		return reply((struct ib_mad_hdr *)smp);
   1697	}
   1698
   1699	n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1;
   1700
   1701	size = sizeof(u16) * (n_blocks_sent * OPA_PARTITION_TABLE_BLK_SIZE);
   1702
   1703	if (smp_length_check(size, max_len)) {
   1704		smp->status |= IB_SMP_INVALID_FIELD;
   1705		return reply((struct ib_mad_hdr *)smp);
   1706	}
   1707
   1708	if (start_block + n_blocks_sent > n_blocks_avail ||
   1709	    n_blocks_sent > OPA_NUM_PKEY_BLOCKS_PER_SMP) {
   1710		pr_warn("OPA Set PKey AM Invalid : s 0x%x; req 0x%x; avail 0x%x; blk/smp 0x%lx\n",
   1711			start_block, n_blocks_sent, n_blocks_avail,
   1712			OPA_NUM_PKEY_BLOCKS_PER_SMP);
   1713		smp->status |= IB_SMP_INVALID_FIELD;
   1714		return reply((struct ib_mad_hdr *)smp);
   1715	}
   1716
   1717	for (i = 0; i < n_blocks_sent * OPA_PARTITION_TABLE_BLK_SIZE; i++)
   1718		p[i] = be16_to_cpu(q[i]);
   1719
   1720	if (start_block == 0 && set_pkeys(dd, port, p) != 0) {
   1721		smp->status |= IB_SMP_INVALID_FIELD;
   1722		return reply((struct ib_mad_hdr *)smp);
   1723	}
   1724
   1725	return __subn_get_opa_pkeytable(smp, am, data, ibdev, port, resp_len,
   1726					max_len);
   1727}
   1728
   1729#define ILLEGAL_VL 12
   1730/*
    1731 * filter_sc2vlt remaps entries that map to VL15 to ILLEGAL_VL (except
    1732 * for SC15, which must map to VL15). If we don't remap things this
    1733 * way, it is possible for VL15 counters to increment when we try to
    1734 * send on an SC which is mapped to an invalid VL.
    1735 * When getting the table, convert ILLEGAL_VL back to VL15.
   1736 */
   1737static void filter_sc2vlt(void *data, bool set)
   1738{
   1739	int i;
   1740	u8 *pd = data;
   1741
   1742	for (i = 0; i < OPA_MAX_SCS; i++) {
   1743		if (i == 15)
   1744			continue;
   1745
   1746		if (set) {
   1747			if ((pd[i] & 0x1f) == 0xf)
   1748				pd[i] = ILLEGAL_VL;
   1749		} else {
   1750			if ((pd[i] & 0x1f) == ILLEGAL_VL)
   1751				pd[i] = 0xf;
   1752		}
   1753	}
   1754}
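        /*
         * Example (illustrative): with ILLEGAL_VL == 12, a SET carrying an
         * sc2vlt entry of 0xf (VL15) is stored as 12, and a GET converts the
         * stored 12 back to 0xf before returning the table.
         */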
   1755
   1756static int set_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
   1757{
   1758	u64 *val = data;
   1759
   1760	filter_sc2vlt(data, true);
   1761
   1762	write_csr(dd, SEND_SC2VLT0, *val++);
   1763	write_csr(dd, SEND_SC2VLT1, *val++);
   1764	write_csr(dd, SEND_SC2VLT2, *val++);
   1765	write_csr(dd, SEND_SC2VLT3, *val++);
   1766	write_seqlock_irq(&dd->sc2vl_lock);
   1767	memcpy(dd->sc2vl, data, sizeof(dd->sc2vl));
   1768	write_sequnlock_irq(&dd->sc2vl_lock);
   1769	return 0;
   1770}
   1771
   1772static int get_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
   1773{
   1774	u64 *val = (u64 *)data;
   1775
   1776	*val++ = read_csr(dd, SEND_SC2VLT0);
   1777	*val++ = read_csr(dd, SEND_SC2VLT1);
   1778	*val++ = read_csr(dd, SEND_SC2VLT2);
   1779	*val++ = read_csr(dd, SEND_SC2VLT3);
   1780
   1781	filter_sc2vlt((u64 *)data, false);
   1782	return 0;
   1783}
   1784
   1785static int __subn_get_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
   1786				   struct ib_device *ibdev, u32 port,
   1787				   u32 *resp_len, u32 max_len)
   1788{
   1789	struct hfi1_ibport *ibp = to_iport(ibdev, port);
   1790	u8 *p = data;
   1791	size_t size = ARRAY_SIZE(ibp->sl_to_sc); /* == 32 */
   1792	unsigned i;
   1793
   1794	if (am || smp_length_check(size, max_len)) {
   1795		smp->status |= IB_SMP_INVALID_FIELD;
   1796		return reply((struct ib_mad_hdr *)smp);
   1797	}
   1798
   1799	for (i = 0; i < ARRAY_SIZE(ibp->sl_to_sc); i++)
   1800		*p++ = ibp->sl_to_sc[i];
   1801
   1802	if (resp_len)
   1803		*resp_len += size;
   1804
   1805	return reply((struct ib_mad_hdr *)smp);
   1806}
   1807
   1808static int __subn_set_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
   1809				   struct ib_device *ibdev, u32 port,
   1810				   u32 *resp_len, u32 max_len)
   1811{
   1812	struct hfi1_ibport *ibp = to_iport(ibdev, port);
   1813	u8 *p = data;
   1814	size_t size = ARRAY_SIZE(ibp->sl_to_sc);
   1815	int i;
   1816	u8 sc;
   1817
   1818	if (am || smp_length_check(size, max_len)) {
   1819		smp->status |= IB_SMP_INVALID_FIELD;
   1820		return reply((struct ib_mad_hdr *)smp);
   1821	}
   1822
   1823	for (i = 0; i <  ARRAY_SIZE(ibp->sl_to_sc); i++) {
   1824		sc = *p++;
   1825		if (ibp->sl_to_sc[i] != sc) {
   1826			ibp->sl_to_sc[i] = sc;
   1827
   1828			/* Put all stale qps into error state */
   1829			hfi1_error_port_qps(ibp, i);
   1830		}
   1831	}
   1832
   1833	return __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port, resp_len,
   1834				       max_len);
   1835}
   1836
   1837static int __subn_get_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
   1838				   struct ib_device *ibdev, u32 port,
   1839				   u32 *resp_len, u32 max_len)
   1840{
   1841	struct hfi1_ibport *ibp = to_iport(ibdev, port);
   1842	u8 *p = data;
   1843	size_t size = ARRAY_SIZE(ibp->sc_to_sl); /* == 32 */
   1844	unsigned i;
   1845
   1846	if (am || smp_length_check(size, max_len)) {
   1847		smp->status |= IB_SMP_INVALID_FIELD;
   1848		return reply((struct ib_mad_hdr *)smp);
   1849	}
   1850
   1851	for (i = 0; i < ARRAY_SIZE(ibp->sc_to_sl); i++)
   1852		*p++ = ibp->sc_to_sl[i];
   1853
   1854	if (resp_len)
   1855		*resp_len += size;
   1856
   1857	return reply((struct ib_mad_hdr *)smp);
   1858}
   1859
   1860static int __subn_set_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
   1861				   struct ib_device *ibdev, u32 port,
   1862				   u32 *resp_len, u32 max_len)
   1863{
   1864	struct hfi1_ibport *ibp = to_iport(ibdev, port);
   1865	size_t size = ARRAY_SIZE(ibp->sc_to_sl);
   1866	u8 *p = data;
   1867	int i;
   1868
   1869	if (am || smp_length_check(size, max_len)) {
   1870		smp->status |= IB_SMP_INVALID_FIELD;
   1871		return reply((struct ib_mad_hdr *)smp);
   1872	}
   1873
   1874	for (i = 0; i < ARRAY_SIZE(ibp->sc_to_sl); i++)
   1875		ibp->sc_to_sl[i] = *p++;
   1876
   1877	return __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port, resp_len,
   1878				       max_len);
   1879}
   1880
   1881static int __subn_get_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
   1882				    struct ib_device *ibdev, u32 port,
   1883				    u32 *resp_len, u32 max_len)
   1884{
   1885	u32 n_blocks = OPA_AM_NBLK(am);
   1886	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
   1887	void *vp = (void *)data;
   1888	size_t size = 4 * sizeof(u64);
   1889
   1890	if (n_blocks != 1 || smp_length_check(size, max_len)) {
   1891		smp->status |= IB_SMP_INVALID_FIELD;
   1892		return reply((struct ib_mad_hdr *)smp);
   1893	}
   1894
   1895	get_sc2vlt_tables(dd, vp);
   1896
   1897	if (resp_len)
   1898		*resp_len += size;
   1899
   1900	return reply((struct ib_mad_hdr *)smp);
   1901}
   1902
   1903static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
   1904				    struct ib_device *ibdev, u32 port,
   1905				    u32 *resp_len, u32 max_len)
   1906{
   1907	u32 n_blocks = OPA_AM_NBLK(am);
   1908	int async_update = OPA_AM_ASYNC(am);
   1909	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
   1910	void *vp = (void *)data;
   1911	struct hfi1_pportdata *ppd;
   1912	int lstate;
   1913	/*
   1914	 * set_sc2vlt_tables writes the information contained in *data
    1915	 * to four 64-bit registers SendSC2VLt[0-3]. We need to make
    1916	 * sure max_len covers at least the total size of the four
    1917	 * SendSC2VLt[0-3] registers.
   1918	 */
   1919	size_t size = 4 * sizeof(u64);
   1920
   1921	if (n_blocks != 1 || async_update || smp_length_check(size, max_len)) {
   1922		smp->status |= IB_SMP_INVALID_FIELD;
   1923		return reply((struct ib_mad_hdr *)smp);
   1924	}
   1925
   1926	/* IB numbers ports from 1, hw from 0 */
   1927	ppd = dd->pport + (port - 1);
   1928	lstate = driver_lstate(ppd);
   1929	/*
   1930	 * it's known that async_update is 0 by this point, but include
   1931	 * the explicit check for clarity
   1932	 */
   1933	if (!async_update &&
   1934	    (lstate == IB_PORT_ARMED || lstate == IB_PORT_ACTIVE)) {
   1935		smp->status |= IB_SMP_INVALID_FIELD;
   1936		return reply((struct ib_mad_hdr *)smp);
   1937	}
   1938
   1939	set_sc2vlt_tables(dd, vp);
   1940
   1941	return __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port, resp_len,
   1942					max_len);
   1943}
   1944
   1945static int __subn_get_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
   1946				     struct ib_device *ibdev, u32 port,
   1947				     u32 *resp_len, u32 max_len)
   1948{
   1949	u32 n_blocks = OPA_AM_NPORT(am);
   1950	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
   1951	struct hfi1_pportdata *ppd;
   1952	void *vp = (void *)data;
   1953	int size = sizeof(struct sc2vlnt);
   1954
   1955	if (n_blocks != 1 || smp_length_check(size, max_len)) {
   1956		smp->status |= IB_SMP_INVALID_FIELD;
   1957		return reply((struct ib_mad_hdr *)smp);
   1958	}
   1959
   1960	ppd = dd->pport + (port - 1);
   1961
   1962	fm_get_table(ppd, FM_TBL_SC2VLNT, vp);
   1963
   1964	if (resp_len)
   1965		*resp_len += size;
   1966
   1967	return reply((struct ib_mad_hdr *)smp);
   1968}
   1969
   1970static int __subn_set_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
   1971				     struct ib_device *ibdev, u32 port,
   1972				     u32 *resp_len, u32 max_len)
   1973{
   1974	u32 n_blocks = OPA_AM_NPORT(am);
   1975	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
   1976	struct hfi1_pportdata *ppd;
   1977	void *vp = (void *)data;
   1978	int lstate;
   1979	int size = sizeof(struct sc2vlnt);
   1980
   1981	if (n_blocks != 1 || smp_length_check(size, max_len)) {
   1982		smp->status |= IB_SMP_INVALID_FIELD;
   1983		return reply((struct ib_mad_hdr *)smp);
   1984	}
   1985
   1986	/* IB numbers ports from 1, hw from 0 */
   1987	ppd = dd->pport + (port - 1);
   1988	lstate = driver_lstate(ppd);
   1989	if (lstate == IB_PORT_ARMED || lstate == IB_PORT_ACTIVE) {
   1990		smp->status |= IB_SMP_INVALID_FIELD;
   1991		return reply((struct ib_mad_hdr *)smp);
   1992	}
   1993
   1994	ppd = dd->pport + (port - 1);
   1995
   1996	fm_set_table(ppd, FM_TBL_SC2VLNT, vp);
   1997
   1998	return __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port,
   1999					 resp_len, max_len);
   2000}
   2001
   2002static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
   2003			      struct ib_device *ibdev, u32 port,
   2004			      u32 *resp_len, u32 max_len)
   2005{
   2006	u32 nports = OPA_AM_NPORT(am);
   2007	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
   2008	u32 lstate;
   2009	struct hfi1_ibport *ibp;
   2010	struct hfi1_pportdata *ppd;
   2011	struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
   2012
   2013	if (nports != 1 || smp_length_check(sizeof(*psi), max_len)) {
   2014		smp->status |= IB_SMP_INVALID_FIELD;
   2015		return reply((struct ib_mad_hdr *)smp);
   2016	}
   2017
   2018	ibp = to_iport(ibdev, port);
   2019	ppd = ppd_from_ibp(ibp);
   2020
   2021	lstate = driver_lstate(ppd);
   2022
   2023	if (start_of_sm_config && (lstate == IB_PORT_INIT))
   2024		ppd->is_sm_config_started = 1;
   2025
   2026	psi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4;
   2027	psi->port_states.ledenable_offlinereason |=
   2028		ppd->is_sm_config_started << 5;
   2029	psi->port_states.ledenable_offlinereason |=
   2030		ppd->offline_disabled_reason;
   2031
   2032	psi->port_states.portphysstate_portstate =
   2033		(driver_pstate(ppd) << 4) | (lstate & 0xf);
   2034	psi->link_width_downgrade_tx_active =
   2035		cpu_to_be16(ppd->link_width_downgrade_tx_active);
   2036	psi->link_width_downgrade_rx_active =
   2037		cpu_to_be16(ppd->link_width_downgrade_rx_active);
   2038	if (resp_len)
   2039		*resp_len += sizeof(struct opa_port_state_info);
   2040
   2041	return reply((struct ib_mad_hdr *)smp);
   2042}
   2043
   2044static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
   2045			      struct ib_device *ibdev, u32 port,
   2046			      u32 *resp_len, u32 max_len, int local_mad)
   2047{
   2048	u32 nports = OPA_AM_NPORT(am);
   2049	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
   2050	u32 ls_old;
   2051	u8 ls_new, ps_new;
   2052	struct hfi1_ibport *ibp;
   2053	struct hfi1_pportdata *ppd;
   2054	struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
   2055	int ret, invalid = 0;
   2056
   2057	if (nports != 1 || smp_length_check(sizeof(*psi), max_len)) {
   2058		smp->status |= IB_SMP_INVALID_FIELD;
   2059		return reply((struct ib_mad_hdr *)smp);
   2060	}
   2061
   2062	ibp = to_iport(ibdev, port);
   2063	ppd = ppd_from_ibp(ibp);
   2064
   2065	ls_old = driver_lstate(ppd);
   2066
   2067	ls_new = port_states_to_logical_state(&psi->port_states);
   2068	ps_new = port_states_to_phys_state(&psi->port_states);
   2069
   2070	if (ls_old == IB_PORT_INIT) {
   2071		if (start_of_sm_config) {
   2072			if (ls_new == ls_old || (ls_new == IB_PORT_ARMED))
   2073				ppd->is_sm_config_started = 1;
   2074		} else if (ls_new == IB_PORT_ARMED) {
   2075			if (ppd->is_sm_config_started == 0) {
   2076				invalid = 1;
   2077				smp->status |= IB_SMP_INVALID_FIELD;
   2078			}
   2079		}
   2080	}
   2081
   2082	if (!invalid) {
   2083		ret = set_port_states(ppd, smp, ls_new, ps_new, local_mad);
   2084		if (ret)
   2085			return ret;
   2086	}
   2087
   2088	return __subn_get_opa_psi(smp, am, data, ibdev, port, resp_len,
   2089				  max_len);
   2090}
   2091
   2092static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data,
   2093				     struct ib_device *ibdev, u32 port,
   2094				     u32 *resp_len, u32 max_len)
   2095{
   2096	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
   2097	u32 addr = OPA_AM_CI_ADDR(am);
   2098	u32 len = OPA_AM_CI_LEN(am) + 1;
   2099	int ret;
   2100
   2101	if (dd->pport->port_type != PORT_TYPE_QSFP ||
   2102	    smp_length_check(len, max_len)) {
   2103		smp->status |= IB_SMP_INVALID_FIELD;
   2104		return reply((struct ib_mad_hdr *)smp);
   2105	}
   2106
   2107#define __CI_PAGE_SIZE BIT(7) /* 128 bytes */
   2108#define __CI_PAGE_MASK ~(__CI_PAGE_SIZE - 1)
   2109#define __CI_PAGE_NUM(a) ((a) & __CI_PAGE_MASK)
   2110
   2111	/*
   2112	 * check that addr is within spec, and
   2113	 * addr and (addr + len - 1) are on the same "page"
   2114	 */
   2115	if (addr >= 4096 ||
   2116	    (__CI_PAGE_NUM(addr) != __CI_PAGE_NUM(addr + len - 1))) {
   2117		smp->status |= IB_SMP_INVALID_FIELD;
   2118		return reply((struct ib_mad_hdr *)smp);
   2119	}
   2120
   2121	ret = get_cable_info(dd, port, addr, len, data);
   2122
   2123	if (ret == -ENODEV) {
   2124		smp->status |= IB_SMP_UNSUP_METH_ATTR;
   2125		return reply((struct ib_mad_hdr *)smp);
   2126	}
   2127
   2128	/* The address range for the CableInfo SMA query is wider than the
   2129	 * memory available on the QSFP cable. We want to return a valid
   2130	 * response, albeit zeroed out, for address ranges beyond available
   2131	 * memory but that are within the CableInfo query spec
   2132	 */
   2133	if (ret < 0 && ret != -ERANGE) {
   2134		smp->status |= IB_SMP_INVALID_FIELD;
   2135		return reply((struct ib_mad_hdr *)smp);
   2136	}
   2137
   2138	if (resp_len)
   2139		*resp_len += len;
   2140
   2141	return reply((struct ib_mad_hdr *)smp);
   2142}
   2143
   2144static int __subn_get_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
   2145			      struct ib_device *ibdev, u32 port, u32 *resp_len,
   2146			      u32 max_len)
   2147{
   2148	u32 num_ports = OPA_AM_NPORT(am);
   2149	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
   2150	struct hfi1_pportdata *ppd;
   2151	struct buffer_control *p = (struct buffer_control *)data;
   2152	int size = sizeof(struct buffer_control);
   2153
   2154	if (num_ports != 1 || smp_length_check(size, max_len)) {
   2155		smp->status |= IB_SMP_INVALID_FIELD;
   2156		return reply((struct ib_mad_hdr *)smp);
   2157	}
   2158
   2159	ppd = dd->pport + (port - 1);
   2160	fm_get_table(ppd, FM_TBL_BUFFER_CONTROL, p);
   2161	trace_bct_get(dd, p);
   2162	if (resp_len)
   2163		*resp_len += size;
   2164
   2165	return reply((struct ib_mad_hdr *)smp);
   2166}
   2167
   2168static int __subn_set_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
   2169			      struct ib_device *ibdev, u32 port, u32 *resp_len,
   2170			      u32 max_len)
   2171{
   2172	u32 num_ports = OPA_AM_NPORT(am);
   2173	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
   2174	struct hfi1_pportdata *ppd;
   2175	struct buffer_control *p = (struct buffer_control *)data;
   2176
   2177	if (num_ports != 1 || smp_length_check(sizeof(*p), max_len)) {
   2178		smp->status |= IB_SMP_INVALID_FIELD;
   2179		return reply((struct ib_mad_hdr *)smp);
   2180	}
   2181	ppd = dd->pport + (port - 1);
   2182	trace_bct_set(dd, p);
   2183	if (fm_set_table(ppd, FM_TBL_BUFFER_CONTROL, p) < 0) {
   2184		smp->status |= IB_SMP_INVALID_FIELD;
   2185		return reply((struct ib_mad_hdr *)smp);
   2186	}
   2187
   2188	return __subn_get_opa_bct(smp, am, data, ibdev, port, resp_len,
   2189				  max_len);
   2190}
   2191
   2192static int __subn_get_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
   2193				 struct ib_device *ibdev, u32 port,
   2194				 u32 *resp_len, u32 max_len)
   2195{
   2196	struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
   2197	u32 num_ports = OPA_AM_NPORT(am);
   2198	u8 section = (am & 0x00ff0000) >> 16;
   2199	u8 *p = data;
   2200	int size = 256;
   2201
   2202	if (num_ports != 1 || smp_length_check(size, max_len)) {
   2203		smp->status |= IB_SMP_INVALID_FIELD;
   2204		return reply((struct ib_mad_hdr *)smp);
   2205	}
   2206
   2207	switch (section) {
   2208	case OPA_VLARB_LOW_ELEMENTS:
   2209		fm_get_table(ppd, FM_TBL_VL_LOW_ARB, p);
   2210		break;
   2211	case OPA_VLARB_HIGH_ELEMENTS:
   2212		fm_get_table(ppd, FM_TBL_VL_HIGH_ARB, p);
   2213		break;
   2214	case OPA_VLARB_PREEMPT_ELEMENTS:
   2215		fm_get_table(ppd, FM_TBL_VL_PREEMPT_ELEMS, p);
   2216		break;
   2217	case OPA_VLARB_PREEMPT_MATRIX:
   2218		fm_get_table(ppd, FM_TBL_VL_PREEMPT_MATRIX, p);
   2219		break;
   2220	default:
   2221		pr_warn("OPA SubnGet(VL Arb) AM Invalid : 0x%x\n",
   2222			be32_to_cpu(smp->attr_mod));
   2223		smp->status |= IB_SMP_INVALID_FIELD;
   2224		size = 0;
   2225		break;
   2226	}
   2227
   2228	if (size > 0 && resp_len)
   2229		*resp_len += size;
   2230
   2231	return reply((struct ib_mad_hdr *)smp);
   2232}
   2233
   2234static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
   2235				 struct ib_device *ibdev, u32 port,
   2236				 u32 *resp_len, u32 max_len)
   2237{
   2238	struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
   2239	u32 num_ports = OPA_AM_NPORT(am);
   2240	u8 section = (am & 0x00ff0000) >> 16;
   2241	u8 *p = data;
   2242	int size = 256;
   2243
   2244	if (num_ports != 1 || smp_length_check(size, max_len)) {
   2245		smp->status |= IB_SMP_INVALID_FIELD;
   2246		return reply((struct ib_mad_hdr *)smp);
   2247	}
   2248
   2249	switch (section) {
   2250	case OPA_VLARB_LOW_ELEMENTS:
   2251		(void)fm_set_table(ppd, FM_TBL_VL_LOW_ARB, p);
   2252		break;
   2253	case OPA_VLARB_HIGH_ELEMENTS:
   2254		(void)fm_set_table(ppd, FM_TBL_VL_HIGH_ARB, p);
   2255		break;
   2256	/*
    2257	 * Neither OPA_VLARB_PREEMPT_ELEMENTS nor OPA_VLARB_PREEMPT_MATRIX
    2258	 * can be changed from the default values.
   2259	 */
   2260	case OPA_VLARB_PREEMPT_ELEMENTS:
   2261	case OPA_VLARB_PREEMPT_MATRIX:
   2262		smp->status |= IB_SMP_UNSUP_METH_ATTR;
   2263		break;
   2264	default:
   2265		pr_warn("OPA SubnSet(VL Arb) AM Invalid : 0x%x\n",
   2266			be32_to_cpu(smp->attr_mod));
   2267		smp->status |= IB_SMP_INVALID_FIELD;
   2268		break;
   2269	}
   2270
   2271	return __subn_get_opa_vl_arb(smp, am, data, ibdev, port, resp_len,
   2272				     max_len);
   2273}
   2274
   2275struct opa_pma_mad {
   2276	struct ib_mad_hdr mad_hdr;
   2277	u8 data[2024];
   2278} __packed;
   2279
   2280struct opa_port_status_req {
   2281	__u8 port_num;
   2282	__u8 reserved[3];
   2283	__be32 vl_select_mask;
   2284};
   2285
   2286#define VL_MASK_ALL		0x00000000000080ffUL
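        /* 0x80ff selects bits 0-7 and bit 15, i.e. VL0-VL7 plus VL15. */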
   2287
   2288struct opa_port_status_rsp {
   2289	__u8 port_num;
   2290	__u8 reserved[3];
   2291	__be32  vl_select_mask;
   2292
   2293	/* Data counters */
   2294	__be64 port_xmit_data;
   2295	__be64 port_rcv_data;
   2296	__be64 port_xmit_pkts;
   2297	__be64 port_rcv_pkts;
   2298	__be64 port_multicast_xmit_pkts;
   2299	__be64 port_multicast_rcv_pkts;
   2300	__be64 port_xmit_wait;
   2301	__be64 sw_port_congestion;
   2302	__be64 port_rcv_fecn;
   2303	__be64 port_rcv_becn;
   2304	__be64 port_xmit_time_cong;
   2305	__be64 port_xmit_wasted_bw;
   2306	__be64 port_xmit_wait_data;
   2307	__be64 port_rcv_bubble;
   2308	__be64 port_mark_fecn;
   2309	/* Error counters */
   2310	__be64 port_rcv_constraint_errors;
   2311	__be64 port_rcv_switch_relay_errors;
   2312	__be64 port_xmit_discards;
   2313	__be64 port_xmit_constraint_errors;
   2314	__be64 port_rcv_remote_physical_errors;
   2315	__be64 local_link_integrity_errors;
   2316	__be64 port_rcv_errors;
   2317	__be64 excessive_buffer_overruns;
   2318	__be64 fm_config_errors;
   2319	__be32 link_error_recovery;
   2320	__be32 link_downed;
   2321	u8 uncorrectable_errors;
   2322
   2323	u8 link_quality_indicator; /* 5res, 3bit */
   2324	u8 res2[6];
   2325	struct _vls_pctrs {
   2326		/* per-VL Data counters */
   2327		__be64 port_vl_xmit_data;
   2328		__be64 port_vl_rcv_data;
   2329		__be64 port_vl_xmit_pkts;
   2330		__be64 port_vl_rcv_pkts;
   2331		__be64 port_vl_xmit_wait;
   2332		__be64 sw_port_vl_congestion;
   2333		__be64 port_vl_rcv_fecn;
   2334		__be64 port_vl_rcv_becn;
   2335		__be64 port_xmit_time_cong;
   2336		__be64 port_vl_xmit_wasted_bw;
   2337		__be64 port_vl_xmit_wait_data;
   2338		__be64 port_vl_rcv_bubble;
   2339		__be64 port_vl_mark_fecn;
   2340		__be64 port_vl_xmit_discards;
   2341	} vls[]; /* real array size defined by # bits set in vl_select_mask */
   2342};
   2343
   2344enum counter_selects {
   2345	CS_PORT_XMIT_DATA			= (1 << 31),
   2346	CS_PORT_RCV_DATA			= (1 << 30),
   2347	CS_PORT_XMIT_PKTS			= (1 << 29),
   2348	CS_PORT_RCV_PKTS			= (1 << 28),
   2349	CS_PORT_MCAST_XMIT_PKTS			= (1 << 27),
   2350	CS_PORT_MCAST_RCV_PKTS			= (1 << 26),
   2351	CS_PORT_XMIT_WAIT			= (1 << 25),
   2352	CS_SW_PORT_CONGESTION			= (1 << 24),
   2353	CS_PORT_RCV_FECN			= (1 << 23),
   2354	CS_PORT_RCV_BECN			= (1 << 22),
   2355	CS_PORT_XMIT_TIME_CONG			= (1 << 21),
   2356	CS_PORT_XMIT_WASTED_BW			= (1 << 20),
   2357	CS_PORT_XMIT_WAIT_DATA			= (1 << 19),
   2358	CS_PORT_RCV_BUBBLE			= (1 << 18),
   2359	CS_PORT_MARK_FECN			= (1 << 17),
   2360	CS_PORT_RCV_CONSTRAINT_ERRORS		= (1 << 16),
   2361	CS_PORT_RCV_SWITCH_RELAY_ERRORS		= (1 << 15),
   2362	CS_PORT_XMIT_DISCARDS			= (1 << 14),
   2363	CS_PORT_XMIT_CONSTRAINT_ERRORS		= (1 << 13),
   2364	CS_PORT_RCV_REMOTE_PHYSICAL_ERRORS	= (1 << 12),
   2365	CS_LOCAL_LINK_INTEGRITY_ERRORS		= (1 << 11),
   2366	CS_PORT_RCV_ERRORS			= (1 << 10),
   2367	CS_EXCESSIVE_BUFFER_OVERRUNS		= (1 << 9),
   2368	CS_FM_CONFIG_ERRORS			= (1 << 8),
   2369	CS_LINK_ERROR_RECOVERY			= (1 << 7),
   2370	CS_LINK_DOWNED				= (1 << 6),
   2371	CS_UNCORRECTABLE_ERRORS			= (1 << 5),
   2372};
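        /*
         * Counter-select bits used with counter_select_mask in
         * struct opa_clear_port_status below; each set bit selects the
         * corresponding counter of opa_port_status_rsp for clearing.
         */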
   2373
   2374struct opa_clear_port_status {
   2375	__be64 port_select_mask[4];
   2376	__be32 counter_select_mask;
   2377};
   2378
   2379struct opa_aggregate {
   2380	__be16 attr_id;
   2381	__be16 err_reqlength;	/* 1 bit, 8 res, 7 bit */
   2382	__be32 attr_mod;
   2383	u8 data[];
   2384};
   2385
   2386#define MSK_LLI 0x000000f0
   2387#define MSK_LLI_SFT 4
   2388#define MSK_LER 0x0000000f
   2389#define MSK_LER_SFT 0
   2390#define ADD_LLI 8
   2391#define ADD_LER 2
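        /*
         * The resolution field of a data-counters request selects how many
         * bits to right-shift the LocalLinkIntegrity (LLI) and
         * LinkErrorRecovery (LER) contributions before they are folded into
         * port_error_counter_summary; see get_error_counter_summary().
         */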
   2392
   2393/* Request contains first three fields, response contains those plus the rest */
   2394struct opa_port_data_counters_msg {
   2395	__be64 port_select_mask[4];
   2396	__be32 vl_select_mask;
   2397	__be32 resolution;
   2398
   2399	/* Response fields follow */
   2400	struct _port_dctrs {
   2401		u8 port_number;
   2402		u8 reserved2[3];
   2403		__be32 link_quality_indicator; /* 29res, 3bit */
   2404
   2405		/* Data counters */
   2406		__be64 port_xmit_data;
   2407		__be64 port_rcv_data;
   2408		__be64 port_xmit_pkts;
   2409		__be64 port_rcv_pkts;
   2410		__be64 port_multicast_xmit_pkts;
   2411		__be64 port_multicast_rcv_pkts;
   2412		__be64 port_xmit_wait;
   2413		__be64 sw_port_congestion;
   2414		__be64 port_rcv_fecn;
   2415		__be64 port_rcv_becn;
   2416		__be64 port_xmit_time_cong;
   2417		__be64 port_xmit_wasted_bw;
   2418		__be64 port_xmit_wait_data;
   2419		__be64 port_rcv_bubble;
   2420		__be64 port_mark_fecn;
   2421
   2422		__be64 port_error_counter_summary;
   2423		/* Sum of error counts/port */
   2424
   2425		struct _vls_dctrs {
   2426			/* per-VL Data counters */
   2427			__be64 port_vl_xmit_data;
   2428			__be64 port_vl_rcv_data;
   2429			__be64 port_vl_xmit_pkts;
   2430			__be64 port_vl_rcv_pkts;
   2431			__be64 port_vl_xmit_wait;
   2432			__be64 sw_port_vl_congestion;
   2433			__be64 port_vl_rcv_fecn;
   2434			__be64 port_vl_rcv_becn;
   2435			__be64 port_xmit_time_cong;
   2436			__be64 port_vl_xmit_wasted_bw;
   2437			__be64 port_vl_xmit_wait_data;
   2438			__be64 port_vl_rcv_bubble;
   2439			__be64 port_vl_mark_fecn;
   2440		} vls[0];
   2441		/* array size defined by #bits set in vl_select_mask*/
   2442	} port[1]; /* array size defined by  #ports in attribute modifier */
   2443};
   2444
   2445struct opa_port_error_counters64_msg {
   2446	/*
   2447	 * Request contains first two fields, response contains the
   2448	 * whole magilla
   2449	 */
   2450	__be64 port_select_mask[4];
   2451	__be32 vl_select_mask;
   2452
   2453	/* Response-only fields follow */
   2454	__be32 reserved1;
   2455	struct _port_ectrs {
   2456		u8 port_number;
   2457		u8 reserved2[7];
   2458		__be64 port_rcv_constraint_errors;
   2459		__be64 port_rcv_switch_relay_errors;
   2460		__be64 port_xmit_discards;
   2461		__be64 port_xmit_constraint_errors;
   2462		__be64 port_rcv_remote_physical_errors;
   2463		__be64 local_link_integrity_errors;
   2464		__be64 port_rcv_errors;
   2465		__be64 excessive_buffer_overruns;
   2466		__be64 fm_config_errors;
   2467		__be32 link_error_recovery;
   2468		__be32 link_downed;
   2469		u8 uncorrectable_errors;
   2470		u8 reserved3[7];
   2471		struct _vls_ectrs {
   2472			__be64 port_vl_xmit_discards;
   2473		} vls[0];
   2474		/* array size defined by #bits set in vl_select_mask */
   2475	} port[1]; /* array size defined by #ports in attribute modifier */
   2476};
   2477
   2478struct opa_port_error_info_msg {
   2479	__be64 port_select_mask[4];
   2480	__be32 error_info_select_mask;
   2481	__be32 reserved1;
   2482	struct _port_ei {
   2483		u8 port_number;
   2484		u8 reserved2[7];
   2485
   2486		/* PortRcvErrorInfo */
   2487		struct {
   2488			u8 status_and_code;
   2489			union {
   2490				u8 raw[17];
   2491				struct {
   2492					/* EI1to12 format */
   2493					u8 packet_flit1[8];
   2494					u8 packet_flit2[8];
   2495					u8 remaining_flit_bits12;
   2496				} ei1to12;
   2497				struct {
   2498					u8 packet_bytes[8];
   2499					u8 remaining_flit_bits;
   2500				} ei13;
   2501			} ei;
   2502			u8 reserved3[6];
   2503		} __packed port_rcv_ei;
   2504
   2505		/* ExcessiveBufferOverrunInfo */
   2506		struct {
   2507			u8 status_and_sc;
   2508			u8 reserved4[7];
   2509		} __packed excessive_buffer_overrun_ei;
   2510
   2511		/* PortXmitConstraintErrorInfo */
   2512		struct {
   2513			u8 status;
   2514			u8 reserved5;
   2515			__be16 pkey;
   2516			__be32 slid;
   2517		} __packed port_xmit_constraint_ei;
   2518
   2519		/* PortRcvConstraintErrorInfo */
   2520		struct {
   2521			u8 status;
   2522			u8 reserved6;
   2523			__be16 pkey;
   2524			__be32 slid;
   2525		} __packed port_rcv_constraint_ei;
   2526
   2527		/* PortRcvSwitchRelayErrorInfo */
   2528		struct {
   2529			u8 status_and_code;
   2530			u8 reserved7[3];
   2531			__u32 error_info;
   2532		} __packed port_rcv_switch_relay_ei;
   2533
   2534		/* UncorrectableErrorInfo */
   2535		struct {
   2536			u8 status_and_code;
   2537			u8 reserved8;
   2538		} __packed uncorrectable_ei;
   2539
   2540		/* FMConfigErrorInfo */
   2541		struct {
   2542			u8 status_and_code;
   2543			u8 error_info;
   2544		} __packed fm_config_ei;
   2545		__u32 reserved9;
   2546	} port[1]; /* actual array size defined by #ports in attr modifier */
   2547};
   2548
   2549/* opa_port_error_info_msg error_info_select_mask bit definitions */
   2550enum error_info_selects {
   2551	ES_PORT_RCV_ERROR_INFO			= (1 << 31),
   2552	ES_EXCESSIVE_BUFFER_OVERRUN_INFO	= (1 << 30),
   2553	ES_PORT_XMIT_CONSTRAINT_ERROR_INFO	= (1 << 29),
   2554	ES_PORT_RCV_CONSTRAINT_ERROR_INFO	= (1 << 28),
   2555	ES_PORT_RCV_SWITCH_RELAY_ERROR_INFO	= (1 << 27),
   2556	ES_UNCORRECTABLE_ERROR_INFO		= (1 << 26),
   2557	ES_FM_CONFIG_ERROR_INFO			= (1 << 25)
   2558};
   2559
   2560static int pma_get_opa_classportinfo(struct opa_pma_mad *pmp,
   2561				     struct ib_device *ibdev, u32 *resp_len)
   2562{
   2563	struct opa_class_port_info *p =
   2564		(struct opa_class_port_info *)pmp->data;
   2565
   2566	memset(pmp->data, 0, sizeof(pmp->data));
   2567
   2568	if (pmp->mad_hdr.attr_mod != 0)
   2569		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
   2570
   2571	p->base_version = OPA_MGMT_BASE_VERSION;
   2572	p->class_version = OPA_SM_CLASS_VERSION;
   2573	/*
   2574	 * Expected response time is 4.096 usec. * 2^18 == 1.073741824 sec.
   2575	 */
   2576	p->cap_mask2_resp_time = cpu_to_be32(18);
   2577
   2578	if (resp_len)
   2579		*resp_len += sizeof(*p);
   2580
   2581	return reply((struct ib_mad_hdr *)pmp);
   2582}
   2583
   2584static void a0_portstatus(struct hfi1_pportdata *ppd,
   2585			  struct opa_port_status_rsp *rsp)
   2586{
   2587	if (!is_bx(ppd->dd)) {
   2588		unsigned long vl;
   2589		u64 sum_vl_xmit_wait = 0;
   2590		unsigned long vl_all_mask = VL_MASK_ALL;
   2591
   2592		for_each_set_bit(vl, &vl_all_mask, BITS_PER_LONG) {
   2593			u64 tmp = sum_vl_xmit_wait +
   2594				  read_port_cntr(ppd, C_TX_WAIT_VL,
   2595						 idx_from_vl(vl));
   2596			if (tmp < sum_vl_xmit_wait) {
   2597				/* we wrapped */
   2598				sum_vl_xmit_wait = (u64)~0;
   2599				break;
   2600			}
   2601			sum_vl_xmit_wait = tmp;
   2602		}
   2603		if (be64_to_cpu(rsp->port_xmit_wait) > sum_vl_xmit_wait)
   2604			rsp->port_xmit_wait = cpu_to_be64(sum_vl_xmit_wait);
   2605	}
   2606}
   2607
   2608/**
   2609 * tx_link_width - convert link width bitmask to integer
   2610 * value representing actual link width.
   2611 * @link_width: width of active link
   2612 * @return: return index of the bit set in link_width var
   2613 *
    2614 * The function converts and returns the index of the bit set
    2615 * that indicates the current link width.
   2616 */
   2617u16 tx_link_width(u16 link_width)
   2618{
   2619	int n = LINK_WIDTH_DEFAULT;
   2620	u16 tx_width = n;
   2621
   2622	while (link_width && n) {
   2623		if (link_width & (1 << (n - 1))) {
   2624			tx_width = n;
   2625			break;
   2626		}
   2627		n--;
   2628	}
   2629
   2630	return tx_width;
   2631}
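        /*
         * Example (assuming LINK_WIDTH_DEFAULT is 4): link_width 0x8 (4X
         * active) returns 4, 0x2 (2X active) returns 2, and 0 falls back to
         * LINK_WIDTH_DEFAULT.
         */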
   2632
   2633/**
    2634 * get_xmit_wait_counters - Convert the HFI's SendWaitCnt/SendWaitVlCnt
    2635 * counters from TXE cycle times to flit times.
   2636 * @ppd: info of physical Hfi port
   2637 * @link_width: width of active link
   2638 * @link_speed: speed of active link
    2639 * @vl: VL0-VL7 or VL15 for a PortVLXmitWait counter request;
    2640 * if vl is C_VL_COUNT, it represents a SendWaitCnt
    2641 * counter request
    2642 * @return: SendWaitCnt/SendWaitVlCnt counter value for the given vl.
   2643 *
    2644 * Convert the SendWaitCnt/SendWaitVlCnt counters from TXE cycle times to
    2645 * flit times. Call this function to sample these counters. The function
    2646 * computes the delta since the previous sample using ppd->prev_link_width
    2647 * and ppd->port_vl_xmit_wait_last, and then updates them to the current
    2648 * link_width and port_vl_xmit_wait_curr.
   2649 */
   2650u64 get_xmit_wait_counters(struct hfi1_pportdata *ppd,
   2651			   u16 link_width, u16 link_speed, int vl)
   2652{
   2653	u64 port_vl_xmit_wait_curr;
   2654	u64 delta_vl_xmit_wait;
   2655	u64 xmit_wait_val;
   2656
   2657	if (vl > C_VL_COUNT)
   2658		return  0;
   2659	if (vl < C_VL_COUNT)
   2660		port_vl_xmit_wait_curr =
   2661			read_port_cntr(ppd, C_TX_WAIT_VL, vl);
   2662	else
   2663		port_vl_xmit_wait_curr =
   2664			read_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL);
   2665
   2666	xmit_wait_val =
   2667		port_vl_xmit_wait_curr -
   2668		ppd->port_vl_xmit_wait_last[vl];
   2669	delta_vl_xmit_wait =
   2670		convert_xmit_counter(xmit_wait_val,
   2671				     ppd->prev_link_width,
   2672				     link_speed);
   2673
   2674	ppd->vl_xmit_flit_cnt[vl] += delta_vl_xmit_wait;
   2675	ppd->port_vl_xmit_wait_last[vl] = port_vl_xmit_wait_curr;
   2676	ppd->prev_link_width = link_width;
   2677
   2678	return ppd->vl_xmit_flit_cnt[vl];
   2679}
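        /*
         * In short: vl_xmit_flit_cnt[vl] accumulates
         * convert_xmit_counter(curr - last, prev_link_width, link_speed),
         * where curr is the freshly read counter and last/prev_link_width
         * were saved by the previous call.
         */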
   2680
   2681static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
   2682				  struct ib_device *ibdev,
   2683				  u32 port, u32 *resp_len)
   2684{
   2685	struct opa_port_status_req *req =
   2686		(struct opa_port_status_req *)pmp->data;
   2687	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
   2688	struct opa_port_status_rsp *rsp;
   2689	unsigned long vl_select_mask = be32_to_cpu(req->vl_select_mask);
   2690	unsigned long vl;
   2691	size_t response_data_size;
   2692	u32 nports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
   2693	u32 port_num = req->port_num;
   2694	u8 num_vls = hweight64(vl_select_mask);
   2695	struct _vls_pctrs *vlinfo;
   2696	struct hfi1_ibport *ibp = to_iport(ibdev, port);
   2697	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
   2698	int vfi;
   2699	u64 tmp, tmp2;
   2700	u16 link_width;
   2701	u16 link_speed;
   2702
   2703	response_data_size = struct_size(rsp, vls, num_vls);
   2704	if (response_data_size > sizeof(pmp->data)) {
   2705		pmp->mad_hdr.status |= OPA_PM_STATUS_REQUEST_TOO_LARGE;
   2706		return reply((struct ib_mad_hdr *)pmp);
   2707	}
   2708
   2709	if (nports != 1 || (port_num && port_num != port) ||
   2710	    num_vls > OPA_MAX_VLS || (vl_select_mask & ~VL_MASK_ALL)) {
   2711		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
   2712		return reply((struct ib_mad_hdr *)pmp);
   2713	}
   2714
   2715	memset(pmp->data, 0, sizeof(pmp->data));
   2716
   2717	rsp = (struct opa_port_status_rsp *)pmp->data;
   2718	if (port_num)
   2719		rsp->port_num = port_num;
   2720	else
   2721		rsp->port_num = port;
   2722
   2723	rsp->port_rcv_constraint_errors =
   2724		cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
   2725					   CNTR_INVALID_VL));
   2726
   2727	hfi1_read_link_quality(dd, &rsp->link_quality_indicator);
   2728
   2729	rsp->vl_select_mask = cpu_to_be32((u32)vl_select_mask);
   2730	rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS,
   2731					  CNTR_INVALID_VL));
   2732	rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS,
   2733					 CNTR_INVALID_VL));
   2734	rsp->port_xmit_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_PKTS,
   2735					  CNTR_INVALID_VL));
   2736	rsp->port_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_PKTS,
   2737					 CNTR_INVALID_VL));
   2738	rsp->port_multicast_xmit_pkts =
   2739		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
   2740					  CNTR_INVALID_VL));
   2741	rsp->port_multicast_rcv_pkts =
   2742		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
   2743					  CNTR_INVALID_VL));
   2744	/*
   2745	 * Convert PortXmitWait counter from TXE cycle times
   2746	 * to flit times.
   2747	 */
   2748	link_width =
   2749		tx_link_width(ppd->link_width_downgrade_tx_active);
   2750	link_speed = get_link_speed(ppd->link_speed_active);
   2751	rsp->port_xmit_wait =
   2752		cpu_to_be64(get_xmit_wait_counters(ppd, link_width,
   2753						   link_speed, C_VL_COUNT));
   2754	rsp->port_rcv_fecn =
   2755		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL));
   2756	rsp->port_rcv_becn =
   2757		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL));
   2758	rsp->port_xmit_discards =
   2759		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD,
   2760					   CNTR_INVALID_VL));
   2761	rsp->port_xmit_constraint_errors =
   2762		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
   2763					   CNTR_INVALID_VL));
   2764	rsp->port_rcv_remote_physical_errors =
   2765		cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
   2766					  CNTR_INVALID_VL));
   2767	rsp->local_link_integrity_errors =
   2768		cpu_to_be64(read_dev_cntr(dd, C_DC_RX_REPLAY,
   2769					  CNTR_INVALID_VL));
   2770	tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
   2771	tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
   2772				   CNTR_INVALID_VL);
   2773	if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) {
   2774		/* overflow/wrapped */
   2775		rsp->link_error_recovery = cpu_to_be32(~0);
   2776	} else {
   2777		rsp->link_error_recovery = cpu_to_be32(tmp2);
   2778	}
   2779	rsp->port_rcv_errors =
   2780		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
   2781	rsp->excessive_buffer_overruns =
   2782		cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL));
   2783	rsp->fm_config_errors =
   2784		cpu_to_be64(read_dev_cntr(dd, C_DC_FM_CFG_ERR,
   2785					  CNTR_INVALID_VL));
   2786	rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
   2787						      CNTR_INVALID_VL));
   2788
   2789	/* rsp->uncorrectable_errors is 8 bits wide, and it pegs at 0xff */
   2790	tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
   2791	rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff;
   2792
   2793	vlinfo = &rsp->vls[0];
   2794	vfi = 0;
   2795	/* The vl_select_mask has been checked above, and we know
   2796	 * that it contains only entries which represent valid VLs.
   2797	 * So in the for_each_set_bit() loop below, we don't need
   2798	 * any additional checks for vl.
   2799	 */
   2800	for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) {
   2801		memset(vlinfo, 0, sizeof(*vlinfo));
   2802
   2803		tmp = read_dev_cntr(dd, C_DC_RX_FLIT_VL, idx_from_vl(vl));
   2804		rsp->vls[vfi].port_vl_rcv_data = cpu_to_be64(tmp);
   2805
   2806		rsp->vls[vfi].port_vl_rcv_pkts =
   2807			cpu_to_be64(read_dev_cntr(dd, C_DC_RX_PKT_VL,
   2808						  idx_from_vl(vl)));
   2809
   2810		rsp->vls[vfi].port_vl_xmit_data =
   2811			cpu_to_be64(read_port_cntr(ppd, C_TX_FLIT_VL,
   2812						   idx_from_vl(vl)));
   2813
   2814		rsp->vls[vfi].port_vl_xmit_pkts =
   2815			cpu_to_be64(read_port_cntr(ppd, C_TX_PKT_VL,
   2816						   idx_from_vl(vl)));
   2817		/*
   2818		 * Convert PortVlXmitWait counter from TXE cycle
   2819		 * times to flit times.
   2820		 */
   2821		rsp->vls[vfi].port_vl_xmit_wait =
   2822			cpu_to_be64(get_xmit_wait_counters(ppd, link_width,
   2823							   link_speed,
   2824							   idx_from_vl(vl)));
   2825
   2826		rsp->vls[vfi].port_vl_rcv_fecn =
   2827			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL,
   2828						  idx_from_vl(vl)));
   2829
   2830		rsp->vls[vfi].port_vl_rcv_becn =
   2831			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL,
   2832						  idx_from_vl(vl)));
   2833
   2834		rsp->vls[vfi].port_vl_xmit_discards =
   2835			cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD_VL,
   2836						   idx_from_vl(vl)));
   2837		vlinfo++;
   2838		vfi++;
   2839	}
   2840
   2841	a0_portstatus(ppd, rsp);
   2842
   2843	if (resp_len)
   2844		*resp_len += response_data_size;
   2845
   2846	return reply((struct ib_mad_hdr *)pmp);
   2847}
   2848
   2849static u64 get_error_counter_summary(struct ib_device *ibdev, u32 port,
   2850				     u8 res_lli, u8 res_ler)
   2851{
   2852	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
   2853	struct hfi1_ibport *ibp = to_iport(ibdev, port);
   2854	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
   2855	u64 error_counter_summary = 0, tmp;
   2856
   2857	error_counter_summary += read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
   2858						CNTR_INVALID_VL);
   2859	/* port_rcv_switch_relay_errors is 0 for HFIs */
   2860	error_counter_summary += read_port_cntr(ppd, C_SW_XMIT_DSCD,
   2861						CNTR_INVALID_VL);
   2862	error_counter_summary += read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
   2863						CNTR_INVALID_VL);
   2864	error_counter_summary += read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
   2865					       CNTR_INVALID_VL);
   2866	/* local link integrity must be right-shifted by the lli resolution */
   2867	error_counter_summary += (read_dev_cntr(dd, C_DC_RX_REPLAY,
   2868						CNTR_INVALID_VL) >> res_lli);
    2869	/* link error recovery must be right-shifted by the ler resolution */
   2870	tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
   2871	tmp += read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL);
   2872	error_counter_summary += (tmp >> res_ler);
   2873	error_counter_summary += read_dev_cntr(dd, C_DC_RCV_ERR,
   2874					       CNTR_INVALID_VL);
   2875	error_counter_summary += read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL);
   2876	error_counter_summary += read_dev_cntr(dd, C_DC_FM_CFG_ERR,
   2877					       CNTR_INVALID_VL);
   2878	/* ppd->link_downed is a 32-bit value */
   2879	error_counter_summary += read_port_cntr(ppd, C_SW_LINK_DOWN,
   2880						CNTR_INVALID_VL);
   2881	tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
   2882	/* this is an 8-bit quantity */
   2883	error_counter_summary += tmp < 0x100 ? (tmp & 0xff) : 0xff;
   2884
   2885	return error_counter_summary;
   2886}
   2887
   2888static void a0_datacounters(struct hfi1_pportdata *ppd, struct _port_dctrs *rsp)
   2889{
   2890	if (!is_bx(ppd->dd)) {
   2891		unsigned long vl;
   2892		u64 sum_vl_xmit_wait = 0;
   2893		unsigned long vl_all_mask = VL_MASK_ALL;
   2894
   2895		for_each_set_bit(vl, &vl_all_mask, BITS_PER_LONG) {
   2896			u64 tmp = sum_vl_xmit_wait +
   2897				  read_port_cntr(ppd, C_TX_WAIT_VL,
   2898						 idx_from_vl(vl));
   2899			if (tmp < sum_vl_xmit_wait) {
   2900				/* we wrapped */
   2901				sum_vl_xmit_wait = (u64)~0;
   2902				break;
   2903			}
   2904			sum_vl_xmit_wait = tmp;
   2905		}
   2906		if (be64_to_cpu(rsp->port_xmit_wait) > sum_vl_xmit_wait)
   2907			rsp->port_xmit_wait = cpu_to_be64(sum_vl_xmit_wait);
   2908	}
   2909}
   2910
   2911static void pma_get_opa_port_dctrs(struct ib_device *ibdev,
   2912				   struct _port_dctrs *rsp)
   2913{
   2914	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
   2915
   2916	rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS,
   2917						CNTR_INVALID_VL));
   2918	rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS,
   2919						CNTR_INVALID_VL));
   2920	rsp->port_xmit_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_PKTS,
   2921						CNTR_INVALID_VL));
   2922	rsp->port_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_PKTS,
   2923						CNTR_INVALID_VL));
   2924	rsp->port_multicast_xmit_pkts =
   2925		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
   2926					  CNTR_INVALID_VL));
   2927	rsp->port_multicast_rcv_pkts =
   2928		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
   2929					  CNTR_INVALID_VL));
   2930}
   2931
   2932static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
   2933				    struct ib_device *ibdev,
   2934				    u32 port, u32 *resp_len)
   2935{
   2936	struct opa_port_data_counters_msg *req =
   2937		(struct opa_port_data_counters_msg *)pmp->data;
   2938	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
   2939	struct hfi1_ibport *ibp = to_iport(ibdev, port);
   2940	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
   2941	struct _port_dctrs *rsp;
   2942	struct _vls_dctrs *vlinfo;
   2943	size_t response_data_size;
   2944	u32 num_ports;
   2945	u8 lq, num_vls;
   2946	u8 res_lli, res_ler;
   2947	u64 port_mask;
   2948	u32 port_num;
   2949	unsigned long vl;
   2950	unsigned long vl_select_mask;
   2951	int vfi;
   2952	u16 link_width;
   2953	u16 link_speed;
   2954
   2955	num_ports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
   2956	num_vls = hweight32(be32_to_cpu(req->vl_select_mask));
   2957	vl_select_mask = be32_to_cpu(req->vl_select_mask);
   2958	res_lli = (u8)(be32_to_cpu(req->resolution) & MSK_LLI) >> MSK_LLI_SFT;
   2959	res_lli = res_lli ? res_lli + ADD_LLI : 0;
   2960	res_ler = (u8)(be32_to_cpu(req->resolution) & MSK_LER) >> MSK_LER_SFT;
   2961	res_ler = res_ler ? res_ler + ADD_LER : 0;
   2962
   2963	if (num_ports != 1 || (vl_select_mask & ~VL_MASK_ALL)) {
   2964		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
   2965		return reply((struct ib_mad_hdr *)pmp);
   2966	}
   2967
   2968	/* Sanity check */
   2969	response_data_size = struct_size(req, port[0].vls, num_vls);
   2970
   2971	if (response_data_size > sizeof(pmp->data)) {
   2972		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
   2973		return reply((struct ib_mad_hdr *)pmp);
   2974	}
   2975
   2976	/*
   2977	 * The bit set in the mask needs to be consistent with the
   2978	 * port the request came in on.
   2979	 */
   2980	port_mask = be64_to_cpu(req->port_select_mask[3]);
   2981	port_num = find_first_bit((unsigned long *)&port_mask,
   2982				  sizeof(port_mask) * 8);
   2983
   2984	if (port_num != port) {
   2985		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
   2986		return reply((struct ib_mad_hdr *)pmp);
   2987	}
   2988
   2989	rsp = &req->port[0];
   2990	memset(rsp, 0, sizeof(*rsp));
   2991
   2992	rsp->port_number = port;
   2993	/*
   2994	 * Note that link_quality_indicator is a 32 bit quantity in
   2995	 * 'datacounters' queries (as opposed to 'portinfo' queries,
   2996	 * where it's a byte).
   2997	 */
   2998	hfi1_read_link_quality(dd, &lq);
   2999	rsp->link_quality_indicator = cpu_to_be32((u32)lq);
   3000	pma_get_opa_port_dctrs(ibdev, rsp);
   3001
   3002	/*
   3003	 * Convert PortXmitWait counter from TXE
   3004	 * cycle times to flit times.
   3005	 */
   3006	link_width =
   3007		tx_link_width(ppd->link_width_downgrade_tx_active);
   3008	link_speed = get_link_speed(ppd->link_speed_active);
   3009	rsp->port_xmit_wait =
   3010		cpu_to_be64(get_xmit_wait_counters(ppd, link_width,
   3011						   link_speed, C_VL_COUNT));
   3012	rsp->port_rcv_fecn =
   3013		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL));
   3014	rsp->port_rcv_becn =
   3015		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL));
   3016	rsp->port_error_counter_summary =
   3017		cpu_to_be64(get_error_counter_summary(ibdev, port,
   3018						      res_lli, res_ler));
   3019
   3020	vlinfo = &rsp->vls[0];
   3021	vfi = 0;
   3022	/* The vl_select_mask has been checked above, and we know
   3023	 * that it contains only entries which represent valid VLs.
   3024	 * So in the for_each_set_bit() loop below, we don't need
   3025	 * any additional checks for vl.
   3026	 */
   3027	for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) {
   3028		memset(vlinfo, 0, sizeof(*vlinfo));
   3029
   3030		rsp->vls[vfi].port_vl_xmit_data =
   3031			cpu_to_be64(read_port_cntr(ppd, C_TX_FLIT_VL,
   3032						   idx_from_vl(vl)));
   3033
   3034		rsp->vls[vfi].port_vl_rcv_data =
   3035			cpu_to_be64(read_dev_cntr(dd, C_DC_RX_FLIT_VL,
   3036						  idx_from_vl(vl)));
   3037
   3038		rsp->vls[vfi].port_vl_xmit_pkts =
   3039			cpu_to_be64(read_port_cntr(ppd, C_TX_PKT_VL,
   3040						   idx_from_vl(vl)));
   3041
   3042		rsp->vls[vfi].port_vl_rcv_pkts =
   3043			cpu_to_be64(read_dev_cntr(dd, C_DC_RX_PKT_VL,
   3044						  idx_from_vl(vl)));
   3045
   3046		/*
   3047		 * Convert PortVlXmitWait counter from TXE
   3048		 * cycle times to flit times.
   3049		 */
   3050		rsp->vls[vfi].port_vl_xmit_wait =
   3051			cpu_to_be64(get_xmit_wait_counters(ppd, link_width,
   3052							   link_speed,
   3053							   idx_from_vl(vl)));
   3054
   3055		rsp->vls[vfi].port_vl_rcv_fecn =
   3056			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL,
   3057						  idx_from_vl(vl)));
   3058		rsp->vls[vfi].port_vl_rcv_becn =
   3059			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL,
   3060						  idx_from_vl(vl)));
   3061
   3062		/* rsp->port_vl_xmit_time_cong is 0 for HFIs */
   3063		/* rsp->port_vl_xmit_wasted_bw ??? */
   3064		/* port_vl_xmit_wait_data - TXE (table 13-9 HFI spec) ???
   3065		 * does this differ from rsp->vls[vfi].port_vl_xmit_wait
   3066		 */
   3067		/*rsp->vls[vfi].port_vl_mark_fecn =
   3068		 *	cpu_to_be64(read_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT
   3069		 *		+ offset));
   3070		 */
   3071		vlinfo++;
   3072		vfi++;
   3073	}
   3074
   3075	a0_datacounters(ppd, rsp);
   3076
   3077	if (resp_len)
   3078		*resp_len += response_data_size;
   3079
   3080	return reply((struct ib_mad_hdr *)pmp);
   3081}
   3082
   3083static int pma_get_ib_portcounters_ext(struct ib_pma_mad *pmp,
   3084				       struct ib_device *ibdev, u32 port)
   3085{
   3086	struct ib_pma_portcounters_ext *p = (struct ib_pma_portcounters_ext *)
   3087						pmp->data;
   3088	struct _port_dctrs rsp;
   3089
   3090	if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) {
   3091		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
   3092		goto bail;
   3093	}
   3094
   3095	memset(&rsp, 0, sizeof(rsp));
   3096	pma_get_opa_port_dctrs(ibdev, &rsp);
   3097
   3098	p->port_xmit_data = rsp.port_xmit_data;
   3099	p->port_rcv_data = rsp.port_rcv_data;
   3100	p->port_xmit_packets = rsp.port_xmit_pkts;
   3101	p->port_rcv_packets = rsp.port_rcv_pkts;
   3102	p->port_unicast_xmit_packets = 0;
   3103	p->port_unicast_rcv_packets =  0;
   3104	p->port_multicast_xmit_packets = rsp.port_multicast_xmit_pkts;
   3105	p->port_multicast_rcv_packets = rsp.port_multicast_rcv_pkts;
   3106
   3107bail:
   3108	return reply((struct ib_mad_hdr *)pmp);
   3109}
   3110
   3111static void pma_get_opa_port_ectrs(struct ib_device *ibdev,
   3112				   struct _port_ectrs *rsp, u32 port)
   3113{
   3114	u64 tmp, tmp2;
   3115	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
   3116	struct hfi1_ibport *ibp = to_iport(ibdev, port);
   3117	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
   3118
   3119	tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
   3120	tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
   3121					CNTR_INVALID_VL);
   3122	if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) {
   3123		/* overflow/wrapped */
   3124		rsp->link_error_recovery = cpu_to_be32(~0);
   3125	} else {
   3126		rsp->link_error_recovery = cpu_to_be32(tmp2);
   3127	}
   3128
   3129	rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
   3130						CNTR_INVALID_VL));
   3131	rsp->port_rcv_errors =
   3132		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
   3133	rsp->port_rcv_remote_physical_errors =
   3134		cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
   3135					  CNTR_INVALID_VL));
   3136	rsp->port_rcv_switch_relay_errors = 0;
   3137	rsp->port_xmit_discards =
   3138		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD,
   3139					   CNTR_INVALID_VL));
   3140	rsp->port_xmit_constraint_errors =
   3141		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
   3142					   CNTR_INVALID_VL));
   3143	rsp->port_rcv_constraint_errors =
   3144		cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
   3145					   CNTR_INVALID_VL));
   3146	rsp->local_link_integrity_errors =
   3147		cpu_to_be64(read_dev_cntr(dd, C_DC_RX_REPLAY,
   3148					  CNTR_INVALID_VL));
   3149	rsp->excessive_buffer_overruns =
   3150		cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL));
   3151}
   3152
   3153static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
   3154				  struct ib_device *ibdev,
   3155				  u32 port, u32 *resp_len)
   3156{
   3157	size_t response_data_size;
   3158	struct _port_ectrs *rsp;
   3159	u32 port_num;
   3160	struct opa_port_error_counters64_msg *req;
   3161	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
   3162	u32 num_ports;
   3163	u8 num_pslm;
   3164	u8 num_vls;
   3165	struct hfi1_ibport *ibp;
   3166	struct hfi1_pportdata *ppd;
   3167	struct _vls_ectrs *vlinfo;
   3168	unsigned long vl;
   3169	u64 port_mask, tmp;
   3170	unsigned long vl_select_mask;
   3171	int vfi;
   3172
   3173	req = (struct opa_port_error_counters64_msg *)pmp->data;
   3174
   3175	num_ports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
   3176
   3177	num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
   3178	num_vls = hweight32(be32_to_cpu(req->vl_select_mask));
   3179
   3180	if (num_ports != 1 || num_ports != num_pslm) {
   3181		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
   3182		return reply((struct ib_mad_hdr *)pmp);
   3183	}
   3184
   3185	response_data_size = struct_size(req, port[0].vls, num_vls);
   3186
   3187	if (response_data_size > sizeof(pmp->data)) {
   3188		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
   3189		return reply((struct ib_mad_hdr *)pmp);
   3190	}
   3191	/*
   3192	 * The bit set in the mask needs to be consistent with the
   3193	 * port the request came in on.
   3194	 */
   3195	port_mask = be64_to_cpu(req->port_select_mask[3]);
   3196	port_num = find_first_bit((unsigned long *)&port_mask,
   3197				  sizeof(port_mask) * 8);
   3198
   3199	if (port_num != port) {
   3200		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
   3201		return reply((struct ib_mad_hdr *)pmp);
   3202	}
   3203
   3204	rsp = &req->port[0];
   3205
   3206	ibp = to_iport(ibdev, port_num);
   3207	ppd = ppd_from_ibp(ibp);
   3208
   3209	memset(rsp, 0, sizeof(*rsp));
   3210	rsp->port_number = port_num;
   3211
   3212	pma_get_opa_port_ectrs(ibdev, rsp, port_num);
   3213
   3214	rsp->port_rcv_remote_physical_errors =
   3215		cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
   3216					  CNTR_INVALID_VL));
   3217	rsp->fm_config_errors =
   3218		cpu_to_be64(read_dev_cntr(dd, C_DC_FM_CFG_ERR,
   3219					  CNTR_INVALID_VL));
   3220	tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
   3221
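        	/* uncorrectable_errors is only 8 bits wide; saturate the 64-bit counter at 0xff */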
   3222	rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff;
   3223	rsp->port_rcv_errors =
   3224		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
   3225	vlinfo = &rsp->vls[0];
   3226	vfi = 0;
   3227	vl_select_mask = be32_to_cpu(req->vl_select_mask);
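        	/* only port_vl_xmit_discards is tracked per VL; the memset leaves the other per-VL error fields zero */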
   3228	for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) {
   3229		memset(vlinfo, 0, sizeof(*vlinfo));
   3230		rsp->vls[vfi].port_vl_xmit_discards =
   3231			cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD_VL,
   3232						   idx_from_vl(vl)));
   3233		vlinfo += 1;
   3234		vfi++;
   3235	}
   3236
   3237	if (resp_len)
   3238		*resp_len += response_data_size;
   3239
   3240	return reply((struct ib_mad_hdr *)pmp);
   3241}
   3242
   3243static int pma_get_ib_portcounters(struct ib_pma_mad *pmp,
   3244				   struct ib_device *ibdev, u32 port)
   3245{
   3246	struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
   3247		pmp->data;
   3248	struct _port_ectrs rsp;
   3249	u64 temp_link_overrun_errors;
   3250	u64 temp_64;
   3251	u32 temp_32;
   3252
   3253	memset(&rsp, 0, sizeof(rsp));
   3254	pma_get_opa_port_ectrs(ibdev, &rsp, port);
   3255
   3256	if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) {
   3257		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
   3258		goto bail;
   3259	}
   3260
   3261	p->symbol_error_counter = 0; /* N/A for OPA */
   3262
   3263	temp_32 = be32_to_cpu(rsp.link_error_recovery);
   3264	if (temp_32 > 0xFFUL)
   3265		p->link_error_recovery_counter = 0xFF;
   3266	else
   3267		p->link_error_recovery_counter = (u8)temp_32;
   3268
   3269	temp_32 = be32_to_cpu(rsp.link_downed);
   3270	if (temp_32 > 0xFFUL)
   3271		p->link_downed_counter = 0xFF;
   3272	else
   3273		p->link_downed_counter = (u8)temp_32;
   3274
   3275	temp_64 = be64_to_cpu(rsp.port_rcv_errors);
   3276	if (temp_64 > 0xFFFFUL)
   3277		p->port_rcv_errors = cpu_to_be16(0xFFFF);
   3278	else
   3279		p->port_rcv_errors = cpu_to_be16((u16)temp_64);
   3280
   3281	temp_64 = be64_to_cpu(rsp.port_rcv_remote_physical_errors);
   3282	if (temp_64 > 0xFFFFUL)
   3283		p->port_rcv_remphys_errors = cpu_to_be16(0xFFFF);
   3284	else
   3285		p->port_rcv_remphys_errors = cpu_to_be16((u16)temp_64);
   3286
   3287	temp_64 = be64_to_cpu(rsp.port_rcv_switch_relay_errors);
   3288	p->port_rcv_switch_relay_errors = cpu_to_be16((u16)temp_64);
   3289
   3290	temp_64 = be64_to_cpu(rsp.port_xmit_discards);
   3291	if (temp_64 > 0xFFFFUL)
   3292		p->port_xmit_discards = cpu_to_be16(0xFFFF);
   3293	else
   3294		p->port_xmit_discards = cpu_to_be16((u16)temp_64);
   3295
   3296	temp_64 = be64_to_cpu(rsp.port_xmit_constraint_errors);
   3297	if (temp_64 > 0xFFUL)
   3298		p->port_xmit_constraint_errors = 0xFF;
   3299	else
   3300		p->port_xmit_constraint_errors = (u8)temp_64;
   3301
   3302	temp_64 = be64_to_cpu(rsp.port_rcv_constraint_errors);
   3303	if (temp_64 > 0xFFUL)
   3304		p->port_rcv_constraint_errors = 0xFFUL;
   3305	else
   3306		p->port_rcv_constraint_errors = (u8)temp_64;
   3307
   3308	/* LocalLink: 7:4, BufferOverrun: 3:0 */
   3309	temp_64 = be64_to_cpu(rsp.local_link_integrity_errors);
   3310	if (temp_64 > 0xFUL)
   3311		temp_64 = 0xFUL;
   3312
   3313	temp_link_overrun_errors = temp_64 << 4;
   3314
   3315	temp_64 = be64_to_cpu(rsp.excessive_buffer_overruns);
   3316	if (temp_64 > 0xFUL)
   3317		temp_64 = 0xFUL;
   3318	temp_link_overrun_errors |= temp_64;
   3319
   3320	p->link_overrun_errors = (u8)temp_link_overrun_errors;
   3321
   3322	p->vl15_dropped = 0; /* N/A for OPA */
   3323
   3324bail:
   3325	return reply((struct ib_mad_hdr *)pmp);
   3326}
   3327
   3328static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
   3329				 struct ib_device *ibdev,
   3330				 u32 port, u32 *resp_len)
   3331{
   3332	size_t response_data_size;
   3333	struct _port_ei *rsp;
   3334	struct opa_port_error_info_msg *req;
   3335	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
   3336	u64 port_mask;
   3337	u32 num_ports;
   3338	u32 port_num;
   3339	u8 num_pslm;
   3340	u64 reg;
   3341
   3342	req = (struct opa_port_error_info_msg *)pmp->data;
   3343	rsp = &req->port[0];
   3344
   3345	num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
   3346	num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
   3347
   3348	memset(rsp, 0, sizeof(*rsp));
   3349
   3350	if (num_ports != 1 || num_ports != num_pslm) {
   3351		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
   3352		return reply((struct ib_mad_hdr *)pmp);
   3353	}
   3354
   3355	/* Sanity check */
   3356	response_data_size = sizeof(struct opa_port_error_info_msg);
   3357
   3358	if (response_data_size > sizeof(pmp->data)) {
   3359		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
   3360		return reply((struct ib_mad_hdr *)pmp);
   3361	}
   3362
   3363	/*
   3364	 * The bit set in the mask needs to be consistent with the port
   3365	 * the request came in on.
   3366	 */
   3367	port_mask = be64_to_cpu(req->port_select_mask[3]);
   3368	port_num = find_first_bit((unsigned long *)&port_mask,
   3369				  sizeof(port_mask) * 8);
   3370
   3371	if (port_num != port) {
   3372		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
   3373		return reply((struct ib_mad_hdr *)pmp);
   3374	}
   3375	rsp->port_number = port;
   3376
   3377	/* PortRcvErrorInfo */
   3378	rsp->port_rcv_ei.status_and_code =
   3379		dd->err_info_rcvport.status_and_code;
   3380	memcpy(&rsp->port_rcv_ei.ei.ei1to12.packet_flit1,
   3381	       &dd->err_info_rcvport.packet_flit1, sizeof(u64));
   3382	memcpy(&rsp->port_rcv_ei.ei.ei1to12.packet_flit2,
   3383	       &dd->err_info_rcvport.packet_flit2, sizeof(u64));
   3384
    3385	/* ExcessiveBufferOverrunInfo */
   3386	reg = read_csr(dd, RCV_ERR_INFO);
   3387	if (reg & RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK) {
   3388		/*
   3389		 * if the RcvExcessBufferOverrun bit is set, save SC of
   3390		 * first pkt that encountered an excess buffer overrun
   3391		 */
   3392		u8 tmp = (u8)reg;
   3393
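        		/* place the saved SC just below the status flag (bit 7) of status_and_sc */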
   3394		tmp &=  RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SC_SMASK;
   3395		tmp <<= 2;
   3396		rsp->excessive_buffer_overrun_ei.status_and_sc = tmp;
   3397		/* set the status bit */
   3398		rsp->excessive_buffer_overrun_ei.status_and_sc |= 0x80;
   3399	}
   3400
   3401	rsp->port_xmit_constraint_ei.status =
   3402		dd->err_info_xmit_constraint.status;
   3403	rsp->port_xmit_constraint_ei.pkey =
   3404		cpu_to_be16(dd->err_info_xmit_constraint.pkey);
   3405	rsp->port_xmit_constraint_ei.slid =
   3406		cpu_to_be32(dd->err_info_xmit_constraint.slid);
   3407
   3408	rsp->port_rcv_constraint_ei.status =
   3409		dd->err_info_rcv_constraint.status;
   3410	rsp->port_rcv_constraint_ei.pkey =
   3411		cpu_to_be16(dd->err_info_rcv_constraint.pkey);
   3412	rsp->port_rcv_constraint_ei.slid =
   3413		cpu_to_be32(dd->err_info_rcv_constraint.slid);
   3414
   3415	/* UncorrectableErrorInfo */
   3416	rsp->uncorrectable_ei.status_and_code = dd->err_info_uncorrectable;
   3417
   3418	/* FMConfigErrorInfo */
   3419	rsp->fm_config_ei.status_and_code = dd->err_info_fmconfig;
   3420
   3421	if (resp_len)
   3422		*resp_len += response_data_size;
   3423
   3424	return reply((struct ib_mad_hdr *)pmp);
   3425}
   3426
   3427static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
   3428				  struct ib_device *ibdev,
   3429				  u32 port, u32 *resp_len)
   3430{
   3431	struct opa_clear_port_status *req =
   3432		(struct opa_clear_port_status *)pmp->data;
   3433	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
   3434	struct hfi1_ibport *ibp = to_iport(ibdev, port);
   3435	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
   3436	u32 nports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
   3437	u64 portn = be64_to_cpu(req->port_select_mask[3]);
   3438	u32 counter_select = be32_to_cpu(req->counter_select_mask);
   3439	unsigned long vl_select_mask = VL_MASK_ALL; /* clear all per-vl cnts */
   3440	unsigned long vl;
   3441
   3442	if ((nports != 1) || (portn != 1 << port)) {
   3443		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
   3444		return reply((struct ib_mad_hdr *)pmp);
   3445	}
   3446	/*
   3447	 * only counters returned by pma_get_opa_portstatus() are
   3448	 * handled, so when pma_get_opa_portstatus() gets a fix,
   3449	 * the corresponding change should be made here as well.
   3450	 */
   3451
   3452	if (counter_select & CS_PORT_XMIT_DATA)
   3453		write_dev_cntr(dd, C_DC_XMIT_FLITS, CNTR_INVALID_VL, 0);
   3454
   3455	if (counter_select & CS_PORT_RCV_DATA)
   3456		write_dev_cntr(dd, C_DC_RCV_FLITS, CNTR_INVALID_VL, 0);
   3457
   3458	if (counter_select & CS_PORT_XMIT_PKTS)
   3459		write_dev_cntr(dd, C_DC_XMIT_PKTS, CNTR_INVALID_VL, 0);
   3460
   3461	if (counter_select & CS_PORT_RCV_PKTS)
   3462		write_dev_cntr(dd, C_DC_RCV_PKTS, CNTR_INVALID_VL, 0);
   3463
   3464	if (counter_select & CS_PORT_MCAST_XMIT_PKTS)
   3465		write_dev_cntr(dd, C_DC_MC_XMIT_PKTS, CNTR_INVALID_VL, 0);
   3466
   3467	if (counter_select & CS_PORT_MCAST_RCV_PKTS)
   3468		write_dev_cntr(dd, C_DC_MC_RCV_PKTS, CNTR_INVALID_VL, 0);
   3469
   3470	if (counter_select & CS_PORT_XMIT_WAIT) {
   3471		write_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL, 0);
   3472		ppd->port_vl_xmit_wait_last[C_VL_COUNT] = 0;
   3473		ppd->vl_xmit_flit_cnt[C_VL_COUNT] = 0;
   3474	}
   3475	/* ignore cs_sw_portCongestion for HFIs */
   3476
   3477	if (counter_select & CS_PORT_RCV_FECN)
   3478		write_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL, 0);
   3479
   3480	if (counter_select & CS_PORT_RCV_BECN)
   3481		write_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL, 0);
   3482
   3483	/* ignore cs_port_xmit_time_cong for HFIs */
   3484	/* ignore cs_port_xmit_wasted_bw for now */
   3485	/* ignore cs_port_xmit_wait_data for now */
   3486	if (counter_select & CS_PORT_RCV_BUBBLE)
   3487		write_dev_cntr(dd, C_DC_RCV_BBL, CNTR_INVALID_VL, 0);
   3488
   3489	/* Only applicable for switch */
   3490	/* if (counter_select & CS_PORT_MARK_FECN)
   3491	 *	write_csr(dd, DCC_PRF_PORT_MARK_FECN_CNT, 0);
   3492	 */
   3493
   3494	if (counter_select & CS_PORT_RCV_CONSTRAINT_ERRORS)
   3495		write_port_cntr(ppd, C_SW_RCV_CSTR_ERR, CNTR_INVALID_VL, 0);
   3496
   3497	/* ignore cs_port_rcv_switch_relay_errors for HFIs */
   3498	if (counter_select & CS_PORT_XMIT_DISCARDS)
   3499		write_port_cntr(ppd, C_SW_XMIT_DSCD, CNTR_INVALID_VL, 0);
   3500
   3501	if (counter_select & CS_PORT_XMIT_CONSTRAINT_ERRORS)
   3502		write_port_cntr(ppd, C_SW_XMIT_CSTR_ERR, CNTR_INVALID_VL, 0);
   3503
   3504	if (counter_select & CS_PORT_RCV_REMOTE_PHYSICAL_ERRORS)
   3505		write_dev_cntr(dd, C_DC_RMT_PHY_ERR, CNTR_INVALID_VL, 0);
   3506
   3507	if (counter_select & CS_LOCAL_LINK_INTEGRITY_ERRORS)
   3508		write_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL, 0);
   3509
   3510	if (counter_select & CS_LINK_ERROR_RECOVERY) {
   3511		write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0);
   3512		write_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
   3513			       CNTR_INVALID_VL, 0);
   3514	}
   3515
   3516	if (counter_select & CS_PORT_RCV_ERRORS)
   3517		write_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL, 0);
   3518
   3519	if (counter_select & CS_EXCESSIVE_BUFFER_OVERRUNS) {
   3520		write_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL, 0);
   3521		dd->rcv_ovfl_cnt = 0;
   3522	}
   3523
   3524	if (counter_select & CS_FM_CONFIG_ERRORS)
   3525		write_dev_cntr(dd, C_DC_FM_CFG_ERR, CNTR_INVALID_VL, 0);
   3526
   3527	if (counter_select & CS_LINK_DOWNED)
   3528		write_port_cntr(ppd, C_SW_LINK_DOWN, CNTR_INVALID_VL, 0);
   3529
   3530	if (counter_select & CS_UNCORRECTABLE_ERRORS)
   3531		write_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL, 0);
   3532
   3533	for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) {
   3534		if (counter_select & CS_PORT_XMIT_DATA)
   3535			write_port_cntr(ppd, C_TX_FLIT_VL, idx_from_vl(vl), 0);
   3536
   3537		if (counter_select & CS_PORT_RCV_DATA)
   3538			write_dev_cntr(dd, C_DC_RX_FLIT_VL, idx_from_vl(vl), 0);
   3539
   3540		if (counter_select & CS_PORT_XMIT_PKTS)
   3541			write_port_cntr(ppd, C_TX_PKT_VL, idx_from_vl(vl), 0);
   3542
   3543		if (counter_select & CS_PORT_RCV_PKTS)
   3544			write_dev_cntr(dd, C_DC_RX_PKT_VL, idx_from_vl(vl), 0);
   3545
   3546		if (counter_select & CS_PORT_XMIT_WAIT) {
   3547			write_port_cntr(ppd, C_TX_WAIT_VL, idx_from_vl(vl), 0);
   3548			ppd->port_vl_xmit_wait_last[idx_from_vl(vl)] = 0;
   3549			ppd->vl_xmit_flit_cnt[idx_from_vl(vl)] = 0;
   3550		}
   3551
   3552		/* sw_port_vl_congestion is 0 for HFIs */
   3553		if (counter_select & CS_PORT_RCV_FECN)
   3554			write_dev_cntr(dd, C_DC_RCV_FCN_VL, idx_from_vl(vl), 0);
   3555
   3556		if (counter_select & CS_PORT_RCV_BECN)
   3557			write_dev_cntr(dd, C_DC_RCV_BCN_VL, idx_from_vl(vl), 0);
   3558
   3559		/* port_vl_xmit_time_cong is 0 for HFIs */
   3560		/* port_vl_xmit_wasted_bw ??? */
   3561		/* port_vl_xmit_wait_data - TXE (table 13-9 HFI spec) ??? */
   3562		if (counter_select & CS_PORT_RCV_BUBBLE)
   3563			write_dev_cntr(dd, C_DC_RCV_BBL_VL, idx_from_vl(vl), 0);
   3564
   3565		/* if (counter_select & CS_PORT_MARK_FECN)
   3566		 *     write_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT + offset, 0);
   3567		 */
   3568		if (counter_select & C_SW_XMIT_DSCD_VL)
   3569			write_port_cntr(ppd, C_SW_XMIT_DSCD_VL,
   3570					idx_from_vl(vl), 0);
   3571	}
   3572
   3573	if (resp_len)
   3574		*resp_len += sizeof(*req);
   3575
   3576	return reply((struct ib_mad_hdr *)pmp);
   3577}
   3578
   3579static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp,
   3580				 struct ib_device *ibdev,
   3581				 u32 port, u32 *resp_len)
   3582{
   3583	struct _port_ei *rsp;
   3584	struct opa_port_error_info_msg *req;
   3585	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
   3586	u64 port_mask;
   3587	u32 num_ports;
   3588	u32 port_num;
   3589	u8 num_pslm;
   3590	u32 error_info_select;
   3591
   3592	req = (struct opa_port_error_info_msg *)pmp->data;
   3593	rsp = &req->port[0];
   3594
   3595	num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
   3596	num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
   3597
   3598	memset(rsp, 0, sizeof(*rsp));
   3599
   3600	if (num_ports != 1 || num_ports != num_pslm) {
   3601		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
   3602		return reply((struct ib_mad_hdr *)pmp);
   3603	}
   3604
   3605	/*
   3606	 * The bit set in the mask needs to be consistent with the port
   3607	 * the request came in on.
   3608	 */
   3609	port_mask = be64_to_cpu(req->port_select_mask[3]);
   3610	port_num = find_first_bit((unsigned long *)&port_mask,
   3611				  sizeof(port_mask) * 8);
   3612
   3613	if (port_num != port) {
   3614		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
   3615		return reply((struct ib_mad_hdr *)pmp);
   3616	}
   3617
   3618	error_info_select = be32_to_cpu(req->error_info_select_mask);
   3619
   3620	/* PortRcvErrorInfo */
   3621	if (error_info_select & ES_PORT_RCV_ERROR_INFO)
   3622		/* turn off status bit */
   3623		dd->err_info_rcvport.status_and_code &= ~OPA_EI_STATUS_SMASK;
   3624
    3625	/* ExcessiveBufferOverrunInfo */
   3626	if (error_info_select & ES_EXCESSIVE_BUFFER_OVERRUN_INFO)
   3627		/*
   3628		 * status bit is essentially kept in the h/w - bit 5 of
   3629		 * RCV_ERR_INFO
   3630		 */
   3631		write_csr(dd, RCV_ERR_INFO,
   3632			  RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK);
   3633
   3634	if (error_info_select & ES_PORT_XMIT_CONSTRAINT_ERROR_INFO)
   3635		dd->err_info_xmit_constraint.status &= ~OPA_EI_STATUS_SMASK;
   3636
   3637	if (error_info_select & ES_PORT_RCV_CONSTRAINT_ERROR_INFO)
   3638		dd->err_info_rcv_constraint.status &= ~OPA_EI_STATUS_SMASK;
   3639
   3640	/* UncorrectableErrorInfo */
   3641	if (error_info_select & ES_UNCORRECTABLE_ERROR_INFO)
   3642		/* turn off status bit */
   3643		dd->err_info_uncorrectable &= ~OPA_EI_STATUS_SMASK;
   3644
   3645	/* FMConfigErrorInfo */
   3646	if (error_info_select & ES_FM_CONFIG_ERROR_INFO)
   3647		/* turn off status bit */
   3648		dd->err_info_fmconfig &= ~OPA_EI_STATUS_SMASK;
   3649
   3650	if (resp_len)
   3651		*resp_len += sizeof(*req);
   3652
   3653	return reply((struct ib_mad_hdr *)pmp);
   3654}
   3655
   3656struct opa_congestion_info_attr {
   3657	__be16 congestion_info;
   3658	u8 control_table_cap;	/* Multiple of 64 entry unit CCTs */
   3659	u8 congestion_log_length;
   3660} __packed;
   3661
   3662static int __subn_get_opa_cong_info(struct opa_smp *smp, u32 am, u8 *data,
   3663				    struct ib_device *ibdev, u32 port,
   3664				    u32 *resp_len, u32 max_len)
   3665{
   3666	struct opa_congestion_info_attr *p =
   3667		(struct opa_congestion_info_attr *)data;
   3668	struct hfi1_ibport *ibp = to_iport(ibdev, port);
   3669	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
   3670
   3671	if (smp_length_check(sizeof(*p), max_len)) {
   3672		smp->status |= IB_SMP_INVALID_FIELD;
   3673		return reply((struct ib_mad_hdr *)smp);
   3674	}
   3675
   3676	p->congestion_info = 0;
   3677	p->control_table_cap = ppd->cc_max_table_entries;
   3678	p->congestion_log_length = OPA_CONG_LOG_ELEMS;
   3679
   3680	if (resp_len)
   3681		*resp_len += sizeof(*p);
   3682
   3683	return reply((struct ib_mad_hdr *)smp);
   3684}
   3685
   3686static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am,
   3687				       u8 *data, struct ib_device *ibdev,
   3688				       u32 port, u32 *resp_len, u32 max_len)
   3689{
   3690	int i;
   3691	struct opa_congestion_setting_attr *p =
   3692		(struct opa_congestion_setting_attr *)data;
   3693	struct hfi1_ibport *ibp = to_iport(ibdev, port);
   3694	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
   3695	struct opa_congestion_setting_entry_shadow *entries;
   3696	struct cc_state *cc_state;
   3697
   3698	if (smp_length_check(sizeof(*p), max_len)) {
   3699		smp->status |= IB_SMP_INVALID_FIELD;
   3700		return reply((struct ib_mad_hdr *)smp);
   3701	}
   3702
   3703	rcu_read_lock();
   3704
   3705	cc_state = get_cc_state(ppd);
   3706
   3707	if (!cc_state) {
   3708		rcu_read_unlock();
   3709		return reply((struct ib_mad_hdr *)smp);
   3710	}
   3711
   3712	entries = cc_state->cong_setting.entries;
   3713	p->port_control = cpu_to_be16(cc_state->cong_setting.port_control);
   3714	p->control_map = cpu_to_be32(cc_state->cong_setting.control_map);
   3715	for (i = 0; i < OPA_MAX_SLS; i++) {
   3716		p->entries[i].ccti_increase = entries[i].ccti_increase;
   3717		p->entries[i].ccti_timer = cpu_to_be16(entries[i].ccti_timer);
   3718		p->entries[i].trigger_threshold =
   3719			entries[i].trigger_threshold;
   3720		p->entries[i].ccti_min = entries[i].ccti_min;
   3721	}
   3722
   3723	rcu_read_unlock();
   3724
   3725	if (resp_len)
   3726		*resp_len += sizeof(*p);
   3727
   3728	return reply((struct ib_mad_hdr *)smp);
   3729}
   3730
   3731/*
   3732 * Apply congestion control information stored in the ppd to the
   3733 * active structure.
   3734 */
   3735static void apply_cc_state(struct hfi1_pportdata *ppd)
   3736{
   3737	struct cc_state *old_cc_state, *new_cc_state;
   3738
   3739	new_cc_state = kzalloc(sizeof(*new_cc_state), GFP_KERNEL);
   3740	if (!new_cc_state)
   3741		return;
   3742
   3743	/*
   3744	 * Hold the lock for updating *and* to prevent ppd information
   3745	 * from changing during the update.
   3746	 */
   3747	spin_lock(&ppd->cc_state_lock);
   3748
   3749	old_cc_state = get_cc_state_protected(ppd);
   3750	if (!old_cc_state) {
   3751		/* never active, or shutting down */
   3752		spin_unlock(&ppd->cc_state_lock);
   3753		kfree(new_cc_state);
   3754		return;
   3755	}
   3756
   3757	*new_cc_state = *old_cc_state;
   3758
   3759	if (ppd->total_cct_entry)
   3760		new_cc_state->cct.ccti_limit = ppd->total_cct_entry - 1;
   3761	else
   3762		new_cc_state->cct.ccti_limit = 0;
   3763
   3764	memcpy(new_cc_state->cct.entries, ppd->ccti_entries,
   3765	       ppd->total_cct_entry * sizeof(struct ib_cc_table_entry));
   3766
   3767	new_cc_state->cong_setting.port_control = IB_CC_CCS_PC_SL_BASED;
   3768	new_cc_state->cong_setting.control_map = ppd->cc_sl_control_map;
   3769	memcpy(new_cc_state->cong_setting.entries, ppd->congestion_entries,
   3770	       OPA_MAX_SLS * sizeof(struct opa_congestion_setting_entry));
   3771
   3772	rcu_assign_pointer(ppd->cc_state, new_cc_state);
   3773
   3774	spin_unlock(&ppd->cc_state_lock);
   3775
   3776	kfree_rcu(old_cc_state, rcu);
   3777}
   3778
   3779static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
   3780				       struct ib_device *ibdev, u32 port,
   3781				       u32 *resp_len, u32 max_len)
   3782{
   3783	struct opa_congestion_setting_attr *p =
   3784		(struct opa_congestion_setting_attr *)data;
   3785	struct hfi1_ibport *ibp = to_iport(ibdev, port);
   3786	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
   3787	struct opa_congestion_setting_entry_shadow *entries;
   3788	int i;
   3789
   3790	if (smp_length_check(sizeof(*p), max_len)) {
   3791		smp->status |= IB_SMP_INVALID_FIELD;
   3792		return reply((struct ib_mad_hdr *)smp);
   3793	}
   3794
   3795	/*
   3796	 * Save details from packet into the ppd.  Hold the cc_state_lock so
   3797	 * our information is consistent with anyone trying to apply the state.
   3798	 */
   3799	spin_lock(&ppd->cc_state_lock);
   3800	ppd->cc_sl_control_map = be32_to_cpu(p->control_map);
   3801
   3802	entries = ppd->congestion_entries;
   3803	for (i = 0; i < OPA_MAX_SLS; i++) {
   3804		entries[i].ccti_increase = p->entries[i].ccti_increase;
   3805		entries[i].ccti_timer = be16_to_cpu(p->entries[i].ccti_timer);
   3806		entries[i].trigger_threshold =
   3807			p->entries[i].trigger_threshold;
   3808		entries[i].ccti_min = p->entries[i].ccti_min;
   3809	}
   3810	spin_unlock(&ppd->cc_state_lock);
   3811
   3812	/* now apply the information */
   3813	apply_cc_state(ppd);
   3814
   3815	return __subn_get_opa_cong_setting(smp, am, data, ibdev, port,
   3816					   resp_len, max_len);
   3817}
   3818
   3819static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am,
   3820					u8 *data, struct ib_device *ibdev,
   3821					u32 port, u32 *resp_len, u32 max_len)
   3822{
   3823	struct hfi1_ibport *ibp = to_iport(ibdev, port);
   3824	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
   3825	struct opa_hfi1_cong_log *cong_log = (struct opa_hfi1_cong_log *)data;
   3826	u64 ts;
   3827	int i;
   3828
   3829	if (am || smp_length_check(sizeof(*cong_log), max_len)) {
   3830		smp->status |= IB_SMP_INVALID_FIELD;
   3831		return reply((struct ib_mad_hdr *)smp);
   3832	}
   3833
   3834	spin_lock_irq(&ppd->cc_log_lock);
   3835
   3836	cong_log->log_type = OPA_CC_LOG_TYPE_HFI;
   3837	cong_log->congestion_flags = 0;
   3838	cong_log->threshold_event_counter =
   3839		cpu_to_be16(ppd->threshold_event_counter);
   3840	memcpy(cong_log->threshold_cong_event_map,
   3841	       ppd->threshold_cong_event_map,
   3842	       sizeof(cong_log->threshold_cong_event_map));
   3843	/* keep timestamp in units of 1.024 usec */
   3844	ts = ktime_get_ns() / 1024;
   3845	cong_log->current_time_stamp = cpu_to_be32(ts);
   3846	for (i = 0; i < OPA_CONG_LOG_ELEMS; i++) {
   3847		struct opa_hfi1_cong_log_event_internal *cce =
   3848			&ppd->cc_events[ppd->cc_mad_idx++];
   3849		if (ppd->cc_mad_idx == OPA_CONG_LOG_ELEMS)
   3850			ppd->cc_mad_idx = 0;
   3851		/*
   3852		 * Entries which are older than twice the time
   3853		 * required to wrap the counter are supposed to
   3854		 * be zeroed (CA10-49 IBTA, release 1.2.1, V1).
   3855		 */
   3856		if ((ts - cce->timestamp) / 2 > U32_MAX)
   3857			continue;
   3858		memcpy(cong_log->events[i].local_qp_cn_entry, &cce->lqpn, 3);
   3859		memcpy(cong_log->events[i].remote_qp_number_cn_entry,
   3860		       &cce->rqpn, 3);
   3861		cong_log->events[i].sl_svc_type_cn_entry =
   3862			((cce->sl & 0x1f) << 3) | (cce->svc_type & 0x7);
   3863		cong_log->events[i].remote_lid_cn_entry =
   3864			cpu_to_be32(cce->rlid);
   3865		cong_log->events[i].timestamp_cn_entry =
   3866			cpu_to_be32(cce->timestamp);
   3867	}
   3868
   3869	/*
   3870	 * Reset threshold_cong_event_map, and threshold_event_counter
   3871	 * to 0 when log is read.
   3872	 */
   3873	memset(ppd->threshold_cong_event_map, 0x0,
   3874	       sizeof(ppd->threshold_cong_event_map));
   3875	ppd->threshold_event_counter = 0;
   3876
   3877	spin_unlock_irq(&ppd->cc_log_lock);
   3878
   3879	if (resp_len)
   3880		*resp_len += sizeof(struct opa_hfi1_cong_log);
   3881
   3882	return reply((struct ib_mad_hdr *)smp);
   3883}
   3884
   3885static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
   3886				   struct ib_device *ibdev, u32 port,
   3887				   u32 *resp_len, u32 max_len)
   3888{
   3889	struct ib_cc_table_attr *cc_table_attr =
   3890		(struct ib_cc_table_attr *)data;
   3891	struct hfi1_ibport *ibp = to_iport(ibdev, port);
   3892	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
   3893	u32 start_block = OPA_AM_START_BLK(am);
   3894	u32 n_blocks = OPA_AM_NBLK(am);
   3895	struct ib_cc_table_entry_shadow *entries;
   3896	int i, j;
   3897	u32 sentry, eentry;
   3898	struct cc_state *cc_state;
   3899	u32 size = sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1);
   3900
   3901	/* sanity check n_blocks, start_block */
   3902	if (n_blocks == 0 || smp_length_check(size, max_len) ||
   3903	    start_block + n_blocks > ppd->cc_max_table_entries) {
   3904		smp->status |= IB_SMP_INVALID_FIELD;
   3905		return reply((struct ib_mad_hdr *)smp);
   3906	}
   3907
   3908	rcu_read_lock();
   3909
   3910	cc_state = get_cc_state(ppd);
   3911
   3912	if (!cc_state) {
   3913		rcu_read_unlock();
   3914		return reply((struct ib_mad_hdr *)smp);
   3915	}
   3916
   3917	sentry = start_block * IB_CCT_ENTRIES;
   3918	eentry = sentry + (IB_CCT_ENTRIES * n_blocks);
   3919
   3920	cc_table_attr->ccti_limit = cpu_to_be16(cc_state->cct.ccti_limit);
   3921
   3922	entries = cc_state->cct.entries;
   3923
   3924	/* return n_blocks, though the last block may not be full */
   3925	for (j = 0, i = sentry; i < eentry; j++, i++)
   3926		cc_table_attr->ccti_entries[j].entry =
   3927			cpu_to_be16(entries[i].entry);
   3928
   3929	rcu_read_unlock();
   3930
   3931	if (resp_len)
   3932		*resp_len += size;
   3933
   3934	return reply((struct ib_mad_hdr *)smp);
   3935}
   3936
   3937static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
   3938				   struct ib_device *ibdev, u32 port,
   3939				   u32 *resp_len, u32 max_len)
   3940{
   3941	struct ib_cc_table_attr *p = (struct ib_cc_table_attr *)data;
   3942	struct hfi1_ibport *ibp = to_iport(ibdev, port);
   3943	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
   3944	u32 start_block = OPA_AM_START_BLK(am);
   3945	u32 n_blocks = OPA_AM_NBLK(am);
   3946	struct ib_cc_table_entry_shadow *entries;
   3947	int i, j;
   3948	u32 sentry, eentry;
   3949	u16 ccti_limit;
   3950	u32 size = sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1);
   3951
   3952	/* sanity check n_blocks, start_block */
   3953	if (n_blocks == 0 || smp_length_check(size, max_len) ||
   3954	    start_block + n_blocks > ppd->cc_max_table_entries) {
   3955		smp->status |= IB_SMP_INVALID_FIELD;
   3956		return reply((struct ib_mad_hdr *)smp);
   3957	}
   3958
   3959	sentry = start_block * IB_CCT_ENTRIES;
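        	/* the last block may be partial: its valid entries are bounded by ccti_limit */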
   3960	eentry = sentry + ((n_blocks - 1) * IB_CCT_ENTRIES) +
   3961		 (be16_to_cpu(p->ccti_limit)) % IB_CCT_ENTRIES + 1;
   3962
   3963	/* sanity check ccti_limit */
   3964	ccti_limit = be16_to_cpu(p->ccti_limit);
   3965	if (ccti_limit + 1 > eentry) {
   3966		smp->status |= IB_SMP_INVALID_FIELD;
   3967		return reply((struct ib_mad_hdr *)smp);
   3968	}
   3969
   3970	/*
   3971	 * Save details from packet into the ppd.  Hold the cc_state_lock so
   3972	 * our information is consistent with anyone trying to apply the state.
   3973	 */
   3974	spin_lock(&ppd->cc_state_lock);
   3975	ppd->total_cct_entry = ccti_limit + 1;
   3976	entries = ppd->ccti_entries;
   3977	for (j = 0, i = sentry; i < eentry; j++, i++)
   3978		entries[i].entry = be16_to_cpu(p->ccti_entries[j].entry);
   3979	spin_unlock(&ppd->cc_state_lock);
   3980
   3981	/* now apply the information */
   3982	apply_cc_state(ppd);
   3983
   3984	return __subn_get_opa_cc_table(smp, am, data, ibdev, port, resp_len,
   3985				       max_len);
   3986}
   3987
   3988struct opa_led_info {
   3989	__be32 rsvd_led_mask;
   3990	__be32 rsvd;
   3991};
   3992
   3993#define OPA_LED_SHIFT	31
   3994#define OPA_LED_MASK	BIT(OPA_LED_SHIFT)
   3995
   3996static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
   3997				   struct ib_device *ibdev, u32 port,
   3998				   u32 *resp_len, u32 max_len)
   3999{
   4000	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
   4001	struct hfi1_pportdata *ppd = dd->pport;
   4002	struct opa_led_info *p = (struct opa_led_info *)data;
   4003	u32 nport = OPA_AM_NPORT(am);
   4004	u32 is_beaconing_active;
   4005
   4006	if (nport != 1 || smp_length_check(sizeof(*p), max_len)) {
   4007		smp->status |= IB_SMP_INVALID_FIELD;
   4008		return reply((struct ib_mad_hdr *)smp);
   4009	}
   4010
   4011	/*
   4012	 * This pairs with the memory barrier in hfi1_start_led_override to
   4013	 * ensure that we read the correct state of LED beaconing represented
   4014	 * by led_override_timer_active
   4015	 */
   4016	smp_rmb();
   4017	is_beaconing_active = !!atomic_read(&ppd->led_override_timer_active);
   4018	p->rsvd_led_mask = cpu_to_be32(is_beaconing_active << OPA_LED_SHIFT);
   4019
   4020	if (resp_len)
   4021		*resp_len += sizeof(struct opa_led_info);
   4022
   4023	return reply((struct ib_mad_hdr *)smp);
   4024}
   4025
   4026static int __subn_set_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
   4027				   struct ib_device *ibdev, u32 port,
   4028				   u32 *resp_len, u32 max_len)
   4029{
   4030	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
   4031	struct opa_led_info *p = (struct opa_led_info *)data;
   4032	u32 nport = OPA_AM_NPORT(am);
   4033	int on = !!(be32_to_cpu(p->rsvd_led_mask) & OPA_LED_MASK);
   4034
   4035	if (nport != 1 || smp_length_check(sizeof(*p), max_len)) {
   4036		smp->status |= IB_SMP_INVALID_FIELD;
   4037		return reply((struct ib_mad_hdr *)smp);
   4038	}
   4039
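        	/* 2000/1500 are the beacon on/off times handed to hfi1_start_led_override */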
   4040	if (on)
   4041		hfi1_start_led_override(dd->pport, 2000, 1500);
   4042	else
   4043		shutdown_led_override(dd->pport);
   4044
   4045	return __subn_get_opa_led_info(smp, am, data, ibdev, port, resp_len,
   4046				       max_len);
   4047}
   4048
   4049static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
   4050			    u8 *data, struct ib_device *ibdev, u32 port,
   4051			    u32 *resp_len, u32 max_len)
   4052{
   4053	int ret;
   4054	struct hfi1_ibport *ibp = to_iport(ibdev, port);
   4055
   4056	switch (attr_id) {
   4057	case IB_SMP_ATTR_NODE_DESC:
   4058		ret = __subn_get_opa_nodedesc(smp, am, data, ibdev, port,
   4059					      resp_len, max_len);
   4060		break;
   4061	case IB_SMP_ATTR_NODE_INFO:
   4062		ret = __subn_get_opa_nodeinfo(smp, am, data, ibdev, port,
   4063					      resp_len, max_len);
   4064		break;
   4065	case IB_SMP_ATTR_PORT_INFO:
   4066		ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port,
   4067					      resp_len, max_len);
   4068		break;
   4069	case IB_SMP_ATTR_PKEY_TABLE:
   4070		ret = __subn_get_opa_pkeytable(smp, am, data, ibdev, port,
   4071					       resp_len, max_len);
   4072		break;
   4073	case OPA_ATTRIB_ID_SL_TO_SC_MAP:
   4074		ret = __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port,
   4075					      resp_len, max_len);
   4076		break;
   4077	case OPA_ATTRIB_ID_SC_TO_SL_MAP:
   4078		ret = __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port,
   4079					      resp_len, max_len);
   4080		break;
   4081	case OPA_ATTRIB_ID_SC_TO_VLT_MAP:
   4082		ret = __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port,
   4083					       resp_len, max_len);
   4084		break;
   4085	case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
   4086		ret = __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port,
   4087						resp_len, max_len);
   4088		break;
   4089	case OPA_ATTRIB_ID_PORT_STATE_INFO:
   4090		ret = __subn_get_opa_psi(smp, am, data, ibdev, port,
   4091					 resp_len, max_len);
   4092		break;
   4093	case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE:
   4094		ret = __subn_get_opa_bct(smp, am, data, ibdev, port,
   4095					 resp_len, max_len);
   4096		break;
   4097	case OPA_ATTRIB_ID_CABLE_INFO:
   4098		ret = __subn_get_opa_cable_info(smp, am, data, ibdev, port,
   4099						resp_len, max_len);
   4100		break;
   4101	case IB_SMP_ATTR_VL_ARB_TABLE:
   4102		ret = __subn_get_opa_vl_arb(smp, am, data, ibdev, port,
   4103					    resp_len, max_len);
   4104		break;
   4105	case OPA_ATTRIB_ID_CONGESTION_INFO:
   4106		ret = __subn_get_opa_cong_info(smp, am, data, ibdev, port,
   4107					       resp_len, max_len);
   4108		break;
   4109	case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING:
   4110		ret = __subn_get_opa_cong_setting(smp, am, data, ibdev,
   4111						  port, resp_len, max_len);
   4112		break;
   4113	case OPA_ATTRIB_ID_HFI_CONGESTION_LOG:
   4114		ret = __subn_get_opa_hfi1_cong_log(smp, am, data, ibdev,
   4115						   port, resp_len, max_len);
   4116		break;
   4117	case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE:
   4118		ret = __subn_get_opa_cc_table(smp, am, data, ibdev, port,
   4119					      resp_len, max_len);
   4120		break;
   4121	case IB_SMP_ATTR_LED_INFO:
   4122		ret = __subn_get_opa_led_info(smp, am, data, ibdev, port,
   4123					      resp_len, max_len);
   4124		break;
   4125	case IB_SMP_ATTR_SM_INFO:
   4126		if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
   4127			return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
   4128		if (ibp->rvp.port_cap_flags & IB_PORT_SM)
   4129			return IB_MAD_RESULT_SUCCESS;
   4130		fallthrough;
   4131	default:
   4132		smp->status |= IB_SMP_UNSUP_METH_ATTR;
   4133		ret = reply((struct ib_mad_hdr *)smp);
   4134		break;
   4135	}
   4136	return ret;
   4137}
   4138
   4139static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
   4140			    u8 *data, struct ib_device *ibdev, u32 port,
   4141			    u32 *resp_len, u32 max_len, int local_mad)
   4142{
   4143	int ret;
   4144	struct hfi1_ibport *ibp = to_iport(ibdev, port);
   4145
   4146	switch (attr_id) {
   4147	case IB_SMP_ATTR_PORT_INFO:
   4148		ret = __subn_set_opa_portinfo(smp, am, data, ibdev, port,
   4149					      resp_len, max_len, local_mad);
   4150		break;
   4151	case IB_SMP_ATTR_PKEY_TABLE:
   4152		ret = __subn_set_opa_pkeytable(smp, am, data, ibdev, port,
   4153					       resp_len, max_len);
   4154		break;
   4155	case OPA_ATTRIB_ID_SL_TO_SC_MAP:
   4156		ret = __subn_set_opa_sl_to_sc(smp, am, data, ibdev, port,
   4157					      resp_len, max_len);
   4158		break;
   4159	case OPA_ATTRIB_ID_SC_TO_SL_MAP:
   4160		ret = __subn_set_opa_sc_to_sl(smp, am, data, ibdev, port,
   4161					      resp_len, max_len);
   4162		break;
   4163	case OPA_ATTRIB_ID_SC_TO_VLT_MAP:
   4164		ret = __subn_set_opa_sc_to_vlt(smp, am, data, ibdev, port,
   4165					       resp_len, max_len);
   4166		break;
   4167	case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
   4168		ret = __subn_set_opa_sc_to_vlnt(smp, am, data, ibdev, port,
   4169						resp_len, max_len);
   4170		break;
   4171	case OPA_ATTRIB_ID_PORT_STATE_INFO:
   4172		ret = __subn_set_opa_psi(smp, am, data, ibdev, port,
   4173					 resp_len, max_len, local_mad);
   4174		break;
   4175	case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE:
   4176		ret = __subn_set_opa_bct(smp, am, data, ibdev, port,
   4177					 resp_len, max_len);
   4178		break;
   4179	case IB_SMP_ATTR_VL_ARB_TABLE:
   4180		ret = __subn_set_opa_vl_arb(smp, am, data, ibdev, port,
   4181					    resp_len, max_len);
   4182		break;
   4183	case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING:
   4184		ret = __subn_set_opa_cong_setting(smp, am, data, ibdev,
   4185						  port, resp_len, max_len);
   4186		break;
   4187	case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE:
   4188		ret = __subn_set_opa_cc_table(smp, am, data, ibdev, port,
   4189					      resp_len, max_len);
   4190		break;
   4191	case IB_SMP_ATTR_LED_INFO:
   4192		ret = __subn_set_opa_led_info(smp, am, data, ibdev, port,
   4193					      resp_len, max_len);
   4194		break;
   4195	case IB_SMP_ATTR_SM_INFO:
   4196		if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
   4197			return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
   4198		if (ibp->rvp.port_cap_flags & IB_PORT_SM)
   4199			return IB_MAD_RESULT_SUCCESS;
   4200		fallthrough;
   4201	default:
   4202		smp->status |= IB_SMP_UNSUP_METH_ATTR;
   4203		ret = reply((struct ib_mad_hdr *)smp);
   4204		break;
   4205	}
   4206	return ret;
   4207}
   4208
   4209static inline void set_aggr_error(struct opa_aggregate *ag)
   4210{
   4211	ag->err_reqlength |= cpu_to_be16(0x8000);
   4212}
   4213
   4214static int subn_get_opa_aggregate(struct opa_smp *smp,
   4215				  struct ib_device *ibdev, u32 port,
   4216				  u32 *resp_len)
   4217{
   4218	int i;
   4219	u32 num_attr = be32_to_cpu(smp->attr_mod) & 0x000000ff;
   4220	u8 *next_smp = opa_get_smp_data(smp);
   4221
   4222	if (num_attr < 1 || num_attr > 117) {
   4223		smp->status |= IB_SMP_INVALID_FIELD;
   4224		return reply((struct ib_mad_hdr *)smp);
   4225	}
   4226
   4227	for (i = 0; i < num_attr; i++) {
   4228		struct opa_aggregate *agg;
   4229		size_t agg_data_len;
   4230		size_t agg_size;
   4231		u32 am;
   4232
   4233		agg = (struct opa_aggregate *)next_smp;
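        		/* low 7 bits of err_reqlength give the attribute payload length in 8-byte units */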
   4234		agg_data_len = (be16_to_cpu(agg->err_reqlength) & 0x007f) * 8;
   4235		agg_size = sizeof(*agg) + agg_data_len;
   4236		am = be32_to_cpu(agg->attr_mod);
   4237
   4238		*resp_len += agg_size;
   4239
   4240		if (next_smp + agg_size > ((u8 *)smp) + sizeof(*smp)) {
   4241			smp->status |= IB_SMP_INVALID_FIELD;
   4242			return reply((struct ib_mad_hdr *)smp);
   4243		}
   4244
   4245		/* zero the payload for this segment */
   4246		memset(next_smp + sizeof(*agg), 0, agg_data_len);
   4247
   4248		(void)subn_get_opa_sma(agg->attr_id, smp, am, agg->data,
   4249				       ibdev, port, NULL, (u32)agg_data_len);
   4250
   4251		if (smp->status & IB_SMP_INVALID_FIELD)
   4252			break;
   4253		if (smp->status & ~IB_SMP_DIRECTION) {
   4254			set_aggr_error(agg);
   4255			return reply((struct ib_mad_hdr *)smp);
   4256		}
   4257		next_smp += agg_size;
   4258	}
   4259
   4260	return reply((struct ib_mad_hdr *)smp);
   4261}
   4262
   4263static int subn_set_opa_aggregate(struct opa_smp *smp,
   4264				  struct ib_device *ibdev, u32 port,
   4265				  u32 *resp_len, int local_mad)
   4266{
   4267	int i;
   4268	u32 num_attr = be32_to_cpu(smp->attr_mod) & 0x000000ff;
   4269	u8 *next_smp = opa_get_smp_data(smp);
   4270
   4271	if (num_attr < 1 || num_attr > 117) {
   4272		smp->status |= IB_SMP_INVALID_FIELD;
   4273		return reply((struct ib_mad_hdr *)smp);
   4274	}
   4275
   4276	for (i = 0; i < num_attr; i++) {
   4277		struct opa_aggregate *agg;
   4278		size_t agg_data_len;
   4279		size_t agg_size;
   4280		u32 am;
   4281
   4282		agg = (struct opa_aggregate *)next_smp;
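        		/* low 7 bits of err_reqlength give the attribute payload length in 8-byte units */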
   4283		agg_data_len = (be16_to_cpu(agg->err_reqlength) & 0x007f) * 8;
   4284		agg_size = sizeof(*agg) + agg_data_len;
   4285		am = be32_to_cpu(agg->attr_mod);
   4286
   4287		*resp_len += agg_size;
   4288
   4289		if (next_smp + agg_size > ((u8 *)smp) + sizeof(*smp)) {
   4290			smp->status |= IB_SMP_INVALID_FIELD;
   4291			return reply((struct ib_mad_hdr *)smp);
   4292		}
   4293
   4294		(void)subn_set_opa_sma(agg->attr_id, smp, am, agg->data,
   4295				       ibdev, port, NULL, (u32)agg_data_len,
   4296				       local_mad);
   4297
   4298		if (smp->status & IB_SMP_INVALID_FIELD)
   4299			break;
   4300		if (smp->status & ~IB_SMP_DIRECTION) {
   4301			set_aggr_error(agg);
   4302			return reply((struct ib_mad_hdr *)smp);
   4303		}
   4304		next_smp += agg_size;
   4305	}
   4306
   4307	return reply((struct ib_mad_hdr *)smp);
   4308}
   4309
   4310/*
   4311 * OPAv1 specifies that, on the transition to link up, these counters
   4312 * are cleared:
   4313 *   PortRcvErrors [*]
   4314 *   LinkErrorRecovery
   4315 *   LocalLinkIntegrityErrors
   4316 *   ExcessiveBufferOverruns [*]
   4317 *
   4318 * [*] Error info associated with these counters is retained, but the
   4319 * error info status is reset to 0.
   4320 */
   4321void clear_linkup_counters(struct hfi1_devdata *dd)
   4322{
   4323	/* PortRcvErrors */
   4324	write_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL, 0);
   4325	dd->err_info_rcvport.status_and_code &= ~OPA_EI_STATUS_SMASK;
   4326	/* LinkErrorRecovery */
   4327	write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0);
   4328	write_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL, 0);
   4329	/* LocalLinkIntegrityErrors */
   4330	write_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL, 0);
   4331	/* ExcessiveBufferOverruns */
   4332	write_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL, 0);
   4333	dd->rcv_ovfl_cnt = 0;
   4334	dd->err_info_xmit_constraint.status &= ~OPA_EI_STATUS_SMASK;
   4335}
   4336
   4337static int is_full_mgmt_pkey_in_table(struct hfi1_ibport *ibp)
   4338{
   4339	unsigned int i;
   4340	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
   4341
   4342	for (i = 0; i < ARRAY_SIZE(ppd->pkeys); ++i)
   4343		if (ppd->pkeys[i] == FULL_MGMT_P_KEY)
   4344			return 1;
   4345
   4346	return 0;
   4347}
   4348
   4349/*
    4350	 * is_local_mad() returns 1 if 'mad' is sent from, and destined to, the
    4351	 * local node; 0 otherwise.
   4352 */
   4353static int is_local_mad(struct hfi1_ibport *ibp, const struct opa_mad *mad,
   4354			const struct ib_wc *in_wc)
   4355{
   4356	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
   4357	const struct opa_smp *smp = (const struct opa_smp *)mad;
   4358
   4359	if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
   4360		return (smp->hop_cnt == 0 &&
   4361			smp->route.dr.dr_slid == OPA_LID_PERMISSIVE &&
   4362			smp->route.dr.dr_dlid == OPA_LID_PERMISSIVE);
   4363	}
   4364
   4365	return (in_wc->slid == ppd->lid);
   4366}
   4367
   4368/*
   4369 * opa_local_smp_check() should only be called on MADs for which
   4370 * is_local_mad() returns true. It applies the SMP checks that are
   4371 * specific to SMPs which are sent from, and destined to this node.
   4372 * opa_local_smp_check() returns 0 if the SMP passes its checks, 1
   4373 * otherwise.
   4374 *
   4375 * SMPs which arrive from other nodes are instead checked by
   4376 * opa_smp_check().
   4377 */
   4378static int opa_local_smp_check(struct hfi1_ibport *ibp,
   4379			       const struct ib_wc *in_wc)
   4380{
   4381	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
   4382	u16 pkey;
   4383
   4384	if (in_wc->pkey_index >= ARRAY_SIZE(ppd->pkeys))
   4385		return 1;
   4386
   4387	pkey = ppd->pkeys[in_wc->pkey_index];
   4388	/*
   4389	 * We need to do the "node-local" checks specified in OPAv1,
   4390	 * rev 0.90, section 9.10.26, which are:
   4391	 *   - pkey is 0x7fff, or 0xffff
   4392	 *   - Source QPN == 0 || Destination QPN == 0
   4393	 *   - the MAD header's management class is either
   4394	 *     IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE or
   4395	 *     IB_MGMT_CLASS_SUBN_LID_ROUTED
   4396	 *   - SLID != 0
   4397	 *
   4398	 * However, we know (and so don't need to check again) that,
   4399	 * for local SMPs, the MAD stack passes MADs with:
   4400	 *   - Source QPN of 0
   4401	 *   - MAD mgmt_class is IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
   4402	 *   - SLID is either: OPA_LID_PERMISSIVE (0xFFFFFFFF), or
   4403	 *     our own port's lid
   4404	 *
   4405	 */
   4406	if (pkey == LIM_MGMT_P_KEY || pkey == FULL_MGMT_P_KEY)
   4407		return 0;
   4408	ingress_pkey_table_fail(ppd, pkey, in_wc->slid);
   4409	return 1;
   4410}
   4411
   4412/**
   4413 * hfi1_pkey_validation_pma - It validates PKEYs for incoming PMA MAD packets.
   4414 * @ibp: IB port data
   4415 * @in_mad: MAD packet with header and data
   4416 * @in_wc: Work completion data such as source LID, port number, etc.
   4417 *
   4418 * These are all the possible logic rules for validating a pkey:
   4419 *
   4420 * a) If pkey neither FULL_MGMT_P_KEY nor LIM_MGMT_P_KEY,
   4421 *    and NOT self-originated packet:
   4422 *     Drop MAD packet as it should always be part of the
   4423 *     management partition unless it's a self-originated packet.
   4424 *
   4425 * b) If pkey_index -> FULL_MGMT_P_KEY, and LIM_MGMT_P_KEY in pkey table:
   4426 *     The packet is coming from a management node and the receiving node
   4427 *     is also a management node, so it is safe for the packet to go through.
   4428 *
   4429 * c) If pkey_index -> FULL_MGMT_P_KEY, and LIM_MGMT_P_KEY is NOT in pkey table:
   4430 *     Drop the packet as LIM_MGMT_P_KEY should always be in the pkey table.
   4431 *     It could be an FM misconfiguration.
   4432 *
   4433 * d) If pkey_index -> LIM_MGMT_P_KEY and FULL_MGMT_P_KEY is NOT in pkey table:
   4434 *     It is safe for the packet to go through since a non-management node is
   4435 *     talking to another non-management node.
   4436 *
   4437 * e) If pkey_index -> LIM_MGMT_P_KEY and FULL_MGMT_P_KEY in pkey table:
   4438 *     Drop the packet because a non-management node is talking to a
   4439 *     management node, and it could be an attack.
   4440 *
    4441	 * For the implementation, these rules can be simplified to only checking
   4442 * for (a) and (e). There's no need to check for rule (b) as
   4443 * the packet doesn't need to be dropped. Rule (c) is not possible in
   4444 * the driver as LIM_MGMT_P_KEY is always in the pkey table.
   4445 *
   4446 * Return:
   4447 * 0 - pkey is okay, -EINVAL it's a bad pkey
   4448 */
   4449static int hfi1_pkey_validation_pma(struct hfi1_ibport *ibp,
   4450				    const struct opa_mad *in_mad,
   4451				    const struct ib_wc *in_wc)
   4452{
   4453	u16 pkey_value = hfi1_lookup_pkey_value(ibp, in_wc->pkey_index);
   4454
   4455	/* Rule (a) from above */
   4456	if (!is_local_mad(ibp, in_mad, in_wc) &&
   4457	    pkey_value != LIM_MGMT_P_KEY &&
   4458	    pkey_value != FULL_MGMT_P_KEY)
   4459		return -EINVAL;
   4460
   4461	/* Rule (e) from above */
   4462	if (pkey_value == LIM_MGMT_P_KEY &&
   4463	    is_full_mgmt_pkey_in_table(ibp))
   4464		return -EINVAL;
   4465
   4466	return 0;
   4467}
   4468
   4469static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
   4470			    u32 port, const struct opa_mad *in_mad,
   4471			    struct opa_mad *out_mad,
   4472			    u32 *resp_len, int local_mad)
   4473{
   4474	struct opa_smp *smp = (struct opa_smp *)out_mad;
   4475	struct hfi1_ibport *ibp = to_iport(ibdev, port);
   4476	u8 *data;
   4477	u32 am, data_size;
   4478	__be16 attr_id;
   4479	int ret;
   4480
   4481	*out_mad = *in_mad;
   4482	data = opa_get_smp_data(smp);
   4483	data_size = (u32)opa_get_smp_data_size(smp);
   4484
   4485	am = be32_to_cpu(smp->attr_mod);
   4486	attr_id = smp->attr_id;
   4487	if (smp->class_version != OPA_SM_CLASS_VERSION) {
   4488		smp->status |= IB_SMP_UNSUP_VERSION;
   4489		ret = reply((struct ib_mad_hdr *)smp);
   4490		return ret;
   4491	}
   4492	ret = check_mkey(ibp, (struct ib_mad_hdr *)smp, mad_flags, smp->mkey,
   4493			 smp->route.dr.dr_slid, smp->route.dr.return_path,
   4494			 smp->hop_cnt);
   4495	if (ret) {
   4496		u32 port_num = be32_to_cpu(smp->attr_mod);
   4497
   4498		/*
   4499		 * If this is a get/set portinfo, we already check the
   4500		 * M_Key if the MAD is for another port and the M_Key
   4501		 * is OK on the receiving port. This check is needed
   4502		 * to increment the error counters when the M_Key
   4503		 * fails to match on *both* ports.
   4504		 */
   4505		if (attr_id == IB_SMP_ATTR_PORT_INFO &&
   4506		    (smp->method == IB_MGMT_METHOD_GET ||
   4507		     smp->method == IB_MGMT_METHOD_SET) &&
   4508		    port_num && port_num <= ibdev->phys_port_cnt &&
   4509		    port != port_num)
   4510			(void)check_mkey(to_iport(ibdev, port_num),
   4511					  (struct ib_mad_hdr *)smp, 0,
   4512					  smp->mkey, smp->route.dr.dr_slid,
   4513					  smp->route.dr.return_path,
   4514					  smp->hop_cnt);
   4515		ret = IB_MAD_RESULT_FAILURE;
   4516		return ret;
   4517	}
   4518
   4519	*resp_len = opa_get_smp_header_size(smp);
   4520
   4521	switch (smp->method) {
   4522	case IB_MGMT_METHOD_GET:
   4523		switch (attr_id) {
   4524		default:
   4525			clear_opa_smp_data(smp);
   4526			ret = subn_get_opa_sma(attr_id, smp, am, data,
   4527					       ibdev, port, resp_len,
   4528					       data_size);
   4529			break;
   4530		case OPA_ATTRIB_ID_AGGREGATE:
   4531			ret = subn_get_opa_aggregate(smp, ibdev, port,
   4532						     resp_len);
   4533			break;
   4534		}
   4535		break;
   4536	case IB_MGMT_METHOD_SET:
   4537		switch (attr_id) {
   4538		default:
   4539			ret = subn_set_opa_sma(attr_id, smp, am, data,
   4540					       ibdev, port, resp_len,
   4541					       data_size, local_mad);
   4542			break;
   4543		case OPA_ATTRIB_ID_AGGREGATE:
   4544			ret = subn_set_opa_aggregate(smp, ibdev, port,
   4545						     resp_len, local_mad);
   4546			break;
   4547		}
   4548		break;
   4549	case IB_MGMT_METHOD_TRAP:
   4550	case IB_MGMT_METHOD_REPORT:
   4551	case IB_MGMT_METHOD_REPORT_RESP:
   4552	case IB_MGMT_METHOD_GET_RESP:
   4553		/*
   4554		 * The ib_mad module will call us to process responses
   4555		 * before checking for other consumers.
   4556		 * Just tell the caller to process it normally.
   4557		 */
   4558		ret = IB_MAD_RESULT_SUCCESS;
   4559		break;
   4560	case IB_MGMT_METHOD_TRAP_REPRESS:
   4561		subn_handle_opa_trap_repress(ibp, smp);
   4562		/* Always successful */
   4563		ret = IB_MAD_RESULT_SUCCESS;
   4564		break;
   4565	default:
   4566		smp->status |= IB_SMP_UNSUP_METHOD;
   4567		ret = reply((struct ib_mad_hdr *)smp);
   4568		break;
   4569	}
   4570
   4571	return ret;
   4572}
   4573
   4574static int process_subn(struct ib_device *ibdev, int mad_flags,
   4575			u32 port, const struct ib_mad *in_mad,
   4576			struct ib_mad *out_mad)
   4577{
   4578	struct ib_smp *smp = (struct ib_smp *)out_mad;
   4579	struct hfi1_ibport *ibp = to_iport(ibdev, port);
   4580	int ret;
   4581
   4582	*out_mad = *in_mad;
   4583	if (smp->class_version != 1) {
   4584		smp->status |= IB_SMP_UNSUP_VERSION;
   4585		ret = reply((struct ib_mad_hdr *)smp);
   4586		return ret;
   4587	}
   4588
   4589	ret = check_mkey(ibp, (struct ib_mad_hdr *)smp, mad_flags,
   4590			 smp->mkey, (__force __be32)smp->dr_slid,
   4591			 smp->return_path, smp->hop_cnt);
   4592	if (ret) {
   4593		u32 port_num = be32_to_cpu(smp->attr_mod);
   4594
   4595		/*
   4596		 * If this is a get/set portinfo, we already check the
   4597		 * M_Key if the MAD is for another port and the M_Key
   4598		 * is OK on the receiving port. This check is needed
   4599		 * to increment the error counters when the M_Key
   4600		 * fails to match on *both* ports.
   4601		 */
   4602		if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO &&
   4603		    (smp->method == IB_MGMT_METHOD_GET ||
   4604		     smp->method == IB_MGMT_METHOD_SET) &&
   4605		    port_num && port_num <= ibdev->phys_port_cnt &&
   4606		    port != port_num)
   4607			(void)check_mkey(to_iport(ibdev, port_num),
   4608					 (struct ib_mad_hdr *)smp, 0,
   4609					 smp->mkey,
   4610					 (__force __be32)smp->dr_slid,
   4611					 smp->return_path, smp->hop_cnt);
   4612		ret = IB_MAD_RESULT_FAILURE;
   4613		return ret;
   4614	}
   4615
   4616	switch (smp->method) {
   4617	case IB_MGMT_METHOD_GET:
   4618		switch (smp->attr_id) {
   4619		case IB_SMP_ATTR_NODE_INFO:
   4620			ret = subn_get_nodeinfo(smp, ibdev, port);
   4621			break;
   4622		default:
   4623			smp->status |= IB_SMP_UNSUP_METH_ATTR;
   4624			ret = reply((struct ib_mad_hdr *)smp);
   4625			break;
   4626		}
   4627		break;
   4628	}
   4629
   4630	return ret;
   4631}
   4632
   4633static int process_perf(struct ib_device *ibdev, u32 port,
   4634			const struct ib_mad *in_mad,
   4635			struct ib_mad *out_mad)
   4636{
   4637	struct ib_pma_mad *pmp = (struct ib_pma_mad *)out_mad;
   4638	struct ib_class_port_info *cpi = (struct ib_class_port_info *)
   4639						&pmp->data;
   4640	int ret = IB_MAD_RESULT_FAILURE;
   4641
   4642	*out_mad = *in_mad;
   4643	if (pmp->mad_hdr.class_version != 1) {
   4644		pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
   4645		ret = reply((struct ib_mad_hdr *)pmp);
   4646		return ret;
   4647	}
   4648
   4649	switch (pmp->mad_hdr.method) {
   4650	case IB_MGMT_METHOD_GET:
   4651		switch (pmp->mad_hdr.attr_id) {
   4652		case IB_PMA_PORT_COUNTERS:
   4653			ret = pma_get_ib_portcounters(pmp, ibdev, port);
   4654			break;
   4655		case IB_PMA_PORT_COUNTERS_EXT:
   4656			ret = pma_get_ib_portcounters_ext(pmp, ibdev, port);
   4657			break;
   4658		case IB_PMA_CLASS_PORT_INFO:
   4659			cpi->capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
   4660			ret = reply((struct ib_mad_hdr *)pmp);
   4661			break;
   4662		default:
   4663			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
   4664			ret = reply((struct ib_mad_hdr *)pmp);
   4665			break;
   4666		}
   4667		break;
   4668
   4669	case IB_MGMT_METHOD_SET:
   4670		if (pmp->mad_hdr.attr_id) {
   4671			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
   4672			ret = reply((struct ib_mad_hdr *)pmp);
   4673		}
   4674		break;
   4675
   4676	case IB_MGMT_METHOD_TRAP:
   4677	case IB_MGMT_METHOD_GET_RESP:
   4678		/*
   4679		 * The ib_mad module will call us to process responses
   4680		 * before checking for other consumers.
   4681		 * Just tell the caller to process it normally.
   4682		 */
   4683		ret = IB_MAD_RESULT_SUCCESS;
   4684		break;
   4685
   4686	default:
   4687		pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
   4688		ret = reply((struct ib_mad_hdr *)pmp);
   4689		break;
   4690	}
   4691
   4692	return ret;
   4693}
   4694
   4695static int process_perf_opa(struct ib_device *ibdev, u32 port,
   4696			    const struct opa_mad *in_mad,
   4697			    struct opa_mad *out_mad, u32 *resp_len)
   4698{
   4699	struct opa_pma_mad *pmp = (struct opa_pma_mad *)out_mad;
   4700	int ret;
   4701
   4702	*out_mad = *in_mad;
   4703
   4704	if (pmp->mad_hdr.class_version != OPA_SM_CLASS_VERSION) {
   4705		pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
   4706		return reply((struct ib_mad_hdr *)pmp);
   4707	}
   4708
   4709	*resp_len = sizeof(pmp->mad_hdr);
   4710
   4711	switch (pmp->mad_hdr.method) {
   4712	case IB_MGMT_METHOD_GET:
   4713		switch (pmp->mad_hdr.attr_id) {
   4714		case IB_PMA_CLASS_PORT_INFO:
   4715			ret = pma_get_opa_classportinfo(pmp, ibdev, resp_len);
   4716			break;
   4717		case OPA_PM_ATTRIB_ID_PORT_STATUS:
   4718			ret = pma_get_opa_portstatus(pmp, ibdev, port,
   4719						     resp_len);
   4720			break;
   4721		case OPA_PM_ATTRIB_ID_DATA_PORT_COUNTERS:
   4722			ret = pma_get_opa_datacounters(pmp, ibdev, port,
   4723						       resp_len);
   4724			break;
   4725		case OPA_PM_ATTRIB_ID_ERROR_PORT_COUNTERS:
   4726			ret = pma_get_opa_porterrors(pmp, ibdev, port,
   4727						     resp_len);
   4728			break;
   4729		case OPA_PM_ATTRIB_ID_ERROR_INFO:
   4730			ret = pma_get_opa_errorinfo(pmp, ibdev, port,
   4731						    resp_len);
   4732			break;
   4733		default:
   4734			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
   4735			ret = reply((struct ib_mad_hdr *)pmp);
   4736			break;
   4737		}
   4738		break;
   4739
   4740	case IB_MGMT_METHOD_SET:
   4741		switch (pmp->mad_hdr.attr_id) {
   4742		case OPA_PM_ATTRIB_ID_CLEAR_PORT_STATUS:
   4743			ret = pma_set_opa_portstatus(pmp, ibdev, port,
   4744						     resp_len);
   4745			break;
   4746		case OPA_PM_ATTRIB_ID_ERROR_INFO:
   4747			ret = pma_set_opa_errorinfo(pmp, ibdev, port,
   4748						    resp_len);
   4749			break;
   4750		default:
   4751			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
   4752			ret = reply((struct ib_mad_hdr *)pmp);
   4753			break;
   4754		}
   4755		break;
   4756
   4757	case IB_MGMT_METHOD_TRAP:
   4758	case IB_MGMT_METHOD_GET_RESP:
   4759		/*
   4760		 * The ib_mad module will call us to process responses
   4761		 * before checking for other consumers.
   4762		 * Just tell the caller to process it normally.
   4763		 */
   4764		ret = IB_MAD_RESULT_SUCCESS;
   4765		break;
   4766
   4767	default:
   4768		pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
   4769		ret = reply((struct ib_mad_hdr *)pmp);
   4770		break;
   4771	}
   4772
   4773	return ret;
   4774}
   4775
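        /*
         * hfi1_process_opa_mad() dispatches an OPA MAD by management class:
         * locally originated subnet management packets get an extra
         * opa_local_smp_check() permission check, performance management
         * MADs get pkey validation, and any other class is passed through
         * for normal processing.  The reply size reported back depends on
         * whether a reply was actually generated (see the bail label).
         */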
   4776static int hfi1_process_opa_mad(struct ib_device *ibdev, int mad_flags,
   4777				u32 port, const struct ib_wc *in_wc,
   4778				const struct ib_grh *in_grh,
   4779				const struct opa_mad *in_mad,
   4780				struct opa_mad *out_mad, size_t *out_mad_size,
   4781				u16 *out_mad_pkey_index)
   4782{
   4783	int ret;
   4784	int pkey_idx;
   4785	int local_mad = 0;
   4786	u32 resp_len = in_wc->byte_len - sizeof(*in_grh);
   4787	struct hfi1_ibport *ibp = to_iport(ibdev, port);
   4788
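        	/*
        	 * Replies are sent with the limited management pkey; if it is
        	 * not present in the local pkey table, warn and fall back to
        	 * pkey index 1.
        	 */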
   4789	pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
   4790	if (pkey_idx < 0) {
   4791		pr_warn("failed to find limited mgmt pkey, defaulting 0x%x\n",
   4792			hfi1_get_pkey(ibp, 1));
   4793		pkey_idx = 1;
   4794	}
   4795	*out_mad_pkey_index = (u16)pkey_idx;
   4796
   4797	switch (in_mad->mad_hdr.mgmt_class) {
   4798	case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
   4799	case IB_MGMT_CLASS_SUBN_LID_ROUTED:
   4800		local_mad = is_local_mad(ibp, in_mad, in_wc);
   4801		if (local_mad) {
   4802			ret = opa_local_smp_check(ibp, in_wc);
   4803			if (ret)
   4804				return IB_MAD_RESULT_FAILURE;
   4805		}
   4806		ret = process_subn_opa(ibdev, mad_flags, port, in_mad,
   4807				       out_mad, &resp_len, local_mad);
   4808		goto bail;
   4809	case IB_MGMT_CLASS_PERF_MGMT:
   4810		ret = hfi1_pkey_validation_pma(ibp, in_mad, in_wc);
   4811		if (ret)
   4812			return IB_MAD_RESULT_FAILURE;
   4813
   4814		ret = process_perf_opa(ibdev, port, in_mad, out_mad, &resp_len);
   4815		goto bail;
   4816
   4817	default:
   4818		ret = IB_MAD_RESULT_SUCCESS;
   4819	}
   4820
   4821bail:
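        	/*
        	 * A generated reply reports its actual length, rounded up to an
        	 * 8-byte multiple; a MAD passed through for normal processing
        	 * keeps the incoming payload size.
        	 */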
   4822	if (ret & IB_MAD_RESULT_REPLY)
   4823		*out_mad_size = round_up(resp_len, 8);
   4824	else if (ret & IB_MAD_RESULT_SUCCESS)
   4825		*out_mad_size = in_wc->byte_len - sizeof(struct ib_grh);
   4826
   4827	return ret;
   4828}
   4829
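        /*
         * hfi1_process_ib_mad() handles MADs in standard IB (non-OPA) format,
         * forwarding the subnet management and performance management classes
         * to their class-specific handlers and passing all other classes
         * through untouched.
         */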
   4830static int hfi1_process_ib_mad(struct ib_device *ibdev, int mad_flags, u32 port,
   4831			       const struct ib_wc *in_wc,
   4832			       const struct ib_grh *in_grh,
   4833			       const struct ib_mad *in_mad,
   4834			       struct ib_mad *out_mad)
   4835{
   4836	int ret;
   4837
   4838	switch (in_mad->mad_hdr.mgmt_class) {
   4839	case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
   4840	case IB_MGMT_CLASS_SUBN_LID_ROUTED:
   4841		ret = process_subn(ibdev, mad_flags, port, in_mad, out_mad);
   4842		break;
   4843	case IB_MGMT_CLASS_PERF_MGMT:
   4844		ret = process_perf(ibdev, port, in_mad, out_mad);
   4845		break;
   4846	default:
   4847		ret = IB_MAD_RESULT_SUCCESS;
   4848		break;
   4849	}
   4850
   4851	return ret;
   4852}
   4853
   4854/**
   4855 * hfi1_process_mad - process an incoming MAD packet
   4856 * @ibdev: the infiniband device this packet came in on
   4857 * @mad_flags: MAD flags
   4858 * @port: the port number this packet came in on
   4859 * @in_wc: the work completion entry for this packet
   4860 * @in_grh: the global route header for this packet
   4861 * @in_mad: the incoming MAD
   4862 * @out_mad: any outgoing MAD reply
   4863 * @out_mad_size: size of the outgoing MAD reply
    4864 * @out_mad_pkey_index: used to pass back the partition key (pkey) index
   4865 *
   4866 * Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not
   4867 * interested in processing.
   4868 *
   4869 * Note that the verbs framework has already done the MAD sanity checks,
   4870 * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
   4871 * MADs.
   4872 *
   4873 * This is called by the ib_mad module.
   4874 */
   4875int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u32 port,
   4876		     const struct ib_wc *in_wc, const struct ib_grh *in_grh,
   4877		     const struct ib_mad *in_mad, struct ib_mad *out_mad,
   4878		     size_t *out_mad_size, u16 *out_mad_pkey_index)
   4879{
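        	/* Route by base version: OPA-format MADs vs. standard IB MADs. */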
   4880	switch (in_mad->mad_hdr.base_version) {
   4881	case OPA_MGMT_BASE_VERSION:
   4882		return hfi1_process_opa_mad(ibdev, mad_flags, port,
   4883					    in_wc, in_grh,
   4884					    (struct opa_mad *)in_mad,
   4885					    (struct opa_mad *)out_mad,
   4886					    out_mad_size,
   4887					    out_mad_pkey_index);
   4888	case IB_MGMT_BASE_VERSION:
   4889		return hfi1_process_ib_mad(ibdev, mad_flags, port, in_wc,
   4890					   in_grh, in_mad, out_mad);
   4891	default:
   4892		break;
   4893	}
   4894
   4895	return IB_MAD_RESULT_FAILURE;
   4896}