cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

hfi.h (77336B)


      1/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
      2/*
      3 * Copyright(c) 2020 Cornelis Networks, Inc.
      4 * Copyright(c) 2015-2020 Intel Corporation.
      5 */
      6
      7#ifndef _HFI1_KERNEL_H
      8#define _HFI1_KERNEL_H
      9
     10#include <linux/refcount.h>
     11#include <linux/interrupt.h>
     12#include <linux/pci.h>
     13#include <linux/dma-mapping.h>
     14#include <linux/mutex.h>
     15#include <linux/list.h>
     16#include <linux/scatterlist.h>
     17#include <linux/slab.h>
     18#include <linux/io.h>
     19#include <linux/fs.h>
     20#include <linux/completion.h>
     21#include <linux/kref.h>
     22#include <linux/sched.h>
     23#include <linux/cdev.h>
     24#include <linux/delay.h>
     25#include <linux/kthread.h>
     26#include <linux/i2c.h>
     27#include <linux/i2c-algo-bit.h>
     28#include <linux/xarray.h>
     29#include <rdma/ib_hdrs.h>
     30#include <rdma/opa_addr.h>
     31#include <linux/rhashtable.h>
     32#include <rdma/rdma_vt.h>
     33
     34#include "chip_registers.h"
     35#include "common.h"
     36#include "opfn.h"
     37#include "verbs.h"
     38#include "pio.h"
     39#include "chip.h"
     40#include "mad.h"
     41#include "qsfp.h"
     42#include "platform.h"
     43#include "affinity.h"
     44#include "msix.h"
     45
     46/* bumped 1 from s/w major version of TrueScale */
     47#define HFI1_CHIP_VERS_MAJ 3U
     48
     49/* don't care about this except printing */
     50#define HFI1_CHIP_VERS_MIN 0U
     51
      52/* The Organizationally Unique Identifier (Mfg code), and its position in GUID */
     53#define HFI1_OUI 0x001175
     54#define HFI1_OUI_LSB 40
     55
     56#define DROP_PACKET_OFF		0
     57#define DROP_PACKET_ON		1
     58
     59#define NEIGHBOR_TYPE_HFI		0
     60#define NEIGHBOR_TYPE_SWITCH	1
     61
     62#define HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES 5
     63
     64extern unsigned long hfi1_cap_mask;
     65#define HFI1_CAP_KGET_MASK(mask, cap) ((mask) & HFI1_CAP_##cap)
     66#define HFI1_CAP_UGET_MASK(mask, cap) \
     67	(((mask) >> HFI1_CAP_USER_SHIFT) & HFI1_CAP_##cap)
     68#define HFI1_CAP_KGET(cap) (HFI1_CAP_KGET_MASK(hfi1_cap_mask, cap))
     69#define HFI1_CAP_UGET(cap) (HFI1_CAP_UGET_MASK(hfi1_cap_mask, cap))
     70#define HFI1_CAP_IS_KSET(cap) (!!HFI1_CAP_KGET(cap))
     71#define HFI1_CAP_IS_USET(cap) (!!HFI1_CAP_UGET(cap))
     72#define HFI1_MISC_GET() ((hfi1_cap_mask >> HFI1_CAP_MISC_SHIFT) & \
     73			HFI1_CAP_MISC_MASK)
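
/*
 * Illustrative sketch (not part of the original header): kernel and user
 * capability bits share one mask, with the user bits shifted up by
 * HFI1_CAP_USER_SHIFT.  A caller would test a kernel capability like:
 *
 *	if (HFI1_CAP_IS_KSET(DMA_RTAIL))
 *		... enable tail updates for the context ...
 */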
      74/* Offline Disabled Reason is 4 bits wide */
     75#define HFI1_ODR_MASK(rsn) ((rsn) & OPA_PI_MASK_OFFLINE_REASON)
     76
     77/*
     78 * Control context is always 0 and handles the error packets.
     79 * It also handles the VL15 and multicast packets.
     80 */
     81#define HFI1_CTRL_CTXT    0
     82
     83/*
     84 * Driver context will store software counters for each of the events
     85 * associated with these status registers
     86 */
     87#define NUM_CCE_ERR_STATUS_COUNTERS 41
     88#define NUM_RCV_ERR_STATUS_COUNTERS 64
     89#define NUM_MISC_ERR_STATUS_COUNTERS 13
     90#define NUM_SEND_PIO_ERR_STATUS_COUNTERS 36
     91#define NUM_SEND_DMA_ERR_STATUS_COUNTERS 4
     92#define NUM_SEND_EGRESS_ERR_STATUS_COUNTERS 64
     93#define NUM_SEND_ERR_STATUS_COUNTERS 3
     94#define NUM_SEND_CTXT_ERR_STATUS_COUNTERS 5
     95#define NUM_SEND_DMA_ENG_ERR_STATUS_COUNTERS 24
     96
     97/*
      98 * Per-driver stats that are neither device- nor port-specific, or are
      99 * summed over all of the devices and ports.
     100 * They are described by name via the ipathfs filesystem, so the layout
     101 * and number of elements can change without breaking compatibility.
     102 * If members are added or deleted, hfi1_statnames[] in debugfs.c must
     103 * change to match.
    104 */
    105struct hfi1_ib_stats {
    106	__u64 sps_ints; /* number of interrupts handled */
    107	__u64 sps_errints; /* number of error interrupts */
    108	__u64 sps_txerrs; /* tx-related packet errors */
    109	__u64 sps_rcverrs; /* non-crc rcv packet errors */
    110	__u64 sps_hwerrs; /* hardware errors reported (parity, etc.) */
    111	__u64 sps_nopiobufs; /* no pio bufs avail from kernel */
    112	__u64 sps_ctxts; /* number of contexts currently open */
    113	__u64 sps_lenerrs; /* number of kernel packets where RHF != LRH len */
    114	__u64 sps_buffull;
    115	__u64 sps_hdrfull;
    116};
    117
    118extern struct hfi1_ib_stats hfi1_stats;
    119extern const struct pci_error_handlers hfi1_pci_err_handler;
    120
    121extern int num_driver_cntrs;
    122
    123/*
    124 * First-cut criterion for "device is active" is
    125 * two thousand dwords combined Tx, Rx traffic per
    126 * 5-second interval. SMA packets are 64 dwords,
    127 * and occur "a few per second", presumably each way.
    128 */
    129#define HFI1_TRAFFIC_ACTIVE_THRESHOLD (2000)
    130
    131/*
     132 * The following contains all data related to a single context (formerly called a port).
    133 */
    134
    135struct hfi1_opcode_stats_perctx;
    136
    137struct ctxt_eager_bufs {
    138	struct eager_buffer {
    139		void *addr;
    140		dma_addr_t dma;
    141		ssize_t len;
    142	} *buffers;
    143	struct {
    144		void *addr;
    145		dma_addr_t dma;
    146	} *rcvtids;
    147	u32 size;                /* total size of eager buffers */
    148	u32 rcvtid_size;         /* size of each eager rcv tid */
    149	u16 count;               /* size of buffers array */
    150	u16 numbufs;             /* number of buffers allocated */
    151	u16 alloced;             /* number of rcvarray entries used */
    152	u16 threshold;           /* head update threshold */
    153};
    154
    155struct exp_tid_set {
    156	struct list_head list;
    157	u32 count;
    158};
    159
    160struct hfi1_ctxtdata;
    161typedef int (*intr_handler)(struct hfi1_ctxtdata *rcd, int data);
    162typedef void (*rhf_rcv_function_ptr)(struct hfi1_packet *packet);
    163
    164struct tid_queue {
    165	struct list_head queue_head;
    166			/* queue head for QP TID resource waiters */
    167	u32 enqueue;	/* count of tid enqueues */
    168	u32 dequeue;	/* count of tid dequeues */
    169};
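
/*
 * Illustrative note (assumption, not from the original source): the number
 * of entries currently waiting on a tid_queue is the difference of the two
 * monotonically increasing counters:
 *
 *	depth = q->enqueue - q->dequeue;  // u32 arithmetic handles wrap
 */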
    170
    171struct hfi1_ctxtdata {
    172	/* rcvhdrq base, needs mmap before useful */
    173	void *rcvhdrq;
    174	/* kernel virtual address where hdrqtail is updated */
    175	volatile __le64 *rcvhdrtail_kvaddr;
    176	/* so functions that need physical port can get it easily */
    177	struct hfi1_pportdata *ppd;
    178	/* so file ops can get at unit */
    179	struct hfi1_devdata *dd;
    180	/* this receive context's assigned PIO ACK send context */
    181	struct send_context *sc;
    182	/* per context recv functions */
    183	const rhf_rcv_function_ptr *rhf_rcv_function_map;
    184	/*
    185	 * The interrupt handler for a particular receive context can vary
     186	 * throughout its lifetime. This is not a lock-protected data member, so
     187	 * it must be updated atomically, and the previous and new values must
     188	 * always be valid. Worst case is we process an extra interrupt and up to 64
    189	 * packets with the wrong interrupt handler.
    190	 */
    191	intr_handler do_interrupt;
    192	/** fast handler after autoactive */
    193	intr_handler fast_handler;
    194	/** slow handler */
    195	intr_handler slow_handler;
     196	/* napi pointer associated with netdev */
    197	struct napi_struct *napi;
    198	/* verbs rx_stats per rcd */
    199	struct hfi1_opcode_stats_perctx *opstats;
    200	/* clear interrupt mask */
    201	u64 imask;
    202	/* ctxt rcvhdrq head offset */
    203	u32 head;
    204	/* number of rcvhdrq entries */
    205	u16 rcvhdrq_cnt;
    206	u8 ireg;	/* clear interrupt register */
    207	/* receive packet sequence counter */
    208	u8 seq_cnt;
    209	/* size of each of the rcvhdrq entries */
    210	u8 rcvhdrqentsize;
    211	/* offset of RHF within receive header entry */
    212	u8 rhf_offset;
    213	/* dynamic receive available interrupt timeout */
    214	u8 rcvavail_timeout;
     215	/* Indicates that this is a vnic context */
    216	bool is_vnic;
    217	/* vnic queue index this context is mapped to */
    218	u8 vnic_q_idx;
    219	/* Is ASPM interrupt supported for this context */
    220	bool aspm_intr_supported;
    221	/* ASPM state (enabled/disabled) for this context */
    222	bool aspm_enabled;
    223	/* Is ASPM processing enabled for this context (in intr context) */
    224	bool aspm_intr_enable;
    225	struct ctxt_eager_bufs egrbufs;
    226	/* QPs waiting for context processing */
    227	struct list_head qp_wait_list;
    228	/* tid allocation lists */
    229	struct exp_tid_set tid_group_list;
    230	struct exp_tid_set tid_used_list;
    231	struct exp_tid_set tid_full_list;
    232
    233	/* Timer for re-enabling ASPM if interrupt activity quiets down */
    234	struct timer_list aspm_timer;
    235	/* per-context configuration flags */
    236	unsigned long flags;
    237	/* array of tid_groups */
    238	struct tid_group  *groups;
    239	/* mmap of hdrq, must fit in 44 bits */
    240	dma_addr_t rcvhdrq_dma;
    241	dma_addr_t rcvhdrqtailaddr_dma;
    242	/* Last interrupt timestamp */
    243	ktime_t aspm_ts_last_intr;
    244	/* Last timestamp at which we scheduled a timer for this context */
    245	ktime_t aspm_ts_timer_sched;
    246	/* Lock to serialize between intr, timer intr and user threads */
    247	spinlock_t aspm_lock;
    248	/* Reference count the base context usage */
    249	struct kref kref;
    250	/* numa node of this context */
    251	int numa_id;
    252	/* associated msix interrupt. */
    253	s16 msix_intr;
    254	/* job key */
    255	u16 jkey;
    256	/* number of RcvArray groups for this context. */
    257	u16 rcv_array_groups;
    258	/* index of first eager TID entry. */
    259	u16 eager_base;
    260	/* number of expected TID entries */
    261	u16 expected_count;
    262	/* index of first expected TID entry. */
    263	u16 expected_base;
    264	/* Device context index */
    265	u8 ctxt;
    266
    267	/* PSM Specific fields */
    268	/* lock protecting all Expected TID data */
    269	struct mutex exp_mutex;
    270	/* lock protecting all Expected TID data of kernel contexts */
    271	spinlock_t exp_lock;
    272	/* Queue for QP's waiting for HW TID flows */
    273	struct tid_queue flow_queue;
    274	/* Queue for QP's waiting for HW receive array entries */
    275	struct tid_queue rarr_queue;
    276	/* when waiting for rcv or pioavail */
    277	wait_queue_head_t wait;
    278	/* uuid from PSM */
    279	u8 uuid[16];
    280	/* same size as task_struct .comm[], command that opened context */
    281	char comm[TASK_COMM_LEN];
    282	/* Bitmask of in use context(s) */
    283	DECLARE_BITMAP(in_use_ctxts, HFI1_MAX_SHARED_CTXTS);
    284	/* per-context event flags for fileops/intr communication */
    285	unsigned long event_flags;
    286	/* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */
    287	void *subctxt_uregbase;
    288	/* An array of pages for the eager receive buffers * N */
    289	void *subctxt_rcvegrbuf;
    290	/* An array of pages for the eager header queue entries * N */
    291	void *subctxt_rcvhdr_base;
    292	/* total number of polled urgent packets */
    293	u32 urgent;
    294	/* saved total number of polled urgent packets for poll edge trigger */
    295	u32 urgent_poll;
    296	/* Type of packets or conditions we want to poll for */
    297	u16 poll_type;
    298	/* non-zero if ctxt is being shared. */
    299	u16 subctxt_id;
    300	/* The version of the library which opened this ctxt */
    301	u32 userversion;
    302	/*
    303	 * non-zero if ctxt can be shared, and defines the maximum number of
    304	 * sub-contexts for this device context.
    305	 */
    306	u8 subctxt_cnt;
    307
    308	/* Bit mask to track free TID RDMA HW flows */
    309	unsigned long flow_mask;
    310	struct tid_flow_state flows[RXE_NUM_TID_FLOWS];
    311};
    312
    313/**
    314 * rcvhdrq_size - return total size in bytes for header queue
    315 * @rcd: the receive context
    316 *
    317 * rcvhdrqentsize is in DWs, so we have to convert to bytes
    318 *
    319 */
    320static inline u32 rcvhdrq_size(struct hfi1_ctxtdata *rcd)
    321{
    322	return PAGE_ALIGN(rcd->rcvhdrq_cnt *
    323			  rcd->rcvhdrqentsize * sizeof(u32));
    324}
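
/*
 * Worked example (illustrative): with rcvhdrq_cnt = 2048 entries of
 * rcvhdrqentsize = 32 DWs each, the queue needs 2048 * 32 * 4 = 256 KiB,
 * which PAGE_ALIGN() leaves unchanged on a 4 KiB-page system.
 */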
    325
    326/*
    327 * Represents a single packet at a high level. Put commonly computed things in
     328 * here so we do not have to keep computing them over and over. The rule of
     329 * thumb: if something is used only once to derive some value, store that
     330 * something in here; if the derived value is used multiple times, store the
     331 * result of the derivation in here.
    332 */
    333struct hfi1_packet {
    334	void *ebuf;
    335	void *hdr;
    336	void *payload;
    337	struct hfi1_ctxtdata *rcd;
    338	__le32 *rhf_addr;
    339	struct rvt_qp *qp;
    340	struct ib_other_headers *ohdr;
    341	struct ib_grh *grh;
    342	struct opa_16b_mgmt *mgmt;
    343	u64 rhf;
    344	u32 maxcnt;
    345	u32 rhqoff;
    346	u32 dlid;
    347	u32 slid;
    348	int numpkt;
    349	u16 tlen;
    350	s16 etail;
    351	u16 pkey;
    352	u8 hlen;
    353	u8 rsize;
    354	u8 updegr;
    355	u8 etype;
    356	u8 extra_byte;
    357	u8 pad;
    358	u8 sc;
    359	u8 sl;
    360	u8 opcode;
    361	bool migrated;
    362};
    363
    364/* Packet types */
    365#define HFI1_PKT_TYPE_9B  0
    366#define HFI1_PKT_TYPE_16B 1
    367
    368/*
    369 * OPA 16B Header
    370 */
    371#define OPA_16B_L4_MASK		0xFFull
    372#define OPA_16B_SC_MASK		0x1F00000ull
    373#define OPA_16B_SC_SHIFT	20
    374#define OPA_16B_LID_MASK	0xFFFFFull
    375#define OPA_16B_DLID_MASK	0xF000ull
    376#define OPA_16B_DLID_SHIFT	20
    377#define OPA_16B_DLID_HIGH_SHIFT	12
    378#define OPA_16B_SLID_MASK	0xF00ull
    379#define OPA_16B_SLID_SHIFT	20
    380#define OPA_16B_SLID_HIGH_SHIFT	8
    381#define OPA_16B_BECN_MASK       0x80000000ull
    382#define OPA_16B_BECN_SHIFT      31
    383#define OPA_16B_FECN_MASK       0x10000000ull
    384#define OPA_16B_FECN_SHIFT      28
    385#define OPA_16B_L2_MASK		0x60000000ull
    386#define OPA_16B_L2_SHIFT	29
    387#define OPA_16B_PKEY_MASK	0xFFFF0000ull
    388#define OPA_16B_PKEY_SHIFT	16
    389#define OPA_16B_LEN_MASK	0x7FF00000ull
    390#define OPA_16B_LEN_SHIFT	20
    391#define OPA_16B_RC_MASK		0xE000000ull
    392#define OPA_16B_RC_SHIFT	25
    393#define OPA_16B_AGE_MASK	0xFF0000ull
    394#define OPA_16B_AGE_SHIFT	16
    395#define OPA_16B_ENTROPY_MASK	0xFFFFull
    396
    397/*
    398 * OPA 16B L2/L4 Encodings
    399 */
    400#define OPA_16B_L4_9B		0x00
    401#define OPA_16B_L2_TYPE		0x02
    402#define OPA_16B_L4_FM		0x08
    403#define OPA_16B_L4_IB_LOCAL	0x09
    404#define OPA_16B_L4_IB_GLOBAL	0x0A
    405#define OPA_16B_L4_ETHR		OPA_VNIC_L4_ETHR
    406
    407/*
    408 * OPA 16B Management
    409 */
    410#define OPA_16B_L4_FM_PAD	3  /* fixed 3B pad */
    411#define OPA_16B_L4_FM_HLEN	24 /* 16B(16) + L4_FM(8) */
    412
    413static inline u8 hfi1_16B_get_l4(struct hfi1_16b_header *hdr)
    414{
    415	return (u8)(hdr->lrh[2] & OPA_16B_L4_MASK);
    416}
    417
    418static inline u8 hfi1_16B_get_sc(struct hfi1_16b_header *hdr)
    419{
    420	return (u8)((hdr->lrh[1] & OPA_16B_SC_MASK) >> OPA_16B_SC_SHIFT);
    421}
    422
    423static inline u32 hfi1_16B_get_dlid(struct hfi1_16b_header *hdr)
    424{
    425	return (u32)((hdr->lrh[1] & OPA_16B_LID_MASK) |
    426		     (((hdr->lrh[2] & OPA_16B_DLID_MASK) >>
    427		     OPA_16B_DLID_HIGH_SHIFT) << OPA_16B_DLID_SHIFT));
    428}
    429
    430static inline u32 hfi1_16B_get_slid(struct hfi1_16b_header *hdr)
    431{
    432	return (u32)((hdr->lrh[0] & OPA_16B_LID_MASK) |
    433		     (((hdr->lrh[2] & OPA_16B_SLID_MASK) >>
    434		     OPA_16B_SLID_HIGH_SHIFT) << OPA_16B_SLID_SHIFT));
    435}
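
/*
 * Illustrative note (not in the original source): a 16B LID is 24 bits
 * wide.  Bits 19:0 sit in lrh[0] (SLID) or lrh[1] (DLID); bits 23:20 are
 * carried in lrh[2] and shifted back into place by the accessors above,
 * e.g. DLID = (lrh[1] & 0xFFFFF) | (((lrh[2] & 0xF000) >> 12) << 20).
 */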
    436
    437static inline u8 hfi1_16B_get_becn(struct hfi1_16b_header *hdr)
    438{
    439	return (u8)((hdr->lrh[0] & OPA_16B_BECN_MASK) >> OPA_16B_BECN_SHIFT);
    440}
    441
    442static inline u8 hfi1_16B_get_fecn(struct hfi1_16b_header *hdr)
    443{
    444	return (u8)((hdr->lrh[1] & OPA_16B_FECN_MASK) >> OPA_16B_FECN_SHIFT);
    445}
    446
    447static inline u8 hfi1_16B_get_l2(struct hfi1_16b_header *hdr)
    448{
    449	return (u8)((hdr->lrh[1] & OPA_16B_L2_MASK) >> OPA_16B_L2_SHIFT);
    450}
    451
    452static inline u16 hfi1_16B_get_pkey(struct hfi1_16b_header *hdr)
    453{
    454	return (u16)((hdr->lrh[2] & OPA_16B_PKEY_MASK) >> OPA_16B_PKEY_SHIFT);
    455}
    456
    457static inline u8 hfi1_16B_get_rc(struct hfi1_16b_header *hdr)
    458{
    459	return (u8)((hdr->lrh[1] & OPA_16B_RC_MASK) >> OPA_16B_RC_SHIFT);
    460}
    461
    462static inline u8 hfi1_16B_get_age(struct hfi1_16b_header *hdr)
    463{
    464	return (u8)((hdr->lrh[3] & OPA_16B_AGE_MASK) >> OPA_16B_AGE_SHIFT);
    465}
    466
    467static inline u16 hfi1_16B_get_len(struct hfi1_16b_header *hdr)
    468{
    469	return (u16)((hdr->lrh[0] & OPA_16B_LEN_MASK) >> OPA_16B_LEN_SHIFT);
    470}
    471
    472static inline u16 hfi1_16B_get_entropy(struct hfi1_16b_header *hdr)
    473{
    474	return (u16)(hdr->lrh[3] & OPA_16B_ENTROPY_MASK);
    475}
    476
    477#define OPA_16B_MAKE_QW(low_dw, high_dw) (((u64)(high_dw) << 32) | (low_dw))
    478
    479/*
    480 * BTH
    481 */
    482#define OPA_16B_BTH_PAD_MASK	7
    483static inline u8 hfi1_16B_bth_get_pad(struct ib_other_headers *ohdr)
    484{
    485	return (u8)((be32_to_cpu(ohdr->bth[0]) >> IB_BTH_PAD_SHIFT) &
    486		   OPA_16B_BTH_PAD_MASK);
    487}
    488
    489/*
    490 * 16B Management
    491 */
    492#define OPA_16B_MGMT_QPN_MASK	0xFFFFFF
    493static inline u32 hfi1_16B_get_dest_qpn(struct opa_16b_mgmt *mgmt)
    494{
    495	return be32_to_cpu(mgmt->dest_qpn) & OPA_16B_MGMT_QPN_MASK;
    496}
    497
    498static inline u32 hfi1_16B_get_src_qpn(struct opa_16b_mgmt *mgmt)
    499{
    500	return be32_to_cpu(mgmt->src_qpn) & OPA_16B_MGMT_QPN_MASK;
    501}
    502
    503static inline void hfi1_16B_set_qpn(struct opa_16b_mgmt *mgmt,
    504				    u32 dest_qp, u32 src_qp)
    505{
    506	mgmt->dest_qpn = cpu_to_be32(dest_qp & OPA_16B_MGMT_QPN_MASK);
    507	mgmt->src_qpn = cpu_to_be32(src_qp & OPA_16B_MGMT_QPN_MASK);
    508}
    509
    510/**
    511 * hfi1_get_rc_ohdr - get extended header
     512 * @opah: the opa header
    513 */
    514static inline struct ib_other_headers *
    515hfi1_get_rc_ohdr(struct hfi1_opa_header *opah)
    516{
    517	struct ib_other_headers *ohdr;
    518	struct ib_header *hdr = NULL;
    519	struct hfi1_16b_header *hdr_16b = NULL;
    520
    521	/* Find out where the BTH is */
    522	if (opah->hdr_type == HFI1_PKT_TYPE_9B) {
    523		hdr = &opah->ibh;
    524		if (ib_get_lnh(hdr) == HFI1_LRH_BTH)
    525			ohdr = &hdr->u.oth;
    526		else
    527			ohdr = &hdr->u.l.oth;
    528	} else {
    529		u8 l4;
    530
    531		hdr_16b = &opah->opah;
    532		l4  = hfi1_16B_get_l4(hdr_16b);
    533		if (l4 == OPA_16B_L4_IB_LOCAL)
    534			ohdr = &hdr_16b->u.oth;
    535		else
    536			ohdr = &hdr_16b->u.l.oth;
    537	}
    538	return ohdr;
    539}
    540
    541struct rvt_sge_state;
    542
    543/*
    544 * Get/Set IB link-level config parameters for f_get/set_ib_cfg()
    545 * Mostly for MADs that set or query link parameters, also ipath
    546 * config interfaces
    547 */
    548#define HFI1_IB_CFG_LIDLMC 0 /* LID (LS16b) and Mask (MS16b) */
    549#define HFI1_IB_CFG_LWID_DG_ENB 1 /* allowed Link-width downgrade */
    550#define HFI1_IB_CFG_LWID_ENB 2 /* allowed Link-width */
    551#define HFI1_IB_CFG_LWID 3 /* currently active Link-width */
    552#define HFI1_IB_CFG_SPD_ENB 4 /* allowed Link speeds */
    553#define HFI1_IB_CFG_SPD 5 /* current Link spd */
    554#define HFI1_IB_CFG_RXPOL_ENB 6 /* Auto-RX-polarity enable */
    555#define HFI1_IB_CFG_LREV_ENB 7 /* Auto-Lane-reversal enable */
    556#define HFI1_IB_CFG_LINKLATENCY 8 /* Link Latency (IB1.2 only) */
    557#define HFI1_IB_CFG_HRTBT 9 /* IB heartbeat off/enable/auto; DDR/QDR only */
    558#define HFI1_IB_CFG_OP_VLS 10 /* operational VLs */
    559#define HFI1_IB_CFG_VL_HIGH_CAP 11 /* num of VL high priority weights */
    560#define HFI1_IB_CFG_VL_LOW_CAP 12 /* num of VL low priority weights */
    561#define HFI1_IB_CFG_OVERRUN_THRESH 13 /* IB overrun threshold */
    562#define HFI1_IB_CFG_PHYERR_THRESH 14 /* IB PHY error threshold */
    563#define HFI1_IB_CFG_LINKDEFAULT 15 /* IB link default (sleep/poll) */
    564#define HFI1_IB_CFG_PKEYS 16 /* update partition keys */
    565#define HFI1_IB_CFG_MTU 17 /* update MTU in IBC */
    566#define HFI1_IB_CFG_VL_HIGH_LIMIT 19
    567#define HFI1_IB_CFG_PMA_TICKS 20 /* PMA sample tick resolution */
    568#define HFI1_IB_CFG_PORT 21 /* switch port we are connected to */
    569
    570/*
    571 * HFI or Host Link States
    572 *
    573 * These describe the states the driver thinks the logical and physical
    574 * states are in.  Used as an argument to set_link_state().  Implemented
    575 * as bits for easy multi-state checking.  The actual state can only be
    576 * one.
    577 */
    578#define __HLS_UP_INIT_BP	0
    579#define __HLS_UP_ARMED_BP	1
    580#define __HLS_UP_ACTIVE_BP	2
    581#define __HLS_DN_DOWNDEF_BP	3	/* link down default */
    582#define __HLS_DN_POLL_BP	4
    583#define __HLS_DN_DISABLE_BP	5
    584#define __HLS_DN_OFFLINE_BP	6
    585#define __HLS_VERIFY_CAP_BP	7
    586#define __HLS_GOING_UP_BP	8
    587#define __HLS_GOING_OFFLINE_BP  9
    588#define __HLS_LINK_COOLDOWN_BP 10
    589
    590#define HLS_UP_INIT	  BIT(__HLS_UP_INIT_BP)
    591#define HLS_UP_ARMED	  BIT(__HLS_UP_ARMED_BP)
    592#define HLS_UP_ACTIVE	  BIT(__HLS_UP_ACTIVE_BP)
    593#define HLS_DN_DOWNDEF	  BIT(__HLS_DN_DOWNDEF_BP) /* link down default */
    594#define HLS_DN_POLL	  BIT(__HLS_DN_POLL_BP)
    595#define HLS_DN_DISABLE	  BIT(__HLS_DN_DISABLE_BP)
    596#define HLS_DN_OFFLINE	  BIT(__HLS_DN_OFFLINE_BP)
    597#define HLS_VERIFY_CAP	  BIT(__HLS_VERIFY_CAP_BP)
    598#define HLS_GOING_UP	  BIT(__HLS_GOING_UP_BP)
    599#define HLS_GOING_OFFLINE BIT(__HLS_GOING_OFFLINE_BP)
    600#define HLS_LINK_COOLDOWN BIT(__HLS_LINK_COOLDOWN_BP)
    601
    602#define HLS_UP (HLS_UP_INIT | HLS_UP_ARMED | HLS_UP_ACTIVE)
    603#define HLS_DOWN ~(HLS_UP)
    604
    605#define HLS_DEFAULT HLS_DN_POLL
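
/*
 * Illustrative sketch: because the link states are bits, several states
 * can be tested in a single operation, e.g. a hypothetical caller:
 *
 *	if (ppd->host_link_state & HLS_UP)
 *		...	// link is Init, Armed, or Active
 */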
    606
    607/* use this MTU size if none other is given */
    608#define HFI1_DEFAULT_ACTIVE_MTU 10240
    609/* use this MTU size as the default maximum */
    610#define HFI1_DEFAULT_MAX_MTU 10240
    611/* default partition key */
    612#define DEFAULT_PKEY 0xffff
    613
    614/*
    615 * Possible fabric manager config parameters for fm_{get,set}_table()
    616 */
    617#define FM_TBL_VL_HIGH_ARB		1 /* Get/set VL high prio weights */
    618#define FM_TBL_VL_LOW_ARB		2 /* Get/set VL low prio weights */
    619#define FM_TBL_BUFFER_CONTROL		3 /* Get/set Buffer Control */
    620#define FM_TBL_SC2VLNT			4 /* Get/set SC->VLnt */
    621#define FM_TBL_VL_PREEMPT_ELEMS		5 /* Get (no set) VL preempt elems */
    622#define FM_TBL_VL_PREEMPT_MATRIX	6 /* Get (no set) VL preempt matrix */
    623
    624/*
    625 * Possible "operations" for f_rcvctrl(ppd, op, ctxt)
    626 * these are bits so they can be combined, e.g.
    627 * HFI1_RCVCTRL_INTRAVAIL_ENB | HFI1_RCVCTRL_CTXT_ENB
    628 */
    629#define HFI1_RCVCTRL_TAILUPD_ENB 0x01
    630#define HFI1_RCVCTRL_TAILUPD_DIS 0x02
    631#define HFI1_RCVCTRL_CTXT_ENB 0x04
    632#define HFI1_RCVCTRL_CTXT_DIS 0x08
    633#define HFI1_RCVCTRL_INTRAVAIL_ENB 0x10
    634#define HFI1_RCVCTRL_INTRAVAIL_DIS 0x20
    635#define HFI1_RCVCTRL_PKEY_ENB 0x40  /* Note, default is enabled */
    636#define HFI1_RCVCTRL_PKEY_DIS 0x80
    637#define HFI1_RCVCTRL_TIDFLOW_ENB 0x0400
    638#define HFI1_RCVCTRL_TIDFLOW_DIS 0x0800
    639#define HFI1_RCVCTRL_ONE_PKT_EGR_ENB 0x1000
    640#define HFI1_RCVCTRL_ONE_PKT_EGR_DIS 0x2000
    641#define HFI1_RCVCTRL_NO_RHQ_DROP_ENB 0x4000
    642#define HFI1_RCVCTRL_NO_RHQ_DROP_DIS 0x8000
    643#define HFI1_RCVCTRL_NO_EGR_DROP_ENB 0x10000
    644#define HFI1_RCVCTRL_NO_EGR_DROP_DIS 0x20000
    645#define HFI1_RCVCTRL_URGENT_ENB 0x40000
    646#define HFI1_RCVCTRL_URGENT_DIS 0x80000
    647
    648/* partition enforcement flags */
    649#define HFI1_PART_ENFORCE_IN	0x1
    650#define HFI1_PART_ENFORCE_OUT	0x2
    651
    652/* how often we check for synthetic counter wrap around */
    653#define SYNTH_CNT_TIME 3
    654
    655/* Counter flags */
    656#define CNTR_NORMAL		0x0 /* Normal counters, just read register */
    657#define CNTR_SYNTH		0x1 /* Synthetic counters, saturate at all 1s */
    658#define CNTR_DISABLED		0x2 /* Disable this counter */
    659#define CNTR_32BIT		0x4 /* Simulate 64 bits for this counter */
    660#define CNTR_VL			0x8 /* Per VL counter */
    661#define CNTR_SDMA              0x10
    662#define CNTR_INVALID_VL		-1  /* Specifies invalid VL */
    663#define CNTR_MODE_W		0x0
    664#define CNTR_MODE_R		0x1
    665
    666/* VLs Supported/Operational */
    667#define HFI1_MIN_VLS_SUPPORTED 1
    668#define HFI1_MAX_VLS_SUPPORTED 8
    669
    670#define HFI1_GUIDS_PER_PORT  5
    671#define HFI1_PORT_GUID_INDEX 0
    672
    673static inline void incr_cntr64(u64 *cntr)
    674{
    675	if (*cntr < (u64)-1LL)
    676		(*cntr)++;
    677}
    678
    679#define MAX_NAME_SIZE 64
    680struct hfi1_msix_entry {
    681	enum irq_type type;
    682	int irq;
    683	void *arg;
    684	cpumask_t mask;
    685	struct irq_affinity_notify notify;
    686};
    687
    688struct hfi1_msix_info {
    689	/* lock to synchronize in_use_msix access */
    690	spinlock_t msix_lock;
    691	DECLARE_BITMAP(in_use_msix, CCE_NUM_MSIX_VECTORS);
    692	struct hfi1_msix_entry *msix_entries;
    693	u16 max_requested;
    694};
    695
    696/* per-SL CCA information */
    697struct cca_timer {
    698	struct hrtimer hrtimer;
    699	struct hfi1_pportdata *ppd; /* read-only */
    700	int sl; /* read-only */
    701	u16 ccti; /* read/write - current value of CCTI */
    702};
    703
    704struct link_down_reason {
    705	/*
    706	 * SMA-facing value.  Should be set from .latest when
    707	 * HLS_UP_* -> HLS_DN_* transition actually occurs.
    708	 */
    709	u8 sma;
    710	u8 latest;
    711};
    712
    713enum {
    714	LO_PRIO_TABLE,
    715	HI_PRIO_TABLE,
    716	MAX_PRIO_TABLE
    717};
    718
    719struct vl_arb_cache {
    720	/* protect vl arb cache */
    721	spinlock_t lock;
    722	struct ib_vl_weight_elem table[VL_ARB_TABLE_SIZE];
    723};
    724
    725/*
    726 * The structure below encapsulates data relevant to a physical IB Port.
    727 * Current chips support only one such port, but the separation
    728 * clarifies things a bit. Note that to conform to IB conventions,
    729 * port-numbers are one-based. The first or only port is port1.
    730 */
    731struct hfi1_pportdata {
    732	struct hfi1_ibport ibport_data;
    733
    734	struct hfi1_devdata *dd;
    735
    736	/* PHY support */
    737	struct qsfp_data qsfp_info;
    738	/* Values for SI tuning of SerDes */
    739	u32 port_type;
    740	u32 tx_preset_eq;
    741	u32 tx_preset_noeq;
    742	u32 rx_preset;
    743	u8  local_atten;
    744	u8  remote_atten;
    745	u8  default_atten;
    746	u8  max_power_class;
    747
    748	/* did we read platform config from scratch registers? */
    749	bool config_from_scratch;
    750
    751	/* GUIDs for this interface, in host order, guids[0] is a port guid */
    752	u64 guids[HFI1_GUIDS_PER_PORT];
    753
    754	/* GUID for peer interface, in host order */
    755	u64 neighbor_guid;
    756
    757	/* up or down physical link state */
    758	u32 linkup;
    759
    760	/*
    761	 * this address is mapped read-only into user processes so they can
    762	 * get status cheaply, whenever they want.  One qword of status per port
    763	 */
    764	u64 *statusp;
    765
    766	/* SendDMA related entries */
    767
    768	struct workqueue_struct *hfi1_wq;
    769	struct workqueue_struct *link_wq;
    770
    771	/* move out of interrupt context */
    772	struct work_struct link_vc_work;
    773	struct work_struct link_up_work;
    774	struct work_struct link_down_work;
    775	struct work_struct sma_message_work;
    776	struct work_struct freeze_work;
    777	struct work_struct link_downgrade_work;
    778	struct work_struct link_bounce_work;
    779	struct delayed_work start_link_work;
    780	/* host link state variables */
    781	struct mutex hls_lock;
    782	u32 host_link_state;
    783
    784	/* these are the "32 bit" regs */
    785
    786	u32 ibmtu; /* The MTU programmed for this unit */
    787	/*
    788	 * Current max size IB packet (in bytes) including IB headers, that
    789	 * we can send. Changes when ibmtu changes.
    790	 */
    791	u32 ibmaxlen;
    792	u32 current_egress_rate; /* units [10^6 bits/sec] */
    793	/* LID programmed for this instance */
    794	u32 lid;
    795	/* list of pkeys programmed; 0 if not set */
    796	u16 pkeys[MAX_PKEY_VALUES];
    797	u16 link_width_supported;
    798	u16 link_width_downgrade_supported;
    799	u16 link_speed_supported;
    800	u16 link_width_enabled;
    801	u16 link_width_downgrade_enabled;
    802	u16 link_speed_enabled;
    803	u16 link_width_active;
    804	u16 link_width_downgrade_tx_active;
    805	u16 link_width_downgrade_rx_active;
    806	u16 link_speed_active;
    807	u8 vls_supported;
    808	u8 vls_operational;
    809	u8 actual_vls_operational;
    810	/* LID mask control */
    811	u8 lmc;
    812	/* Rx Polarity inversion (compensate for ~tx on partner) */
    813	u8 rx_pol_inv;
    814
    815	u8 hw_pidx;     /* physical port index */
    816	u32 port;        /* IB port number and index into dd->pports - 1 */
    817	/* type of neighbor node */
    818	u8 neighbor_type;
    819	u8 neighbor_normal;
    820	u8 neighbor_fm_security; /* 1 if firmware checking is disabled */
    821	u8 neighbor_port_number;
    822	u8 is_sm_config_started;
    823	u8 offline_disabled_reason;
    824	u8 is_active_optimize_enabled;
    825	u8 driver_link_ready;	/* driver ready for active link */
    826	u8 link_enabled;	/* link enabled? */
    827	u8 linkinit_reason;
    828	u8 local_tx_rate;	/* rate given to 8051 firmware */
    829	u8 qsfp_retry_count;
    830
    831	/* placeholders for IB MAD packet settings */
    832	u8 overrun_threshold;
    833	u8 phy_error_threshold;
    834	unsigned int is_link_down_queued;
    835
     836	/* Used to override LED behavior for things like maintenance beaconing */
    837	/*
    838	 * Alternates per phase of blink
    839	 * [0] holds LED off duration, [1] holds LED on duration
    840	 */
    841	unsigned long led_override_vals[2];
    842	u8 led_override_phase; /* LSB picks from vals[] */
    843	atomic_t led_override_timer_active;
    844	/* Used to flash LEDs in override mode */
    845	struct timer_list led_override_timer;
    846
    847	u32 sm_trap_qp;
    848	u32 sa_qp;
    849
    850	/*
    851	 * cca_timer_lock protects access to the per-SL cca_timer
    852	 * structures (specifically the ccti member).
    853	 */
    854	spinlock_t cca_timer_lock ____cacheline_aligned_in_smp;
    855	struct cca_timer cca_timer[OPA_MAX_SLS];
    856
    857	/* List of congestion control table entries */
    858	struct ib_cc_table_entry_shadow ccti_entries[CC_TABLE_SHADOW_MAX];
    859
    860	/* congestion entries, each entry corresponding to a SL */
    861	struct opa_congestion_setting_entry_shadow
    862		congestion_entries[OPA_MAX_SLS];
    863
    864	/*
    865	 * cc_state_lock protects (write) access to the per-port
    866	 * struct cc_state.
    867	 */
    868	spinlock_t cc_state_lock ____cacheline_aligned_in_smp;
    869
    870	struct cc_state __rcu *cc_state;
    871
    872	/* Total number of congestion control table entries */
    873	u16 total_cct_entry;
    874
    875	/* Bit map identifying service level */
    876	u32 cc_sl_control_map;
    877
    878	/* CA's max number of 64 entry units in the congestion control table */
    879	u8 cc_max_table_entries;
    880
    881	/*
    882	 * begin congestion log related entries
    883	 * cc_log_lock protects all congestion log related data
    884	 */
    885	spinlock_t cc_log_lock ____cacheline_aligned_in_smp;
    886	u8 threshold_cong_event_map[OPA_MAX_SLS / 8];
    887	u16 threshold_event_counter;
    888	struct opa_hfi1_cong_log_event_internal cc_events[OPA_CONG_LOG_ELEMS];
    889	int cc_log_idx; /* index for logging events */
    890	int cc_mad_idx; /* index for reporting events */
    891	/* end congestion log related entries */
    892
    893	struct vl_arb_cache vl_arb_cache[MAX_PRIO_TABLE];
    894
    895	/* port relative counter buffer */
    896	u64 *cntrs;
    897	/* port relative synthetic counter buffer */
    898	u64 *scntrs;
    899	/* port_xmit_discards are synthesized from different egress errors */
    900	u64 port_xmit_discards;
    901	u64 port_xmit_discards_vl[C_VL_COUNT];
    902	u64 port_xmit_constraint_errors;
    903	u64 port_rcv_constraint_errors;
    904	/* count of 'link_err' interrupts from DC */
    905	u64 link_downed;
    906	/* number of times link retrained successfully */
    907	u64 link_up;
    908	/* number of times a link unknown frame was reported */
    909	u64 unknown_frame_count;
    910	/* port_ltp_crc_mode is returned in 'portinfo' MADs */
    911	u16 port_ltp_crc_mode;
    912	/* port_crc_mode_enabled is the crc we support */
    913	u8 port_crc_mode_enabled;
    914	/* mgmt_allowed is also returned in 'portinfo' MADs */
    915	u8 mgmt_allowed;
    916	u8 part_enforce; /* partition enforcement flags */
    917	struct link_down_reason local_link_down_reason;
    918	struct link_down_reason neigh_link_down_reason;
     919	/* Value to be sent to link peer on LinkDown. */
    920	u8 remote_link_down_reason;
    921	/* Error events that will cause a port bounce. */
    922	u32 port_error_action;
    923	struct work_struct linkstate_active_work;
    924	/* Does this port need to prescan for FECNs */
    925	bool cc_prescan;
    926	/*
    927	 * Sample sendWaitCnt & sendWaitVlCnt during link transition
    928	 * and counter request.
    929	 */
    930	u64 port_vl_xmit_wait_last[C_VL_COUNT + 1];
    931	u16 prev_link_width;
    932	u64 vl_xmit_flit_cnt[C_VL_COUNT + 1];
    933};
    934
    935typedef void (*opcode_handler)(struct hfi1_packet *packet);
    936typedef void (*hfi1_make_req)(struct rvt_qp *qp,
    937			      struct hfi1_pkt_state *ps,
    938			      struct rvt_swqe *wqe);
    939extern const rhf_rcv_function_ptr normal_rhf_rcv_functions[];
    940extern const rhf_rcv_function_ptr netdev_rhf_rcv_functions[];
    941
    942/* return values for the RHF receive functions */
    943#define RHF_RCV_CONTINUE  0	/* keep going */
    944#define RHF_RCV_DONE	  1	/* stop, this packet processed */
    945#define RHF_RCV_REPROCESS 2	/* stop. retain this packet */
    946
    947struct rcv_array_data {
    948	u16 ngroups;
    949	u16 nctxt_extra;
    950	u8 group_size;
    951};
    952
    953struct per_vl_data {
    954	u16 mtu;
    955	struct send_context *sc;
    956};
    957
    958/* 16 to directly index */
    959#define PER_VL_SEND_CONTEXTS 16
    960
    961struct err_info_rcvport {
    962	u8 status_and_code;
    963	u64 packet_flit1;
    964	u64 packet_flit2;
    965};
    966
    967struct err_info_constraint {
    968	u8 status;
    969	u16 pkey;
    970	u32 slid;
    971};
    972
    973struct hfi1_temp {
    974	unsigned int curr;       /* current temperature */
    975	unsigned int lo_lim;     /* low temperature limit */
    976	unsigned int hi_lim;     /* high temperature limit */
    977	unsigned int crit_lim;   /* critical temperature limit */
    978	u8 triggers;      /* temperature triggers */
    979};
    980
    981struct hfi1_i2c_bus {
    982	struct hfi1_devdata *controlling_dd; /* current controlling device */
    983	struct i2c_adapter adapter;	/* bus details */
    984	struct i2c_algo_bit_data algo;	/* bus algorithm details */
    985	int num;			/* bus number, 0 or 1 */
    986};
    987
    988/* common data between shared ASIC HFIs */
    989struct hfi1_asic_data {
    990	struct hfi1_devdata *dds[2];	/* back pointers */
    991	struct mutex asic_resource_mutex;
    992	struct hfi1_i2c_bus *i2c_bus0;
    993	struct hfi1_i2c_bus *i2c_bus1;
    994};
    995
    996/* sizes for both the QP and RSM map tables */
    997#define NUM_MAP_ENTRIES	 256
    998#define NUM_MAP_REGS      32
    999
   1000/* Virtual NIC information */
   1001struct hfi1_vnic_data {
   1002	struct kmem_cache *txreq_cache;
   1003	u8 num_vports;
   1004};
   1005
   1006struct hfi1_vnic_vport_info;
   1007
    1008/* The device data struct now contains only "general per-device" info.
    1009 * Fields related to a physical IB port are in an hfi1_pportdata struct.
   1010 */
   1011struct sdma_engine;
   1012struct sdma_vl_map;
   1013
   1014#define BOARD_VERS_MAX 96 /* how long the version string can be */
   1015#define SERIAL_MAX 16 /* length of the serial number */
   1016
   1017typedef int (*send_routine)(struct rvt_qp *, struct hfi1_pkt_state *, u64);
   1018struct hfi1_netdev_rx;
   1019struct hfi1_devdata {
   1020	struct hfi1_ibdev verbs_dev;     /* must be first */
   1021	/* pointers to related structs for this device */
   1022	/* pci access data structure */
   1023	struct pci_dev *pcidev;
   1024	struct cdev user_cdev;
   1025	struct cdev diag_cdev;
   1026	struct cdev ui_cdev;
   1027	struct device *user_device;
   1028	struct device *diag_device;
   1029	struct device *ui_device;
   1030
   1031	/* first mapping up to RcvArray */
   1032	u8 __iomem *kregbase1;
   1033	resource_size_t physaddr;
   1034
   1035	/* second uncached mapping from RcvArray to pio send buffers */
   1036	u8 __iomem *kregbase2;
   1037	/* for detecting offset above kregbase2 address */
   1038	u32 base2_start;
   1039
   1040	/* Per VL data. Enough for all VLs but not all elements are set/used. */
   1041	struct per_vl_data vld[PER_VL_SEND_CONTEXTS];
   1042	/* send context data */
   1043	struct send_context_info *send_contexts;
   1044	/* map hardware send contexts to software index */
   1045	u8 *hw_to_sw;
   1046	/* spinlock for allocating and releasing send context resources */
   1047	spinlock_t sc_lock;
   1048	/* lock for pio_map */
   1049	spinlock_t pio_map_lock;
   1050	/* Send Context initialization lock. */
   1051	spinlock_t sc_init_lock;
   1052	/* lock for sdma_map */
   1053	spinlock_t                          sde_map_lock;
   1054	/* array of kernel send contexts */
   1055	struct send_context **kernel_send_context;
   1056	/* array of vl maps */
   1057	struct pio_vl_map __rcu *pio_map;
   1058	/* default flags to last descriptor */
   1059	u64 default_desc1;
   1060
   1061	/* fields common to all SDMA engines */
   1062
   1063	volatile __le64                    *sdma_heads_dma; /* DMA'ed by chip */
   1064	dma_addr_t                          sdma_heads_phys;
   1065	void                               *sdma_pad_dma; /* DMA'ed by chip */
   1066	dma_addr_t                          sdma_pad_phys;
   1067	/* for deallocation */
   1068	size_t                              sdma_heads_size;
   1069	/* num used */
   1070	u32                                 num_sdma;
   1071	/* array of engines sized by num_sdma */
   1072	struct sdma_engine                 *per_sdma;
   1073	/* array of vl maps */
   1074	struct sdma_vl_map __rcu           *sdma_map;
   1075	/* SPC freeze waitqueue and variable */
   1076	wait_queue_head_t		  sdma_unfreeze_wq;
   1077	atomic_t			  sdma_unfreeze_count;
   1078
   1079	u32 lcb_access_count;		/* count of LCB users */
   1080
   1081	/* common data between shared ASIC HFIs in this OS */
   1082	struct hfi1_asic_data *asic_data;
   1083
   1084	/* mem-mapped pointer to base of PIO buffers */
   1085	void __iomem *piobase;
   1086	/*
   1087	 * write-combining mem-mapped pointer to base of RcvArray
   1088	 * memory.
   1089	 */
   1090	void __iomem *rcvarray_wc;
   1091	/*
    1092	 * credit return base - a per-NUMA range of DMA addresses that
   1093	 * the chip will use to update the per-context free counter
   1094	 */
   1095	struct credit_return_base *cr_base;
   1096
   1097	/* send context numbers and sizes for each type */
   1098	struct sc_config_sizes sc_sizes[SC_MAX];
   1099
   1100	char *boardname; /* human readable board info */
   1101
   1102	u64 ctx0_seq_drop;
   1103
   1104	/* reset value */
   1105	u64 z_int_counter;
   1106	u64 z_rcv_limit;
   1107	u64 z_send_schedule;
   1108
   1109	u64 __percpu *send_schedule;
   1110	/* number of reserved contexts for netdev usage */
   1111	u16 num_netdev_contexts;
   1112	/* number of receive contexts in use by the driver */
   1113	u32 num_rcv_contexts;
   1114	/* number of pio send contexts in use by the driver */
   1115	u32 num_send_contexts;
   1116	/*
   1117	 * number of ctxts available for PSM open
   1118	 */
   1119	u32 freectxts;
   1120	/* total number of available user/PSM contexts */
   1121	u32 num_user_contexts;
   1122	/* base receive interrupt timeout, in CSR units */
   1123	u32 rcv_intr_timeout_csr;
   1124
   1125	spinlock_t sendctrl_lock; /* protect changes to SendCtrl */
   1126	spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */
   1127	spinlock_t uctxt_lock; /* protect rcd changes */
   1128	struct mutex dc8051_lock; /* exclusive access to 8051 */
   1129	struct workqueue_struct *update_cntr_wq;
   1130	struct work_struct update_cntr_work;
   1131	/* exclusive access to 8051 memory */
   1132	spinlock_t dc8051_memlock;
   1133	int dc8051_timed_out;	/* remember if the 8051 timed out */
   1134	/*
   1135	 * A page that will hold event notification bitmaps for all
   1136	 * contexts. This page will be mapped into all processes.
   1137	 */
   1138	unsigned long *events;
   1139	/*
   1140	 * per unit status, see also portdata statusp
   1141	 * mapped read-only into user processes so they can get unit and
   1142	 * IB link status cheaply
   1143	 */
   1144	struct hfi1_status *status;
   1145
   1146	/* revision register shadow */
   1147	u64 revision;
   1148	/* Base GUID for device (network order) */
   1149	u64 base_guid;
   1150
   1151	/* both sides of the PCIe link are gen3 capable */
   1152	u8 link_gen3_capable;
   1153	u8 dc_shutdown;
    1154	/* localbus width (1, 2, 4, 8, 16, 32) from config space */
   1155	u32 lbus_width;
   1156	/* localbus speed in MHz */
   1157	u32 lbus_speed;
   1158	int unit; /* unit # of this chip */
   1159	int node; /* home node of this chip */
   1160
   1161	/* save these PCI fields to restore after a reset */
   1162	u32 pcibar0;
   1163	u32 pcibar1;
   1164	u32 pci_rom;
   1165	u16 pci_command;
   1166	u16 pcie_devctl;
   1167	u16 pcie_lnkctl;
   1168	u16 pcie_devctl2;
   1169	u32 pci_msix0;
   1170	u32 pci_tph2;
   1171
   1172	/*
   1173	 * ASCII serial number, from flash, large enough for original
    1174	 * all-digit strings, and the longer serial number format
   1175	 */
   1176	u8 serial[SERIAL_MAX];
   1177	/* human readable board version */
   1178	u8 boardversion[BOARD_VERS_MAX];
   1179	u8 lbus_info[32]; /* human readable localbus info */
   1180	/* chip major rev, from CceRevision */
   1181	u8 majrev;
   1182	/* chip minor rev, from CceRevision */
   1183	u8 minrev;
   1184	/* hardware ID */
   1185	u8 hfi1_id;
   1186	/* implementation code */
   1187	u8 icode;
   1188	/* vAU of this device */
   1189	u8 vau;
   1190	/* vCU of this device */
   1191	u8 vcu;
   1192	/* link credits of this device */
   1193	u16 link_credits;
   1194	/* initial vl15 credits to use */
   1195	u16 vl15_init;
   1196
   1197	/*
   1198	 * Cached value for vl15buf, read during verify cap interrupt. VL15
   1199	 * credits are to be kept at 0 and set when handling the link-up
   1200	 * interrupt. This removes the possibility of receiving VL15 MAD
   1201	 * packets before this HFI is ready.
   1202	 */
   1203	u16 vl15buf_cached;
   1204
   1205	/* Misc small ints */
   1206	u8 n_krcv_queues;
   1207	u8 qos_shift;
   1208
   1209	u16 irev;	/* implementation revision */
   1210	u32 dc8051_ver; /* 8051 firmware version */
   1211
   1212	spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */
   1213	struct platform_config platform_config;
   1214	struct platform_config_cache pcfg_cache;
   1215
   1216	struct diag_client *diag_client;
   1217
   1218	/* general interrupt: mask of handled interrupts */
   1219	u64 gi_mask[CCE_NUM_INT_CSRS];
   1220
   1221	struct rcv_array_data rcv_entries;
   1222
   1223	/* cycle length of PS* counters in HW (in picoseconds) */
   1224	u16 psxmitwait_check_rate;
   1225
   1226	/*
   1227	 * 64 bit synthetic counters
   1228	 */
   1229	struct timer_list synth_stats_timer;
   1230
   1231	/* MSI-X information */
   1232	struct hfi1_msix_info msix_info;
   1233
   1234	/*
   1235	 * device counters
   1236	 */
   1237	char *cntrnames;
   1238	size_t cntrnameslen;
   1239	size_t ndevcntrs;
   1240	u64 *cntrs;
   1241	u64 *scntrs;
   1242
   1243	/*
   1244	 * remembered values for synthetic counters
   1245	 */
   1246	u64 last_tx;
   1247	u64 last_rx;
   1248
   1249	/*
   1250	 * per-port counters
   1251	 */
   1252	size_t nportcntrs;
   1253	char *portcntrnames;
   1254	size_t portcntrnameslen;
   1255
   1256	struct err_info_rcvport err_info_rcvport;
   1257	struct err_info_constraint err_info_rcv_constraint;
   1258	struct err_info_constraint err_info_xmit_constraint;
   1259
   1260	atomic_t drop_packet;
   1261	bool do_drop;
   1262	u8 err_info_uncorrectable;
   1263	u8 err_info_fmconfig;
   1264
   1265	/*
   1266	 * Software counters for the status bits defined by the
   1267	 * associated error status registers
   1268	 */
   1269	u64 cce_err_status_cnt[NUM_CCE_ERR_STATUS_COUNTERS];
   1270	u64 rcv_err_status_cnt[NUM_RCV_ERR_STATUS_COUNTERS];
   1271	u64 misc_err_status_cnt[NUM_MISC_ERR_STATUS_COUNTERS];
   1272	u64 send_pio_err_status_cnt[NUM_SEND_PIO_ERR_STATUS_COUNTERS];
   1273	u64 send_dma_err_status_cnt[NUM_SEND_DMA_ERR_STATUS_COUNTERS];
   1274	u64 send_egress_err_status_cnt[NUM_SEND_EGRESS_ERR_STATUS_COUNTERS];
   1275	u64 send_err_status_cnt[NUM_SEND_ERR_STATUS_COUNTERS];
   1276
   1277	/* Software counter that spans all contexts */
   1278	u64 sw_ctxt_err_status_cnt[NUM_SEND_CTXT_ERR_STATUS_COUNTERS];
   1279	/* Software counter that spans all DMA engines */
   1280	u64 sw_send_dma_eng_err_status_cnt[
   1281		NUM_SEND_DMA_ENG_ERR_STATUS_COUNTERS];
   1282	/* Software counter that aggregates all cce_err_status errors */
   1283	u64 sw_cce_err_status_aggregate;
   1284	/* Software counter that aggregates all bypass packet rcv errors */
   1285	u64 sw_rcv_bypass_packet_errors;
   1286
   1287	/* Save the enabled LCB error bits */
   1288	u64 lcb_err_en;
   1289	struct cpu_mask_set *comp_vect;
   1290	int *comp_vect_mappings;
   1291	u32 comp_vect_possible_cpus;
   1292
   1293	/*
   1294	 * Capability to have different send engines simply by changing a
   1295	 * pointer value.
   1296	 */
   1297	send_routine process_pio_send ____cacheline_aligned_in_smp;
   1298	send_routine process_dma_send;
   1299	void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf,
   1300				u64 pbc, const void *from, size_t count);
   1301	int (*process_vnic_dma_send)(struct hfi1_devdata *dd, u8 q_idx,
   1302				     struct hfi1_vnic_vport_info *vinfo,
   1303				     struct sk_buff *skb, u64 pbc, u8 plen);
   1304	/* hfi1_pportdata, points to array of (physical) port-specific
   1305	 * data structs, indexed by pidx (0..n-1)
   1306	 */
   1307	struct hfi1_pportdata *pport;
   1308	/* receive context data */
   1309	struct hfi1_ctxtdata **rcd;
   1310	u64 __percpu *int_counter;
   1311	/* verbs tx opcode stats */
   1312	struct hfi1_opcode_stats_perctx __percpu *tx_opstats;
   1313	/* device (not port) flags, basically device capabilities */
   1314	u16 flags;
   1315	/* Number of physical ports available */
   1316	u8 num_pports;
   1317	/* Lowest context number which can be used by user processes or VNIC */
   1318	u8 first_dyn_alloc_ctxt;
   1319	/* adding a new field here would make it part of this cacheline */
   1320
   1321	/* seqlock for sc2vl */
   1322	seqlock_t sc2vl_lock ____cacheline_aligned_in_smp;
   1323	u64 sc2vl[4];
   1324	u64 __percpu *rcv_limit;
   1325	/* adding a new field here would make it part of this cacheline */
   1326
   1327	/* OUI comes from the HW. Used everywhere as 3 separate bytes. */
   1328	u8 oui1;
   1329	u8 oui2;
   1330	u8 oui3;
   1331
   1332	/* Timer and counter used to detect RcvBufOvflCnt changes */
   1333	struct timer_list rcverr_timer;
   1334
   1335	wait_queue_head_t event_queue;
   1336
   1337	/* receive context tail dummy address */
   1338	__le64 *rcvhdrtail_dummy_kvaddr;
   1339	dma_addr_t rcvhdrtail_dummy_dma;
   1340
   1341	u32 rcv_ovfl_cnt;
   1342	/* Serialize ASPM enable/disable between multiple verbs contexts */
   1343	spinlock_t aspm_lock;
   1344	/* Number of verbs contexts which have disabled ASPM */
   1345	atomic_t aspm_disabled_cnt;
   1346	/* Keeps track of user space clients */
   1347	refcount_t user_refcount;
   1348	/* Used to wait for outstanding user space clients before dev removal */
   1349	struct completion user_comp;
   1350
   1351	bool eprom_available;	/* true if EPROM is available for this device */
   1352	bool aspm_supported;	/* Does HW support ASPM */
   1353	bool aspm_enabled;	/* ASPM state: enabled/disabled */
   1354	struct rhashtable *sdma_rht;
   1355
   1356	/* vnic data */
   1357	struct hfi1_vnic_data vnic;
   1358	/* Lock to protect IRQ SRC register access */
   1359	spinlock_t irq_src_lock;
   1360	int vnic_num_vports;
   1361	struct hfi1_netdev_rx *netdev_rx;
   1362	struct hfi1_affinity_node *affinity_entry;
   1363
   1364	/* Keeps track of IPoIB RSM rule users */
   1365	atomic_t ipoib_rsm_usr_num;
   1366};
   1367
   1368/* 8051 firmware version helper */
   1369#define dc8051_ver(a, b, c) ((a) << 16 | (b) << 8 | (c))
   1370#define dc8051_ver_maj(a) (((a) & 0xff0000) >> 16)
   1371#define dc8051_ver_min(a) (((a) & 0x00ff00) >> 8)
   1372#define dc8051_ver_patch(a) ((a) & 0x0000ff)
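
/*
 * Worked example (illustrative): firmware version 1.27.0 encodes as
 * dc8051_ver(1, 27, 0) == 0x011b00; dc8051_ver_maj() recovers 1,
 * dc8051_ver_min() recovers 27, dc8051_ver_patch() recovers 0.
 */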
   1373
   1374/* f_put_tid types */
   1375#define PT_EXPECTED       0
   1376#define PT_EAGER          1
   1377#define PT_INVALID_FLUSH  2
   1378#define PT_INVALID        3
   1379
   1380struct tid_rb_node;
   1381struct mmu_rb_node;
   1382struct mmu_rb_handler;
   1383
   1384/* Private data for file operations */
   1385struct hfi1_filedata {
   1386	struct srcu_struct pq_srcu;
   1387	struct hfi1_devdata *dd;
   1388	struct hfi1_ctxtdata *uctxt;
   1389	struct hfi1_user_sdma_comp_q *cq;
   1390	/* update side lock for SRCU */
   1391	spinlock_t pq_rcu_lock;
   1392	struct hfi1_user_sdma_pkt_q __rcu *pq;
   1393	u16 subctxt;
   1394	/* for cpu affinity; -1 if none */
   1395	int rec_cpu_num;
   1396	u32 tid_n_pinned;
   1397	bool use_mn;
   1398	struct tid_rb_node **entry_to_rb;
   1399	spinlock_t tid_lock; /* protect tid_[limit,used] counters */
   1400	u32 tid_limit;
   1401	u32 tid_used;
   1402	u32 *invalid_tids;
   1403	u32 invalid_tid_idx;
   1404	/* protect invalid_tids array and invalid_tid_idx */
   1405	spinlock_t invalid_lock;
   1406};
   1407
   1408extern struct xarray hfi1_dev_table;
   1409struct hfi1_devdata *hfi1_lookup(int unit);
   1410
   1411static inline unsigned long uctxt_offset(struct hfi1_ctxtdata *uctxt)
   1412{
   1413	return (uctxt->ctxt - uctxt->dd->first_dyn_alloc_ctxt) *
   1414		HFI1_MAX_SHARED_CTXTS;
   1415}
   1416
   1417int hfi1_init(struct hfi1_devdata *dd, int reinit);
   1418int hfi1_count_active_units(void);
   1419
   1420int hfi1_diag_add(struct hfi1_devdata *dd);
   1421void hfi1_diag_remove(struct hfi1_devdata *dd);
   1422void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup);
   1423
   1424void handle_user_interrupt(struct hfi1_ctxtdata *rcd);
   1425
   1426int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd);
   1427int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd);
   1428int hfi1_create_kctxts(struct hfi1_devdata *dd);
   1429int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
   1430			 struct hfi1_ctxtdata **rcd);
   1431void hfi1_free_ctxt(struct hfi1_ctxtdata *rcd);
   1432void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
   1433			 struct hfi1_devdata *dd, u8 hw_pidx, u32 port);
   1434void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd);
   1435int hfi1_rcd_put(struct hfi1_ctxtdata *rcd);
   1436int hfi1_rcd_get(struct hfi1_ctxtdata *rcd);
   1437struct hfi1_ctxtdata *hfi1_rcd_get_by_index_safe(struct hfi1_devdata *dd,
   1438						 u16 ctxt);
   1439struct hfi1_ctxtdata *hfi1_rcd_get_by_index(struct hfi1_devdata *dd, u16 ctxt);
   1440int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread);
   1441int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread);
   1442int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread);
   1443int handle_receive_interrupt_napi_fp(struct hfi1_ctxtdata *rcd, int budget);
   1444int handle_receive_interrupt_napi_sp(struct hfi1_ctxtdata *rcd, int budget);
   1445void set_all_slowpath(struct hfi1_devdata *dd);
   1446
   1447extern const struct pci_device_id hfi1_pci_tbl[];
   1448void hfi1_make_ud_req_9B(struct rvt_qp *qp,
   1449			 struct hfi1_pkt_state *ps,
   1450			 struct rvt_swqe *wqe);
   1451
   1452void hfi1_make_ud_req_16B(struct rvt_qp *qp,
   1453			  struct hfi1_pkt_state *ps,
   1454			  struct rvt_swqe *wqe);
   1455
   1456/* receive packet handler dispositions */
   1457#define RCV_PKT_OK      0x0 /* keep going */
   1458#define RCV_PKT_LIMIT   0x1 /* stop, hit limit, start thread */
   1459#define RCV_PKT_DONE    0x2 /* stop, no more packets detected */
   1460
   1461/**
    1462 * hfi1_rcd_head - accessor for rcd head
   1463 * @rcd: the context
   1464 */
   1465static inline u32 hfi1_rcd_head(struct hfi1_ctxtdata *rcd)
   1466{
   1467	return rcd->head;
   1468}
   1469
   1470/**
    1471 * hfi1_set_rcd_head - set the rcd head
   1472 * @rcd: the context
   1473 * @head: the new head
   1474 */
   1475static inline void hfi1_set_rcd_head(struct hfi1_ctxtdata *rcd, u32 head)
   1476{
   1477	rcd->head = head;
   1478}
   1479
   1480/* calculate the current RHF address */
   1481static inline __le32 *get_rhf_addr(struct hfi1_ctxtdata *rcd)
   1482{
   1483	return (__le32 *)rcd->rcvhdrq + rcd->head + rcd->rhf_offset;
   1484}
   1485
   1486/* return DMA_RTAIL configuration */
   1487static inline bool get_dma_rtail_setting(struct hfi1_ctxtdata *rcd)
   1488{
   1489	return !!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL);
   1490}
   1491
   1492/**
   1493 * hfi1_seq_incr_wrap - wrapping increment for sequence
   1494 * @seq: the current sequence number
   1495 *
   1496 * Returns: the incremented seq
   1497 */
   1498static inline u8 hfi1_seq_incr_wrap(u8 seq)
   1499{
   1500	if (++seq > RHF_MAX_SEQ)
   1501		seq = 1;
   1502	return seq;
   1503}
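
/*
 * Illustrative note (assumption): the wrap goes back to 1 rather than 0,
 * so valid receive sequence numbers are 1..RHF_MAX_SEQ and a sequence of
 * 0 never matches a live packet.
 */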
   1504
   1505/**
   1506 * hfi1_seq_cnt - return seq_cnt member
   1507 * @rcd: the receive context
   1508 *
   1509 * Return seq_cnt member
   1510 */
   1511static inline u8 hfi1_seq_cnt(struct hfi1_ctxtdata *rcd)
   1512{
   1513	return rcd->seq_cnt;
   1514}
   1515
   1516/**
    1517 * hfi1_set_seq_cnt - set seq_cnt member
    1518 * @rcd: the receive context
    1519 * @cnt: the new seq_cnt value
    1520 *
   1521 */
   1522static inline void hfi1_set_seq_cnt(struct hfi1_ctxtdata *rcd, u8 cnt)
   1523{
   1524	rcd->seq_cnt = cnt;
   1525}
   1526
   1527/**
    1528 * last_rcv_seq - check for the last packet
    1529 * @rcd: the receive context
    1530 * @seq: the packet's sequence number
    1531 *
    1532 * Return true if this is the last packet
   1533 */
   1534static inline bool last_rcv_seq(struct hfi1_ctxtdata *rcd, u32 seq)
   1535{
   1536	return seq != rcd->seq_cnt;
   1537}
   1538
   1539/**
    1540 * hfi1_seq_incr - increment context sequence number
    1541 * @rcd: the receive context
    1542 * @seq: the current sequence number
    1543 *
    1544 * Returns: true if this was the last packet
   1545 */
   1546static inline bool hfi1_seq_incr(struct hfi1_ctxtdata *rcd, u32 seq)
   1547{
   1548	rcd->seq_cnt = hfi1_seq_incr_wrap(rcd->seq_cnt);
   1549	return last_rcv_seq(rcd, seq);
   1550}
   1551
   1552/**
   1553 * get_hdrqentsize - return hdrq entry size
   1554 * @rcd: the receive context
   1555 */
   1556static inline u8 get_hdrqentsize(struct hfi1_ctxtdata *rcd)
   1557{
   1558	return rcd->rcvhdrqentsize;
   1559}
   1560
   1561/**
   1562 * get_hdrq_cnt - return hdrq count
   1563 * @rcd: the receive context
   1564 */
   1565static inline u16 get_hdrq_cnt(struct hfi1_ctxtdata *rcd)
   1566{
   1567	return rcd->rcvhdrq_cnt;
   1568}
   1569
   1570/**
   1571 * hfi1_is_slowpath - check if this context is slow path
   1572 * @rcd: the receive context
   1573 */
   1574static inline bool hfi1_is_slowpath(struct hfi1_ctxtdata *rcd)
   1575{
   1576	return rcd->do_interrupt == rcd->slow_handler;
   1577}
   1578
   1579/**
   1580 * hfi1_is_fastpath - check if this context is fast path
   1581 * @rcd: the receive context
   1582 */
   1583static inline bool hfi1_is_fastpath(struct hfi1_ctxtdata *rcd)
   1584{
   1585	if (rcd->ctxt == HFI1_CTRL_CTXT)
   1586		return false;
   1587
   1588	return rcd->do_interrupt == rcd->fast_handler;
   1589}
   1590
   1591/**
   1592 * hfi1_set_fast - change to the fast handler
   1593 * @rcd: the receive context
   1594 */
   1595static inline void hfi1_set_fast(struct hfi1_ctxtdata *rcd)
   1596{
   1597	if (unlikely(!rcd))
   1598		return;
   1599	if (unlikely(!hfi1_is_fastpath(rcd)))
   1600		rcd->do_interrupt = rcd->fast_handler;
   1601}
   1602
   1603int hfi1_reset_device(int);
   1604
   1605void receive_interrupt_work(struct work_struct *work);
   1606
   1607/* extract service channel from header and rhf */
   1608static inline int hfi1_9B_get_sc5(struct ib_header *hdr, u64 rhf)
   1609{
   1610	return ib_get_sc(hdr) | ((!!(rhf_dc_info(rhf))) << 4);
   1611}
   1612
   1613#define HFI1_JKEY_WIDTH       16
   1614#define HFI1_JKEY_MASK        (BIT(16) - 1)
   1615#define HFI1_ADMIN_JKEY_RANGE 32
   1616
   1617/*
   1618 * J_KEYs are split and allocated in the following groups:
   1619 *   0 - 31    - users with administrator privileges
   1620 *  32 - 63    - kernel protocols using KDETH packets
   1621 *  64 - 65535 - all other users using KDETH packets
   1622 */
   1623static inline u16 generate_jkey(kuid_t uid)
   1624{
   1625	u16 jkey = from_kuid(current_user_ns(), uid) & HFI1_JKEY_MASK;
   1626
   1627	if (capable(CAP_SYS_ADMIN))
   1628		jkey &= HFI1_ADMIN_JKEY_RANGE - 1;
   1629	else if (jkey < 64)
   1630		jkey |= BIT(HFI1_JKEY_WIDTH - 1);
   1631
   1632	return jkey;
   1633}
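
       /*
        * Worked example (illustrative uid values, not driver code): an admin
        * task with uid 1000 gets 1000 & 0xffff = 1000, then 1000 & 31 = 8,
        * landing in the privileged 0-31 range; a non-admin uid 50 (< 64) gets
        * bit 15 set, yielding 0x8032, which keeps it out of the kernel range
        * 32-63; a non-admin uid 5000 maps straight to 5000, already in the
        * 64-65535 group.
        */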
   1634
   1635/*
   1636 * active_egress_rate
   1637 *
   1638 * returns the active egress rate in units of [10^6 bits/sec]
   1639 */
   1640static inline u32 active_egress_rate(struct hfi1_pportdata *ppd)
   1641{
   1642	u16 link_speed = ppd->link_speed_active;
   1643	u16 link_width = ppd->link_width_active;
   1644	u32 egress_rate;
   1645
   1646	if (link_speed == OPA_LINK_SPEED_25G)
   1647		egress_rate = 25000;
   1648	else /* assume OPA_LINK_SPEED_12_5G */
   1649		egress_rate = 12500;
   1650
   1651	switch (link_width) {
   1652	case OPA_LINK_WIDTH_4X:
   1653		egress_rate *= 4;
   1654		break;
   1655	case OPA_LINK_WIDTH_3X:
   1656		egress_rate *= 3;
   1657		break;
   1658	case OPA_LINK_WIDTH_2X:
   1659		egress_rate *= 2;
   1660		break;
   1661	default:
   1662		/* assume IB_WIDTH_1X */
   1663		break;
   1664	}
   1665
   1666	return egress_rate;
   1667}
   1668
   1669/*
   1670 * egress_cycles
   1671 *
   1672 * Returns the number of 'fabric clock cycles' to egress a packet
   1673 * of length 'len' bytes, at 'rate' Mbit/s. Since the fabric clock
   1674 * rate is (approximately) 805 MHz, the units of the returned value
   1675 * are (1/805 MHz).
   1676 */
   1677static inline u32 egress_cycles(u32 len, u32 rate)
   1678{
   1679	u32 cycles;
   1680
   1681	/*
   1682	 * cycles is:
   1683	 *
   1684	 *          (length) [bits] / (rate) [bits/sec]
   1685	 *  ---------------------------------------------------
   1686	 *  fabric_clock_period == 1 /(805 * 10^6) [cycles/sec]
   1687	 */
   1688
   1689	cycles = len * 8; /* bits */
   1690	cycles *= 805;
   1691	cycles /= rate;
   1692
   1693	return cycles;
   1694}
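
       /*
        * Worked example (illustrative): a 25G link at 4X yields an
        * active_egress_rate() of 25000 * 4 = 100000 Mbit/s, so a 1024-byte
        * packet costs egress_cycles(1024, 100000) = 1024 * 8 * 805 / 100000
        * = 65 cycles, or roughly 81 ns at 805 MHz.
        */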
   1695
   1696void set_link_ipg(struct hfi1_pportdata *ppd);
   1697void process_becn(struct hfi1_pportdata *ppd, u8 sl, u32 rlid, u32 lqpn,
   1698		  u32 rqpn, u8 svc_type);
   1699void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
   1700		u16 pkey, u32 slid, u32 dlid, u8 sc5,
   1701		const struct ib_grh *old_grh);
   1702void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp,
   1703		    u32 remote_qpn, u16 pkey, u32 slid, u32 dlid,
   1704		    u8 sc5, const struct ib_grh *old_grh);
   1705typedef void (*hfi1_handle_cnp)(struct hfi1_ibport *ibp, struct rvt_qp *qp,
   1706				u32 remote_qpn, u16 pkey, u32 slid, u32 dlid,
   1707				u8 sc5, const struct ib_grh *old_grh);
   1708
   1709#define PKEY_CHECK_INVALID -1
   1710int egress_pkey_check(struct hfi1_pportdata *ppd, u32 slid, u16 pkey,
   1711		      u8 sc5, int8_t s_pkey_index);
   1712
   1713#define PACKET_EGRESS_TIMEOUT 350
   1714static inline void pause_for_credit_return(struct hfi1_devdata *dd)
   1715{
   1716	/* Pause at least 1us, to ensure chip returns all credits */
   1717	u32 usec = cclock_to_ns(dd, PACKET_EGRESS_TIMEOUT) / 1000;
   1718
   1719	udelay(usec ? usec : 1);
   1720}
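
       /*
        * Note (assuming the chip clock runs near the 805 MHz fabric rate
        * noted above): 350 cclocks is only ~435 ns, so the division above
        * yields usec == 0 and the udelay(1) floor provides the minimum
        * 1 us pause.
        */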
   1721
   1722/**
   1723 * sc_to_vlt() - reverse lookup sc to vl
   1724 * @dd: devdata
   1725 * @sc5: 5 bit sc
   1726 */
   1727static inline u8 sc_to_vlt(struct hfi1_devdata *dd, u8 sc5)
   1728{
   1729	unsigned seq;
   1730	u8 rval;
   1731
   1732	if (sc5 >= OPA_MAX_SCS)
   1733		return (u8)(0xff);
   1734
   1735	do {
   1736		seq = read_seqbegin(&dd->sc2vl_lock);
   1737		rval = *(((u8 *)dd->sc2vl) + sc5);
   1738	} while (read_seqretry(&dd->sc2vl_lock, seq));
   1739
   1740	return rval;
   1741}
   1742
   1743#define PKEY_MEMBER_MASK 0x8000
   1744#define PKEY_LOW_15_MASK 0x7fff
   1745
   1746/*
   1747 * ingress_pkey_matches_entry - return 1 if the pkey matches ent (ent
   1748 * being an entry from the ingress partition key table), return 0
   1749 * otherwise. Use the matching criteria for ingress partition keys
   1750 * specified in the OPAv1 spec., section 9.10.14.
   1751 */
   1752static inline int ingress_pkey_matches_entry(u16 pkey, u16 ent)
   1753{
   1754	u16 mkey = pkey & PKEY_LOW_15_MASK;
   1755	u16 ment = ent & PKEY_LOW_15_MASK;
   1756
   1757	if (mkey == ment) {
   1758		/*
   1759		 * If pkey[15] is clear (limited partition member),
   1760		 * is bit 15 in the corresponding table element
   1761		 * clear (limited member)?
   1762		 */
   1763		if (!(pkey & PKEY_MEMBER_MASK))
   1764			return !!(ent & PKEY_MEMBER_MASK);
   1765		return 1;
   1766	}
   1767	return 0;
   1768}
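
       /*
        * Worked example (illustrative): limited-member pkey 0x7fff matches a
        * full-member table entry 0xffff (low 15 bits equal, entry bit 15
        * set), and full-member pkey 0xffff matches limited entry 0x7fff; but
        * limited pkey 0x7fff against limited entry 0x7fff fails, since two
        * limited members may not communicate.
        */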
   1769
   1770/*
   1771 * ingress_pkey_table_search - search the entire pkey table for
   1772 * an entry which matches 'pkey'. return 0 if a match is found,
   1773 * and 1 otherwise.
   1774 */
   1775static int ingress_pkey_table_search(struct hfi1_pportdata *ppd, u16 pkey)
   1776{
   1777	int i;
   1778
   1779	for (i = 0; i < MAX_PKEY_VALUES; i++) {
   1780		if (ingress_pkey_matches_entry(pkey, ppd->pkeys[i]))
   1781			return 0;
   1782	}
   1783	return 1;
   1784}
   1785
   1786/*
   1787 * ingress_pkey_table_fail - record a failure of ingress pkey validation,
   1788 * i.e., increment port_rcv_constraint_errors for the port, and record
   1789 * the 'error info' for this failure.
   1790 */
   1791static void ingress_pkey_table_fail(struct hfi1_pportdata *ppd, u16 pkey,
   1792				    u32 slid)
   1793{
   1794	struct hfi1_devdata *dd = ppd->dd;
   1795
   1796	incr_cntr64(&ppd->port_rcv_constraint_errors);
   1797	if (!(dd->err_info_rcv_constraint.status & OPA_EI_STATUS_SMASK)) {
   1798		dd->err_info_rcv_constraint.status |= OPA_EI_STATUS_SMASK;
   1799		dd->err_info_rcv_constraint.slid = slid;
   1800		dd->err_info_rcv_constraint.pkey = pkey;
   1801	}
   1802}
   1803
   1804/*
   1805 * ingress_pkey_check - Return 0 if the ingress pkey is valid, return 1
   1806 * otherwise. Use the criteria in the OPAv1 spec, section 9.10.14. idx
   1807 * is a hint as to the best place in the partition key table to begin
   1808 * searching. For performance reasons, this function should not be called
   1809 * on the data path; there the pkey check is expected to be done by HW,
   1810 * and rcv_pkey_check() should be called instead.
   1811 */
   1812static inline int ingress_pkey_check(struct hfi1_pportdata *ppd, u16 pkey,
   1813				     u8 sc5, u8 idx, u32 slid, bool force)
   1814{
   1815	if (!(force) && !(ppd->part_enforce & HFI1_PART_ENFORCE_IN))
   1816		return 0;
   1817
   1818	/* If SC15, pkey[0:14] must be 0x7fff */
   1819	if ((sc5 == 0xf) && ((pkey & PKEY_LOW_15_MASK) != PKEY_LOW_15_MASK))
   1820		goto bad;
   1821
   1822	/* Is the pkey = 0x0, or 0x8000? */
   1823	if ((pkey & PKEY_LOW_15_MASK) == 0)
   1824		goto bad;
   1825
   1826	/* The most likely matching pkey has index 'idx' */
   1827	if (ingress_pkey_matches_entry(pkey, ppd->pkeys[idx]))
   1828		return 0;
   1829
   1830	/* no match - try the whole table */
   1831	if (!ingress_pkey_table_search(ppd, pkey))
   1832		return 0;
   1833
   1834bad:
   1835	ingress_pkey_table_fail(ppd, pkey, slid);
   1836	return 1;
   1837}
   1838
   1839/*
   1840 * rcv_pkey_check - Return 0 if the ingress pkey is valid, return 1
   1841 * otherwise. It only ensures the pkey is valid for QP0. This function
   1842 * should be called on the data path instead of ingress_pkey_check(),
   1843 * since on the data path the pkey check is done by HW (except for QP0).
   1844 */
   1845static inline int rcv_pkey_check(struct hfi1_pportdata *ppd, u16 pkey,
   1846				 u8 sc5, u16 slid)
   1847{
   1848	if (!(ppd->part_enforce & HFI1_PART_ENFORCE_IN))
   1849		return 0;
   1850
   1851	/* If SC15, pkey[0:14] must be 0x7fff */
   1852	if ((sc5 == 0xf) && ((pkey & PKEY_LOW_15_MASK) != PKEY_LOW_15_MASK))
   1853		goto bad;
   1854
   1855	return 0;
   1856bad:
   1857	ingress_pkey_table_fail(ppd, pkey, slid);
   1858	return 1;
   1859}
   1860
   1861/* MTU handling */
   1862
   1863/* MTU enumeration, 256-4k match IB */
   1864#define OPA_MTU_0     0
   1865#define OPA_MTU_256   1
   1866#define OPA_MTU_512   2
   1867#define OPA_MTU_1024  3
   1868#define OPA_MTU_2048  4
   1869#define OPA_MTU_4096  5
   1870
   1871u32 lrh_max_header_bytes(struct hfi1_devdata *dd);
   1872int mtu_to_enum(u32 mtu, int default_if_bad);
   1873u16 enum_to_mtu(int mtu);
   1874static inline int valid_ib_mtu(unsigned int mtu)
   1875{
   1876	return mtu == 256 || mtu == 512 ||
   1877		mtu == 1024 || mtu == 2048 ||
   1878		mtu == 4096;
   1879}
   1880
   1881static inline int valid_opa_max_mtu(unsigned int mtu)
   1882{
   1883	return mtu >= 2048 &&
   1884		(valid_ib_mtu(mtu) || mtu == 8192 || mtu == 10240);
   1885}
   1886
   1887int set_mtu(struct hfi1_pportdata *ppd);
   1888
   1889int hfi1_set_lid(struct hfi1_pportdata *ppd, u32 lid, u8 lmc);
   1890void hfi1_disable_after_error(struct hfi1_devdata *dd);
   1891int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit);
   1892int hfi1_rcvbuf_validate(u32 size, u8 type, u16 *encode);
   1893
   1894int fm_get_table(struct hfi1_pportdata *ppd, int which, void *t);
   1895int fm_set_table(struct hfi1_pportdata *ppd, int which, void *t);
   1896
   1897void set_up_vau(struct hfi1_devdata *dd, u8 vau);
   1898void set_up_vl15(struct hfi1_devdata *dd, u16 vl15buf);
   1899void reset_link_credits(struct hfi1_devdata *dd);
   1900void assign_remote_cm_au_table(struct hfi1_devdata *dd, u8 vcu);
   1901
   1902int set_buffer_control(struct hfi1_pportdata *ppd, struct buffer_control *bc);
   1903
   1904static inline struct hfi1_devdata *dd_from_ppd(struct hfi1_pportdata *ppd)
   1905{
   1906	return ppd->dd;
   1907}
   1908
   1909static inline struct hfi1_devdata *dd_from_dev(struct hfi1_ibdev *dev)
   1910{
   1911	return container_of(dev, struct hfi1_devdata, verbs_dev);
   1912}
   1913
   1914static inline struct hfi1_devdata *dd_from_ibdev(struct ib_device *ibdev)
   1915{
   1916	return dd_from_dev(to_idev(ibdev));
   1917}
   1918
   1919static inline struct hfi1_pportdata *ppd_from_ibp(struct hfi1_ibport *ibp)
   1920{
   1921	return container_of(ibp, struct hfi1_pportdata, ibport_data);
   1922}
   1923
   1924static inline struct hfi1_ibdev *dev_from_rdi(struct rvt_dev_info *rdi)
   1925{
   1926	return container_of(rdi, struct hfi1_ibdev, rdi);
   1927}
   1928
   1929static inline struct hfi1_ibport *to_iport(struct ib_device *ibdev, u32 port)
   1930{
   1931	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
   1932	u32 pidx = port - 1; /* IB number port from 1, hdw from 0 */
   1933
   1934	WARN_ON(pidx >= dd->num_pports);
   1935	return &dd->pport[pidx].ibport_data;
   1936}
   1937
   1938static inline struct hfi1_ibport *rcd_to_iport(struct hfi1_ctxtdata *rcd)
   1939{
   1940	return &rcd->ppd->ibport_data;
   1941}
   1942
   1943/**
   1944 * hfi1_may_ecn - Check whether FECN or BECN processing should be done
   1945 * @pkt: the packet to be evaluated
   1946 *
   1947 * Check whether the FECN or BECN bits in the packet's header are
   1948 * enabled, depending on packet type.
   1949 *
   1950 * This function only checks for FECN and BECN bits. Additional checks
   1951 * are done in the slowpath (hfi1_process_ecn_slowpath()) in order to
   1952 * ensure correct handling.
   1953 */
   1954static inline bool hfi1_may_ecn(struct hfi1_packet *pkt)
   1955{
   1956	bool fecn, becn;
   1957
   1958	if (pkt->etype == RHF_RCV_TYPE_BYPASS) {
   1959		fecn = hfi1_16B_get_fecn(pkt->hdr);
   1960		becn = hfi1_16B_get_becn(pkt->hdr);
   1961	} else {
   1962		fecn = ib_bth_get_fecn(pkt->ohdr);
   1963		becn = ib_bth_get_becn(pkt->ohdr);
   1964	}
   1965	return fecn || becn;
   1966}
   1967
   1968bool hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
   1969			       bool prescan);
   1970static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt)
   1971{
   1972	bool do_work;
   1973
   1974	do_work = hfi1_may_ecn(pkt);
   1975	if (unlikely(do_work))
   1976		return hfi1_process_ecn_slowpath(qp, pkt, false);
   1977	return false;
   1978}
   1979
   1980/*
   1981 * Return the indexed PKEY from the port PKEY table.
   1982 */
   1983static inline u16 hfi1_get_pkey(struct hfi1_ibport *ibp, unsigned index)
   1984{
   1985	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
   1986	u16 ret;
   1987
   1988	if (index >= ARRAY_SIZE(ppd->pkeys))
   1989		ret = 0;
   1990	else
   1991		ret = ppd->pkeys[index];
   1992
   1993	return ret;
   1994}
   1995
   1996/*
   1997 * Return the indexed GUID from the port GUIDs table.
   1998 */
   1999static inline __be64 get_sguid(struct hfi1_ibport *ibp, unsigned int index)
   2000{
   2001	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
   2002
   2003	WARN_ON(index >= HFI1_GUIDS_PER_PORT);
   2004	return cpu_to_be64(ppd->guids[index]);
   2005}
   2006
   2007/*
   2008 * Called by readers of cc_state only; must be called under rcu_read_lock().
   2009 */
   2010static inline struct cc_state *get_cc_state(struct hfi1_pportdata *ppd)
   2011{
   2012	return rcu_dereference(ppd->cc_state);
   2013}
   2014
   2015/*
   2016 * Called by writers of cc_state only; must be called with cc_state_lock held.
   2017 */
   2018static inline
   2019struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd)
   2020{
   2021	return rcu_dereference_protected(ppd->cc_state,
   2022					 lockdep_is_held(&ppd->cc_state_lock));
   2023}
   2024
   2025/*
   2026 * values for dd->flags (_device_ related flags)
   2027 */
   2028#define HFI1_INITTED           0x1    /* chip and driver up and initted */
   2029#define HFI1_PRESENT           0x2    /* chip accesses can be done */
   2030#define HFI1_FROZEN            0x4    /* chip in SPC freeze */
   2031#define HFI1_HAS_SDMA_TIMEOUT  0x8
   2032#define HFI1_HAS_SEND_DMA      0x10   /* Supports Send DMA */
   2033#define HFI1_FORCED_FREEZE     0x80   /* driver forced freeze mode */
   2034#define HFI1_SHUTDOWN          0x100  /* device is shutting down */
   2035
   2036/* IB dword length mask in PBC (lower 11 bits); same for all chips */
   2037#define HFI1_PBC_LENGTH_MASK                     ((1 << 11) - 1)
   2038
   2039/* ctxt_flag bit offsets */
   2040		/* base context has not finished initializing */
   2041#define HFI1_CTXT_BASE_UNINIT 1
   2042		/* base context initialization failed */
   2043#define HFI1_CTXT_BASE_FAILED 2
   2044		/* waiting for a packet to arrive */
   2045#define HFI1_CTXT_WAITING_RCV 3
   2046		/* waiting for an urgent packet to arrive */
   2047#define HFI1_CTXT_WAITING_URG 4
   2048
   2049/* free up any allocated data at close */
   2050int hfi1_init_dd(struct hfi1_devdata *dd);
   2051void hfi1_free_devdata(struct hfi1_devdata *dd);
   2052
   2053/* LED beaconing functions */
   2054void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon,
   2055			     unsigned int timeoff);
   2056void shutdown_led_override(struct hfi1_pportdata *ppd);
   2057
   2058#define HFI1_CREDIT_RETURN_RATE (100)
   2059
   2060/*
   2061 * The number of words for the KDETH protocol field.  If this is
   2062 * larger than the actual field used, then part of the payload
   2063 * will be in the header.
   2064 *
   2065 * Optimally, we want this sized so that a typical case will
   2066 * use full cache lines.  The typical local KDETH header would
   2067 * be:
   2068 *
   2069 *	Bytes	Field
   2070 *	  8	LRH
   2071 *	 12	BTH
   2072 *	 ??	KDETH
   2073 *	  8	RHF
   2074 *	---
   2075 *	 28 + KDETH
   2076 *
   2077 * For a 64-byte cache line, KDETH would need to be 36 bytes or 9 DWORDS
   2078 */
   2079#define DEFAULT_RCVHDRSIZE 9
   2080
   2081/*
   2082 * Maximal header byte count:
   2083 *
   2084 *	Bytes	Field
   2085 *	  8	LRH
   2086 *	 40	GRH (optional)
   2087 *	 12	BTH
   2088 *	 ??	KDETH
   2089 *	  8	RHF
   2090 *	---
   2091 *	 68 + KDETH
   2092 *
   2093 * We also want to maintain a cache line alignment to assist DMA'ing
   2094 * of the header bytes.  Round up to a good size.
   2095 */
   2096#define DEFAULT_RCVHDR_ENTSIZE 32
   2097
   2098bool hfi1_can_pin_pages(struct hfi1_devdata *dd, struct mm_struct *mm,
   2099			u32 nlocked, u32 npages);
   2100int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr,
   2101			    size_t npages, bool writable, struct page **pages);
   2102void hfi1_release_user_pages(struct mm_struct *mm, struct page **p,
   2103			     size_t npages, bool dirty);
   2104
   2105/**
   2106 * hfi1_rcvhdrtail_kvaddr - return tail kvaddr
   2107 * @rcd: the receive context
   2108 */
   2109static inline __le64 *hfi1_rcvhdrtail_kvaddr(const struct hfi1_ctxtdata *rcd)
   2110{
   2111	return (__le64 *)rcd->rcvhdrtail_kvaddr;
   2112}
   2113
   2114static inline void clear_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
   2115{
   2116	u64 *kv = (u64 *)hfi1_rcvhdrtail_kvaddr(rcd);
   2117
   2118	if (kv)
   2119		*kv = 0ULL;
   2120}
   2121
   2122static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
   2123{
   2124	/*
   2125	 * volatile because it's a DMA target from the chip, routine is
   2126	 * inlined, and don't want register caching or reordering.
   2127	 */
   2128	return (u32)le64_to_cpu(*hfi1_rcvhdrtail_kvaddr(rcd));
   2129}
   2130
   2131static inline bool hfi1_packet_present(struct hfi1_ctxtdata *rcd)
   2132{
   2133	if (likely(!rcd->rcvhdrtail_kvaddr)) {
   2134		u32 seq = rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd)));
   2135
   2136		return !last_rcv_seq(rcd, seq);
   2137	}
   2138	return hfi1_rcd_head(rcd) != get_rcvhdrtail(rcd);
   2139}
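
       /*
        * Minimal receive-poll sketch (illustrative only, not the driver's
        * actual loop): without DMA_RTAIL the RHF sequence number is the only
        * "packet available" signal, so a loop keys off hfi1_packet_present()
        * and lets hfi1_seq_incr() advance the expected sequence once the
        * head has moved to the next header queue entry:
        *
        *	while (hfi1_packet_present(rcd)) {
        *		// ... process entry at hfi1_rcd_head(rcd), advance head ...
        *
        *		u32 seq = rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd)));
        *
        *		if (!get_dma_rtail_setting(rcd) &&
        *		    hfi1_seq_incr(rcd, seq))
        *			break;	// next entry's seq mismatch: drained
        *	}
        */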
   2140
   2141/*
   2142 * sysfs interface.
   2143 */
   2144
   2145extern const char ib_hfi1_version[];
   2146extern const struct attribute_group ib_hfi1_attr_group;
   2147extern const struct attribute_group *hfi1_attr_port_groups[];
   2148
   2149int hfi1_device_create(struct hfi1_devdata *dd);
   2150void hfi1_device_remove(struct hfi1_devdata *dd);
   2151
   2152int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd);
   2153void hfi1_verbs_unregister_sysfs(struct hfi1_devdata *dd);
   2154/* Hook for sysfs read of QSFP */
   2155int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len);
   2156
   2157int hfi1_pcie_init(struct hfi1_devdata *dd);
   2158void hfi1_pcie_cleanup(struct pci_dev *pdev);
   2159int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev);
   2160void hfi1_pcie_ddcleanup(struct hfi1_devdata *);
   2161int pcie_speeds(struct hfi1_devdata *dd);
   2162int restore_pci_variables(struct hfi1_devdata *dd);
   2163int save_pci_variables(struct hfi1_devdata *dd);
   2164int do_pcie_gen3_transition(struct hfi1_devdata *dd);
   2165void tune_pcie_caps(struct hfi1_devdata *dd);
   2166int parse_platform_config(struct hfi1_devdata *dd);
   2167int get_platform_config_field(struct hfi1_devdata *dd,
   2168			      enum platform_config_table_type_encoding
   2169			      table_type, int table_index, int field_index,
   2170			      u32 *data, u32 len);
   2171
   2172struct pci_dev *get_pci_dev(struct rvt_dev_info *rdi);
   2173
   2174/*
   2175 * Flush write combining store buffers (if present) and perform a write
   2176 * barrier.
   2177 */
   2178static inline void flush_wc(void)
   2179{
   2180	asm volatile("sfence" : : : "memory");
   2181}
   2182
   2183void handle_eflags(struct hfi1_packet *packet);
   2184void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd);
   2185
   2186/* global module parameter variables */
   2187extern unsigned int hfi1_max_mtu;
   2188extern unsigned int hfi1_cu;
   2189extern unsigned int user_credit_return_threshold;
   2190extern int num_user_contexts;
   2191extern unsigned long n_krcvqs;
   2192extern uint krcvqs[];
   2193extern int krcvqsset;
   2194extern uint loopback;
   2195extern uint quick_linkup;
   2196extern uint rcv_intr_timeout;
   2197extern uint rcv_intr_count;
   2198extern uint rcv_intr_dynamic;
   2199extern ushort link_crc_mask;
   2200
   2201extern struct mutex hfi1_mutex;
   2202
   2203/* Number of seconds before our card status check...  */
   2204#define STATUS_TIMEOUT 60
   2205
   2206#define DRIVER_NAME		"hfi1"
   2207#define HFI1_USER_MINOR_BASE     0
   2208#define HFI1_TRACE_MINOR         127
   2209#define HFI1_NMINORS             255
   2210
   2211#define PCI_VENDOR_ID_INTEL 0x8086
   2212#define PCI_DEVICE_ID_INTEL0 0x24f0
   2213#define PCI_DEVICE_ID_INTEL1 0x24f1
   2214
   2215#define HFI1_PKT_USER_SC_INTEGRITY					    \
   2216	(SEND_CTXT_CHECK_ENABLE_DISALLOW_NON_KDETH_PACKETS_SMASK	    \
   2217	| SEND_CTXT_CHECK_ENABLE_DISALLOW_KDETH_PACKETS_SMASK		\
   2218	| SEND_CTXT_CHECK_ENABLE_DISALLOW_BYPASS_SMASK		    \
   2219	| SEND_CTXT_CHECK_ENABLE_DISALLOW_GRH_SMASK)
   2220
   2221#define HFI1_PKT_KERNEL_SC_INTEGRITY					    \
   2222	(SEND_CTXT_CHECK_ENABLE_DISALLOW_KDETH_PACKETS_SMASK)
   2223
   2224static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd,
   2225						  u16 ctxt_type)
   2226{
   2227	u64 base_sc_integrity;
   2228
   2229	/* No integrity checks if HFI1_CAP_NO_INTEGRITY is set */
   2230	if (HFI1_CAP_IS_KSET(NO_INTEGRITY))
   2231		return 0;
   2232
   2233	base_sc_integrity =
   2234	SEND_CTXT_CHECK_ENABLE_DISALLOW_BYPASS_BAD_PKT_LEN_SMASK
   2235	| SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK
   2236	| SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_LONG_BYPASS_PACKETS_SMASK
   2237	| SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_LONG_IB_PACKETS_SMASK
   2238	| SEND_CTXT_CHECK_ENABLE_DISALLOW_BAD_PKT_LEN_SMASK
   2239#ifndef CONFIG_FAULT_INJECTION
   2240	| SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_TEST_SMASK
   2241#endif
   2242	| SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_SMALL_BYPASS_PACKETS_SMASK
   2243	| SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_SMALL_IB_PACKETS_SMASK
   2244	| SEND_CTXT_CHECK_ENABLE_DISALLOW_RAW_IPV6_SMASK
   2245	| SEND_CTXT_CHECK_ENABLE_DISALLOW_RAW_SMASK
   2246	| SEND_CTXT_CHECK_ENABLE_CHECK_BYPASS_VL_MAPPING_SMASK
   2247	| SEND_CTXT_CHECK_ENABLE_CHECK_VL_MAPPING_SMASK
   2248	| SEND_CTXT_CHECK_ENABLE_CHECK_OPCODE_SMASK
   2249	| SEND_CTXT_CHECK_ENABLE_CHECK_SLID_SMASK
   2250	| SEND_CTXT_CHECK_ENABLE_CHECK_VL_SMASK
   2251	| SEND_CTXT_CHECK_ENABLE_CHECK_ENABLE_SMASK;
   2252
   2253	if (ctxt_type == SC_USER)
   2254		base_sc_integrity |=
   2255#ifndef CONFIG_FAULT_INJECTION
   2256			SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_TEST_SMASK |
   2257#endif
   2258			HFI1_PKT_USER_SC_INTEGRITY;
   2259	else if (ctxt_type != SC_KERNEL)
   2260		base_sc_integrity |= HFI1_PKT_KERNEL_SC_INTEGRITY;
   2261
   2262	/* turn on send-side job key checks if !A0 */
   2263	if (!is_ax(dd))
   2264		base_sc_integrity |= SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
   2265
   2266	return base_sc_integrity;
   2267}
   2268
   2269static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd)
   2270{
   2271	u64 base_sdma_integrity;
   2272
   2273	/* No integrity checks if HFI1_CAP_NO_INTEGRITY is set */
   2274	if (HFI1_CAP_IS_KSET(NO_INTEGRITY))
   2275		return 0;
   2276
   2277	base_sdma_integrity =
   2278	SEND_DMA_CHECK_ENABLE_DISALLOW_BYPASS_BAD_PKT_LEN_SMASK
   2279	| SEND_DMA_CHECK_ENABLE_DISALLOW_TOO_LONG_BYPASS_PACKETS_SMASK
   2280	| SEND_DMA_CHECK_ENABLE_DISALLOW_TOO_LONG_IB_PACKETS_SMASK
   2281	| SEND_DMA_CHECK_ENABLE_DISALLOW_BAD_PKT_LEN_SMASK
   2282	| SEND_DMA_CHECK_ENABLE_DISALLOW_TOO_SMALL_BYPASS_PACKETS_SMASK
   2283	| SEND_DMA_CHECK_ENABLE_DISALLOW_TOO_SMALL_IB_PACKETS_SMASK
   2284	| SEND_DMA_CHECK_ENABLE_DISALLOW_RAW_IPV6_SMASK
   2285	| SEND_DMA_CHECK_ENABLE_DISALLOW_RAW_SMASK
   2286	| SEND_DMA_CHECK_ENABLE_CHECK_BYPASS_VL_MAPPING_SMASK
   2287	| SEND_DMA_CHECK_ENABLE_CHECK_VL_MAPPING_SMASK
   2288	| SEND_DMA_CHECK_ENABLE_CHECK_OPCODE_SMASK
   2289	| SEND_DMA_CHECK_ENABLE_CHECK_SLID_SMASK
   2290	| SEND_DMA_CHECK_ENABLE_CHECK_VL_SMASK
   2291	| SEND_DMA_CHECK_ENABLE_CHECK_ENABLE_SMASK;
   2292
   2293	if (!HFI1_CAP_IS_KSET(STATIC_RATE_CTRL))
   2294		base_sdma_integrity |=
   2295		SEND_DMA_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK;
   2296
   2297	/* turn on send-side job key checks if !A0 */
   2298	if (!is_ax(dd))
   2299		base_sdma_integrity |=
   2300			SEND_DMA_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
   2301
   2302	return base_sdma_integrity;
   2303}
   2304
   2305#define dd_dev_emerg(dd, fmt, ...) \
   2306	dev_emerg(&(dd)->pcidev->dev, "%s: " fmt, \
   2307		  rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)
   2308
   2309#define dd_dev_err(dd, fmt, ...) \
   2310	dev_err(&(dd)->pcidev->dev, "%s: " fmt, \
   2311		rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)
   2312
   2313#define dd_dev_err_ratelimited(dd, fmt, ...) \
   2314	dev_err_ratelimited(&(dd)->pcidev->dev, "%s: " fmt, \
   2315			    rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), \
   2316			    ##__VA_ARGS__)
   2317
   2318#define dd_dev_warn(dd, fmt, ...) \
   2319	dev_warn(&(dd)->pcidev->dev, "%s: " fmt, \
   2320		 rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)
   2321
   2322#define dd_dev_warn_ratelimited(dd, fmt, ...) \
   2323	dev_warn_ratelimited(&(dd)->pcidev->dev, "%s: " fmt, \
   2324			     rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), \
   2325			     ##__VA_ARGS__)
   2326
   2327#define dd_dev_info(dd, fmt, ...) \
   2328	dev_info(&(dd)->pcidev->dev, "%s: " fmt, \
   2329		 rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)
   2330
   2331#define dd_dev_info_ratelimited(dd, fmt, ...) \
   2332	dev_info_ratelimited(&(dd)->pcidev->dev, "%s: " fmt, \
   2333			     rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), \
   2334			     ##__VA_ARGS__)
   2335
   2336#define dd_dev_dbg(dd, fmt, ...) \
   2337	dev_dbg(&(dd)->pcidev->dev, "%s: " fmt, \
   2338		rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)
   2339
   2340#define hfi1_dev_porterr(dd, port, fmt, ...) \
   2341	dev_err(&(dd)->pcidev->dev, "%s: port %u: " fmt, \
   2342		rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), (port), ##__VA_ARGS__)
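
       /*
        * Usage sketch (illustrative): these wrappers prefix messages with the
        * IB device name (and port, where applicable), e.g.:
        *
        *	dd_dev_err(dd, "pcie link setup failed, ret %d\n", ret);
        *	hfi1_dev_porterr(dd, 1, "link width downgrade\n");
        */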
   2343
   2344/*
   2345 * this is used for formatting hw error messages...
   2346 */
   2347struct hfi1_hwerror_msgs {
   2348	u64 mask;
   2349	const char *msg;
   2350	size_t sz;
   2351};
   2352
   2353/* in intr.c... */
   2354void hfi1_format_hwerrors(u64 hwerrs,
   2355			  const struct hfi1_hwerror_msgs *hwerrmsgs,
   2356			  size_t nhwerrmsgs, char *msg, size_t lmsg);
   2357
   2358#define USER_OPCODE_CHECK_VAL 0xC0
   2359#define USER_OPCODE_CHECK_MASK 0xC0
   2360#define OPCODE_CHECK_VAL_DISABLED 0x0
   2361#define OPCODE_CHECK_MASK_DISABLED 0x0
   2362
   2363static inline void hfi1_reset_cpu_counters(struct hfi1_devdata *dd)
   2364{
   2365	struct hfi1_pportdata *ppd;
   2366	int i;
   2367
   2368	dd->z_int_counter = get_all_cpu_total(dd->int_counter);
   2369	dd->z_rcv_limit = get_all_cpu_total(dd->rcv_limit);
   2370	dd->z_send_schedule = get_all_cpu_total(dd->send_schedule);
   2371
   2372	ppd = (struct hfi1_pportdata *)(dd + 1);
   2373	for (i = 0; i < dd->num_pports; i++, ppd++) {
   2374		ppd->ibport_data.rvp.z_rc_acks =
   2375			get_all_cpu_total(ppd->ibport_data.rvp.rc_acks);
   2376		ppd->ibport_data.rvp.z_rc_qacks =
   2377			get_all_cpu_total(ppd->ibport_data.rvp.rc_qacks);
   2378	}
   2379}
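
       /*
        * The z_* fields are per-cpu totals sampled at reset time; a later
        * reading subtracts that baseline instead of clearing the hot per-cpu
        * counters. Illustrative sketch:
        *
        *	u64 ints_since_reset =
        *		get_all_cpu_total(dd->int_counter) - dd->z_int_counter;
        */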
   2380
   2381/* Control LED state */
   2382static inline void setextled(struct hfi1_devdata *dd, u32 on)
   2383{
   2384	if (on)
   2385		write_csr(dd, DCC_CFG_LED_CNTRL, 0x1F);
   2386	else
   2387		write_csr(dd, DCC_CFG_LED_CNTRL, 0x10);
   2388}
   2389
   2390/* return the i2c resource given the target */
   2391static inline u32 i2c_target(u32 target)
   2392{
   2393	return target ? CR_I2C2 : CR_I2C1;
   2394}
   2395
   2396/* return the i2c chain chip resource that this HFI uses for QSFP */
   2397static inline u32 qsfp_resource(struct hfi1_devdata *dd)
   2398{
   2399	return i2c_target(dd->hfi1_id);
   2400}
   2401
   2402/* Is this device integrated or discrete? */
   2403static inline bool is_integrated(struct hfi1_devdata *dd)
   2404{
   2405	return dd->pcidev->device == PCI_DEVICE_ID_INTEL1;
   2406}
   2407
   2408/**
   2409 * hfi1_need_drop - detect need for drop
   2410 * @dd: the device
   2411 *
   2412 * In some cases, the first packet needs to be dropped.
   2413 *
   2414 * Return true if the current packet needs to be dropped and false otherwise.
   2415 */
   2416static inline bool hfi1_need_drop(struct hfi1_devdata *dd)
   2417{
   2418	if (unlikely(dd->do_drop &&
   2419		     atomic_xchg(&dd->drop_packet, DROP_PACKET_OFF) ==
   2420		     DROP_PACKET_ON)) {
   2421		dd->do_drop = false;
   2422		return true;
   2423	}
   2424	return false;
   2425}
   2426
   2427int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp);
   2428
   2429#define DD_DEV_ENTRY(dd)       __string(dev, dev_name(&(dd)->pcidev->dev))
   2430#define DD_DEV_ASSIGN(dd)      __assign_str(dev, dev_name(&(dd)->pcidev->dev))
   2431
   2432static inline void hfi1_update_ah_attr(struct ib_device *ibdev,
   2433				       struct rdma_ah_attr *attr)
   2434{
   2435	struct hfi1_pportdata *ppd;
   2436	struct hfi1_ibport *ibp;
   2437	u32 dlid = rdma_ah_get_dlid(attr);
   2438
   2439	/*
   2440	 * Kernel clients may not have setup GRH information
   2441	 * Set that here.
   2442	 */
   2443	ibp = to_iport(ibdev, rdma_ah_get_port_num(attr));
   2444	ppd = ppd_from_ibp(ibp);
   2445	if ((((dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) ||
   2446	      (ppd->lid >= be16_to_cpu(IB_MULTICAST_LID_BASE))) &&
   2447	    (dlid != be32_to_cpu(OPA_LID_PERMISSIVE)) &&
   2448	    (dlid != be16_to_cpu(IB_LID_PERMISSIVE)) &&
   2449	    (!(rdma_ah_get_ah_flags(attr) & IB_AH_GRH))) ||
   2450	    (rdma_ah_get_make_grd(attr))) {
   2451		rdma_ah_set_ah_flags(attr, IB_AH_GRH);
   2452		rdma_ah_set_interface_id(attr, OPA_MAKE_ID(dlid));
   2453		rdma_ah_set_subnet_prefix(attr, ibp->rvp.gid_prefix);
   2454	}
   2455}
   2456
   2457/*
   2458 * hfi1_check_mcast - Check if the given lid is
   2459 * in the OPA multicast range.
   2460 *
   2461 * The LID might either reside in ah.dlid or might be
   2462 * in the GRH of the address handle as DGID if extended
   2463 * addresses are in use.
   2464 */
   2465static inline bool hfi1_check_mcast(u32 lid)
   2466{
   2467	return ((lid >= opa_get_mcast_base(OPA_MCAST_NR)) &&
   2468		(lid != be32_to_cpu(OPA_LID_PERMISSIVE)));
   2469}
   2470
   2471#define opa_get_lid(lid, format)	\
   2472	__opa_get_lid(lid, OPA_PORT_PACKET_FORMAT_##format)
   2473
   2474/* Convert a lid to a specific lid space */
   2475static inline u32 __opa_get_lid(u32 lid, u8 format)
   2476{
   2477	bool is_mcast = hfi1_check_mcast(lid);
   2478
   2479	switch (format) {
   2480	case OPA_PORT_PACKET_FORMAT_8B:
   2481	case OPA_PORT_PACKET_FORMAT_10B:
   2482		if (is_mcast)
   2483			return (lid - opa_get_mcast_base(OPA_MCAST_NR) +
   2484				0xF0000);
   2485		return lid & 0xFFFFF;
   2486	case OPA_PORT_PACKET_FORMAT_16B:
   2487		if (is_mcast)
   2488			return (lid - opa_get_mcast_base(OPA_MCAST_NR) +
   2489				0xF00000);
   2490		return lid & 0xFFFFFF;
   2491	case OPA_PORT_PACKET_FORMAT_9B:
   2492		if (is_mcast)
   2493			return (lid -
   2494				opa_get_mcast_base(OPA_MCAST_NR) +
   2495				be16_to_cpu(IB_MULTICAST_LID_BASE));
   2496		else
   2497			return lid & 0xFFFF;
   2498	default:
   2499		return lid;
   2500	}
   2501}
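
       /*
        * Worked example (illustrative, MCAST_NR of 4 assumed): the 32-bit
        * multicast LID 0xF0000001 converts to 0xC001 in the 9B space
        * (subtract the multicast base, add IB_MULTICAST_LID_BASE), 0xF0001
        * in 8B/10B, and 0xF00001 in 16B; unicast LIDs are simply masked to
        * the width of the target format (16, 20 or 24 bits).
        */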
   2502
   2503/* Return true if the given lid is in the OPA 16B multicast range */
   2504static inline bool hfi1_is_16B_mcast(u32 lid)
   2505{
   2506	return ((lid >=
   2507		opa_get_lid(opa_get_mcast_base(OPA_MCAST_NR), 16B)) &&
   2508		(lid != opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), 16B)));
   2509}
   2510
   2511static inline void hfi1_make_opa_lid(struct rdma_ah_attr *attr)
   2512{
   2513	const struct ib_global_route *grh = rdma_ah_read_grh(attr);
   2514	u32 dlid = rdma_ah_get_dlid(attr);
   2515
   2516	/* Modify ah_attr.dlid to be in the 32 bit LID space.
   2517	 * This is how the address will be laid out:
   2518	 * Assuming MCAST_NR to be 4,
   2519	 * 32 bit permissive LID = 0xFFFFFFFF
   2520	 * Multicast LID range = 0xFFFFFFFE to 0xF0000000
   2521	 * Unicast LID range = 0xEFFFFFFF to 1
   2522	 * Invalid LID = 0
   2523	 */
   2524	if (ib_is_opa_gid(&grh->dgid))
   2525		dlid = opa_get_lid_from_gid(&grh->dgid);
   2526	else if ((dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
   2527		 (dlid != be16_to_cpu(IB_LID_PERMISSIVE)) &&
   2528		 (dlid != be32_to_cpu(OPA_LID_PERMISSIVE)))
   2529		dlid = dlid - be16_to_cpu(IB_MULTICAST_LID_BASE) +
   2530			opa_get_mcast_base(OPA_MCAST_NR);
   2531	else if (dlid == be16_to_cpu(IB_LID_PERMISSIVE))
   2532		dlid = be32_to_cpu(OPA_LID_PERMISSIVE);
   2533
   2534	rdma_ah_set_dlid(attr, dlid);
   2535}
   2536
   2537static inline u8 hfi1_get_packet_type(u32 lid)
   2538{
   2539	/* 9B if lid >= 0xF0000000 */
   2540	if (lid >= opa_get_mcast_base(OPA_MCAST_NR))
   2541		return HFI1_PKT_TYPE_9B;
   2542
   2543	/* 16B if lid >= 0xC000 */
   2544	if (lid >= opa_get_lid(opa_get_mcast_base(OPA_MCAST_NR), 9B))
   2545		return HFI1_PKT_TYPE_16B;
   2546
   2547	return HFI1_PKT_TYPE_9B;
   2548}
   2549
   2550static inline bool hfi1_get_hdr_type(u32 lid, struct rdma_ah_attr *attr)
   2551{
   2552	/*
   2553	 * If there was an incoming 16B packet with permissive
   2554	 * LIDs, OPA GIDs would have been programmed when those
   2555	 * packets were received. A 16B packet will have to
   2556	 * be sent in response to that packet. Return a 16B
   2557	 * header type if that's the case.
   2558	 */
   2559	if (rdma_ah_get_dlid(attr) == be32_to_cpu(OPA_LID_PERMISSIVE))
   2560		return (ib_is_opa_gid(&rdma_ah_read_grh(attr)->dgid)) ?
   2561			HFI1_PKT_TYPE_16B : HFI1_PKT_TYPE_9B;
   2562
   2563	/*
   2564	 * Return a 16B header type if either the destination
   2565	 * or source lid is extended.
   2566	 */
   2567	if (hfi1_get_packet_type(rdma_ah_get_dlid(attr)) == HFI1_PKT_TYPE_16B)
   2568		return HFI1_PKT_TYPE_16B;
   2569
   2570	return hfi1_get_packet_type(lid);
   2571}
   2572
   2573static inline void hfi1_make_ext_grh(struct hfi1_packet *packet,
   2574				     struct ib_grh *grh, u32 slid,
   2575				     u32 dlid)
   2576{
   2577	struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;
   2578	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
   2579
   2580	if (!ibp)
   2581		return;
   2582
   2583	grh->hop_limit = 1;
   2584	grh->sgid.global.subnet_prefix = ibp->rvp.gid_prefix;
   2585	if (slid == opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), 16B))
   2586		grh->sgid.global.interface_id =
   2587			OPA_MAKE_ID(be32_to_cpu(OPA_LID_PERMISSIVE));
   2588	else
   2589		grh->sgid.global.interface_id = OPA_MAKE_ID(slid);
   2590
   2591	/*
   2592	 * Upper layers (like mad) may compare the dgid in the
   2593	 * wc that is obtained here with the sgid_index in
   2594	 * the wr. Since sgid_index in wr is always 0 for
   2595	 * extended lids, set the dgid here to the default
   2596	 * IB gid.
   2597	 */
   2598	grh->dgid.global.subnet_prefix = ibp->rvp.gid_prefix;
   2599	grh->dgid.global.interface_id =
   2600		cpu_to_be64(ppd->guids[HFI1_PORT_GUID_INDEX]);
   2601}
   2602
   2603static inline int hfi1_get_16b_padding(u32 hdr_size, u32 payload)
   2604{
   2605	return -(hdr_size + payload + (SIZE_OF_CRC << 2) +
   2606		     SIZE_OF_LT) & 0x7;
   2607}
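
       /*
        * Worked example (illustrative, assuming a one-dword CRC and one-byte
        * LT): the negate-and-mask rounds the total of header, payload, CRC
        * and LT up to the next 8-byte boundary; for hdr_size 40 and payload
        * 13 the total is 40 + 13 + 4 + 1 = 58, and -(58) & 0x7 = 6 pad bytes
        * (58 + 6 = 64).
        */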
   2608
   2609static inline void hfi1_make_ib_hdr(struct ib_header *hdr,
   2610				    u16 lrh0, u16 len,
   2611				    u16 dlid, u16 slid)
   2612{
   2613	hdr->lrh[0] = cpu_to_be16(lrh0);
   2614	hdr->lrh[1] = cpu_to_be16(dlid);
   2615	hdr->lrh[2] = cpu_to_be16(len);
   2616	hdr->lrh[3] = cpu_to_be16(slid);
   2617}
   2618
   2619static inline void hfi1_make_16b_hdr(struct hfi1_16b_header *hdr,
   2620				     u32 slid, u32 dlid,
   2621				     u16 len, u16 pkey,
   2622				     bool becn, bool fecn, u8 l4,
   2623				     u8 sc)
   2624{
   2625	u32 lrh0 = 0;
   2626	u32 lrh1 = 0x40000000;
   2627	u32 lrh2 = 0;
   2628	u32 lrh3 = 0;
   2629
   2630	lrh0 = (lrh0 & ~OPA_16B_BECN_MASK) | (becn << OPA_16B_BECN_SHIFT);
   2631	lrh0 = (lrh0 & ~OPA_16B_LEN_MASK) | (len << OPA_16B_LEN_SHIFT);
   2632	lrh0 = (lrh0 & ~OPA_16B_LID_MASK)  | (slid & OPA_16B_LID_MASK);
   2633	lrh1 = (lrh1 & ~OPA_16B_FECN_MASK) | (fecn << OPA_16B_FECN_SHIFT);
   2634	lrh1 = (lrh1 & ~OPA_16B_SC_MASK) | (sc << OPA_16B_SC_SHIFT);
   2635	lrh1 = (lrh1 & ~OPA_16B_LID_MASK) | (dlid & OPA_16B_LID_MASK);
   2636	lrh2 = (lrh2 & ~OPA_16B_SLID_MASK) |
   2637		((slid >> OPA_16B_SLID_SHIFT) << OPA_16B_SLID_HIGH_SHIFT);
   2638	lrh2 = (lrh2 & ~OPA_16B_DLID_MASK) |
   2639		((dlid >> OPA_16B_DLID_SHIFT) << OPA_16B_DLID_HIGH_SHIFT);
   2640	lrh2 = (lrh2 & ~OPA_16B_PKEY_MASK) | ((u32)pkey << OPA_16B_PKEY_SHIFT);
   2641	lrh2 = (lrh2 & ~OPA_16B_L4_MASK) | l4;
   2642
   2643	hdr->lrh[0] = lrh0;
   2644	hdr->lrh[1] = lrh1;
   2645	hdr->lrh[2] = lrh2;
   2646	hdr->lrh[3] = lrh3;
   2647}
   2648#endif                          /* _HFI1_KERNEL_H */