cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

ib_verbs.h (143198B)


      1/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
      2/*
      3 * Copyright (c) 2004 Mellanox Technologies Ltd.  All rights reserved.
      4 * Copyright (c) 2004 Infinicon Corporation.  All rights reserved.
      5 * Copyright (c) 2004, 2020 Intel Corporation.  All rights reserved.
      6 * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
      7 * Copyright (c) 2004 Voltaire Corporation.  All rights reserved.
      8 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
      9 * Copyright (c) 2005, 2006, 2007 Cisco Systems.  All rights reserved.
     10 */
     11
     12#ifndef IB_VERBS_H
     13#define IB_VERBS_H
     14
     15#include <linux/ethtool.h>
     16#include <linux/types.h>
     17#include <linux/device.h>
     18#include <linux/dma-mapping.h>
     19#include <linux/kref.h>
     20#include <linux/list.h>
     21#include <linux/rwsem.h>
     22#include <linux/workqueue.h>
     23#include <linux/irq_poll.h>
     24#include <uapi/linux/if_ether.h>
     25#include <net/ipv6.h>
     26#include <net/ip.h>
     27#include <linux/string.h>
     28#include <linux/slab.h>
     29#include <linux/netdevice.h>
     30#include <linux/refcount.h>
     31#include <linux/if_link.h>
     32#include <linux/atomic.h>
     33#include <linux/mmu_notifier.h>
     34#include <linux/uaccess.h>
     35#include <linux/cgroup_rdma.h>
     36#include <linux/irqflags.h>
     37#include <linux/preempt.h>
     38#include <linux/dim.h>
     39#include <uapi/rdma/ib_user_verbs.h>
     40#include <rdma/rdma_counter.h>
     41#include <rdma/restrack.h>
     42#include <rdma/signature.h>
     43#include <uapi/rdma/rdma_user_ioctl.h>
     44#include <uapi/rdma/ib_user_ioctl_verbs.h>
     45
     46#define IB_FW_VERSION_NAME_MAX	ETHTOOL_FWVERS_LEN
     47
     48struct ib_umem_odp;
     49struct ib_uqp_object;
     50struct ib_usrq_object;
     51struct ib_uwq_object;
     52struct rdma_cm_id;
     53struct ib_port;
     54struct hw_stats_device_data;
     55
     56extern struct workqueue_struct *ib_wq;
     57extern struct workqueue_struct *ib_comp_wq;
     58extern struct workqueue_struct *ib_comp_unbound_wq;
     59
     60struct ib_ucq_object;
     61
     62__printf(3, 4) __cold
     63void ibdev_printk(const char *level, const struct ib_device *ibdev,
     64		  const char *format, ...);
     65__printf(2, 3) __cold
     66void ibdev_emerg(const struct ib_device *ibdev, const char *format, ...);
     67__printf(2, 3) __cold
     68void ibdev_alert(const struct ib_device *ibdev, const char *format, ...);
     69__printf(2, 3) __cold
     70void ibdev_crit(const struct ib_device *ibdev, const char *format, ...);
     71__printf(2, 3) __cold
     72void ibdev_err(const struct ib_device *ibdev, const char *format, ...);
     73__printf(2, 3) __cold
     74void ibdev_warn(const struct ib_device *ibdev, const char *format, ...);
     75__printf(2, 3) __cold
     76void ibdev_notice(const struct ib_device *ibdev, const char *format, ...);
     77__printf(2, 3) __cold
     78void ibdev_info(const struct ib_device *ibdev, const char *format, ...);
     79
     80#if defined(CONFIG_DYNAMIC_DEBUG) || \
     81	(defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
     82#define ibdev_dbg(__dev, format, args...)                       \
     83	dynamic_ibdev_dbg(__dev, format, ##args)
     84#else
     85__printf(2, 3) __cold
     86static inline
     87void ibdev_dbg(const struct ib_device *ibdev, const char *format, ...) {}
     88#endif
     89
     90#define ibdev_level_ratelimited(ibdev_level, ibdev, fmt, ...)           \
     91do {                                                                    \
     92	static DEFINE_RATELIMIT_STATE(_rs,                              \
     93				      DEFAULT_RATELIMIT_INTERVAL,       \
     94				      DEFAULT_RATELIMIT_BURST);         \
     95	if (__ratelimit(&_rs))                                          \
     96		ibdev_level(ibdev, fmt, ##__VA_ARGS__);                 \
     97} while (0)
     98
     99#define ibdev_emerg_ratelimited(ibdev, fmt, ...) \
    100	ibdev_level_ratelimited(ibdev_emerg, ibdev, fmt, ##__VA_ARGS__)
    101#define ibdev_alert_ratelimited(ibdev, fmt, ...) \
    102	ibdev_level_ratelimited(ibdev_alert, ibdev, fmt, ##__VA_ARGS__)
    103#define ibdev_crit_ratelimited(ibdev, fmt, ...) \
    104	ibdev_level_ratelimited(ibdev_crit, ibdev, fmt, ##__VA_ARGS__)
    105#define ibdev_err_ratelimited(ibdev, fmt, ...) \
    106	ibdev_level_ratelimited(ibdev_err, ibdev, fmt, ##__VA_ARGS__)
    107#define ibdev_warn_ratelimited(ibdev, fmt, ...) \
    108	ibdev_level_ratelimited(ibdev_warn, ibdev, fmt, ##__VA_ARGS__)
    109#define ibdev_notice_ratelimited(ibdev, fmt, ...) \
    110	ibdev_level_ratelimited(ibdev_notice, ibdev, fmt, ##__VA_ARGS__)
    111#define ibdev_info_ratelimited(ibdev, fmt, ...) \
    112	ibdev_level_ratelimited(ibdev_info, ibdev, fmt, ##__VA_ARGS__)
    113
    114#if defined(CONFIG_DYNAMIC_DEBUG) || \
    115	(defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
    116/* descriptor check is first to prevent flooding with "callbacks suppressed" */
    117#define ibdev_dbg_ratelimited(ibdev, fmt, ...)                          \
    118do {                                                                    \
    119	static DEFINE_RATELIMIT_STATE(_rs,                              \
    120				      DEFAULT_RATELIMIT_INTERVAL,       \
    121				      DEFAULT_RATELIMIT_BURST);         \
    122	DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt);                 \
    123	if (DYNAMIC_DEBUG_BRANCH(descriptor) && __ratelimit(&_rs))      \
    124		__dynamic_ibdev_dbg(&descriptor, ibdev, fmt,            \
    125				    ##__VA_ARGS__);                     \
    126} while (0)
    127#else
    128__printf(2, 3) __cold
    129static inline
    130void ibdev_dbg_ratelimited(const struct ib_device *ibdev, const char *format, ...) {}
    131#endif
    132
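        /*
         * Illustrative usage sketch (not part of the original header): a
         * driver holding a pointer to its ib_device can log through the
         * helpers above; "mydev", "ret" and "vec" are assumed driver-local
         * names.
         *
         *	ibdev_err(&mydev->ibdev, "failed to create CQ: %d\n", ret);
         *	ibdev_dbg_ratelimited(&mydev->ibdev, "EQ overrun on vector %u\n",
         *			      vec);
         */
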
    133union ib_gid {
    134	u8	raw[16];
    135	struct {
    136		__be64	subnet_prefix;
    137		__be64	interface_id;
    138	} global;
    139};
    140
    141extern union ib_gid zgid;
    142
    143enum ib_gid_type {
    144	IB_GID_TYPE_IB = IB_UVERBS_GID_TYPE_IB,
    145	IB_GID_TYPE_ROCE = IB_UVERBS_GID_TYPE_ROCE_V1,
    146	IB_GID_TYPE_ROCE_UDP_ENCAP = IB_UVERBS_GID_TYPE_ROCE_V2,
    147	IB_GID_TYPE_SIZE
    148};
    149
    150#define ROCE_V2_UDP_DPORT      4791
    151struct ib_gid_attr {
    152	struct net_device __rcu	*ndev;
    153	struct ib_device	*device;
    154	union ib_gid		gid;
    155	enum ib_gid_type	gid_type;
    156	u16			index;
    157	u32			port_num;
    158};
    159
    160enum {
    161	/* set the local administered indication */
    162	IB_SA_WELL_KNOWN_GUID	= BIT_ULL(57) | 2,
    163};
    164
    165enum rdma_transport_type {
    166	RDMA_TRANSPORT_IB,
    167	RDMA_TRANSPORT_IWARP,
    168	RDMA_TRANSPORT_USNIC,
    169	RDMA_TRANSPORT_USNIC_UDP,
    170	RDMA_TRANSPORT_UNSPECIFIED,
    171};
    172
    173enum rdma_protocol_type {
    174	RDMA_PROTOCOL_IB,
    175	RDMA_PROTOCOL_IBOE,
    176	RDMA_PROTOCOL_IWARP,
    177	RDMA_PROTOCOL_USNIC_UDP
    178};
    179
    180__attribute_const__ enum rdma_transport_type
    181rdma_node_get_transport(unsigned int node_type);
    182
    183enum rdma_network_type {
    184	RDMA_NETWORK_IB,
    185	RDMA_NETWORK_ROCE_V1,
    186	RDMA_NETWORK_IPV4,
    187	RDMA_NETWORK_IPV6
    188};
    189
    190static inline enum ib_gid_type ib_network_to_gid_type(enum rdma_network_type network_type)
    191{
    192	if (network_type == RDMA_NETWORK_IPV4 ||
    193	    network_type == RDMA_NETWORK_IPV6)
    194		return IB_GID_TYPE_ROCE_UDP_ENCAP;
    195	else if (network_type == RDMA_NETWORK_ROCE_V1)
    196		return IB_GID_TYPE_ROCE;
    197	else
    198		return IB_GID_TYPE_IB;
    199}
    200
    201static inline enum rdma_network_type
    202rdma_gid_attr_network_type(const struct ib_gid_attr *attr)
    203{
    204	if (attr->gid_type == IB_GID_TYPE_IB)
    205		return RDMA_NETWORK_IB;
    206
    207	if (attr->gid_type == IB_GID_TYPE_ROCE)
    208		return RDMA_NETWORK_ROCE_V1;
    209
    210	if (ipv6_addr_v4mapped((struct in6_addr *)&attr->gid))
    211		return RDMA_NETWORK_IPV4;
    212	else
    213		return RDMA_NETWORK_IPV6;
    214}
    215
    216enum rdma_link_layer {
    217	IB_LINK_LAYER_UNSPECIFIED,
    218	IB_LINK_LAYER_INFINIBAND,
    219	IB_LINK_LAYER_ETHERNET,
    220};
    221
    222enum ib_device_cap_flags {
    223	IB_DEVICE_RESIZE_MAX_WR = IB_UVERBS_DEVICE_RESIZE_MAX_WR,
    224	IB_DEVICE_BAD_PKEY_CNTR = IB_UVERBS_DEVICE_BAD_PKEY_CNTR,
    225	IB_DEVICE_BAD_QKEY_CNTR = IB_UVERBS_DEVICE_BAD_QKEY_CNTR,
    226	IB_DEVICE_RAW_MULTI = IB_UVERBS_DEVICE_RAW_MULTI,
    227	IB_DEVICE_AUTO_PATH_MIG = IB_UVERBS_DEVICE_AUTO_PATH_MIG,
    228	IB_DEVICE_CHANGE_PHY_PORT = IB_UVERBS_DEVICE_CHANGE_PHY_PORT,
    229	IB_DEVICE_UD_AV_PORT_ENFORCE = IB_UVERBS_DEVICE_UD_AV_PORT_ENFORCE,
    230	IB_DEVICE_CURR_QP_STATE_MOD = IB_UVERBS_DEVICE_CURR_QP_STATE_MOD,
    231	IB_DEVICE_SHUTDOWN_PORT = IB_UVERBS_DEVICE_SHUTDOWN_PORT,
    232	/* IB_DEVICE_INIT_TYPE = IB_UVERBS_DEVICE_INIT_TYPE, (not in use) */
    233	IB_DEVICE_PORT_ACTIVE_EVENT = IB_UVERBS_DEVICE_PORT_ACTIVE_EVENT,
    234	IB_DEVICE_SYS_IMAGE_GUID = IB_UVERBS_DEVICE_SYS_IMAGE_GUID,
    235	IB_DEVICE_RC_RNR_NAK_GEN = IB_UVERBS_DEVICE_RC_RNR_NAK_GEN,
    236	IB_DEVICE_SRQ_RESIZE = IB_UVERBS_DEVICE_SRQ_RESIZE,
    237	IB_DEVICE_N_NOTIFY_CQ = IB_UVERBS_DEVICE_N_NOTIFY_CQ,
    238
    239	/* Reserved, old SEND_W_INV = 1 << 16,*/
    240	IB_DEVICE_MEM_WINDOW = IB_UVERBS_DEVICE_MEM_WINDOW,
    241	/*
    242	 * Devices should set IB_DEVICE_UD_IP_SUM if they support
    243	 * insertion of UDP and TCP checksum on outgoing UD IPoIB
    244	 * messages and can verify the validity of checksum for
    245	 * incoming messages.  Setting this flag implies that the
    246	 * IPoIB driver may set NETIF_F_IP_CSUM for datagram mode.
    247	 */
    248	IB_DEVICE_UD_IP_CSUM = IB_UVERBS_DEVICE_UD_IP_CSUM,
    249	IB_DEVICE_XRC = IB_UVERBS_DEVICE_XRC,
    250
    251	/*
    252	 * This device supports the IB "base memory management extension",
    253	 * which includes support for fast registrations (IB_WR_REG_MR,
    254	 * IB_WR_LOCAL_INV and IB_WR_SEND_WITH_INV verbs).  This flag should
    255	 * also be set by any iWarp device which must support FRs to comply
    256	 * to the iWarp verbs spec.  iWarp devices also support the
     257	 * with the iWarp verbs spec.  iWarp devices also support the
    258	 * stag.
    259	 */
    260	IB_DEVICE_MEM_MGT_EXTENSIONS = IB_UVERBS_DEVICE_MEM_MGT_EXTENSIONS,
    261	IB_DEVICE_MEM_WINDOW_TYPE_2A = IB_UVERBS_DEVICE_MEM_WINDOW_TYPE_2A,
    262	IB_DEVICE_MEM_WINDOW_TYPE_2B = IB_UVERBS_DEVICE_MEM_WINDOW_TYPE_2B,
    263	IB_DEVICE_RC_IP_CSUM = IB_UVERBS_DEVICE_RC_IP_CSUM,
    264	/* Deprecated. Please use IB_RAW_PACKET_CAP_IP_CSUM. */
    265	IB_DEVICE_RAW_IP_CSUM = IB_UVERBS_DEVICE_RAW_IP_CSUM,
    266	IB_DEVICE_MANAGED_FLOW_STEERING =
    267		IB_UVERBS_DEVICE_MANAGED_FLOW_STEERING,
    268	/* Deprecated. Please use IB_RAW_PACKET_CAP_SCATTER_FCS. */
    269	IB_DEVICE_RAW_SCATTER_FCS = IB_UVERBS_DEVICE_RAW_SCATTER_FCS,
    270	/* The device supports padding incoming writes to cacheline. */
    271	IB_DEVICE_PCI_WRITE_END_PADDING =
    272		IB_UVERBS_DEVICE_PCI_WRITE_END_PADDING,
    273};
    274
    275enum ib_kernel_cap_flags {
    276	/*
    277	 * This device supports a per-device lkey or stag that can be
    278	 * used without performing a memory registration for the local
    279	 * memory.  Note that ULPs should never check this flag, but
     280	 * should instead use the local_dma_lkey flag in the ib_pd structure,
    281	 * which will always contain a usable lkey.
    282	 */
    283	IBK_LOCAL_DMA_LKEY = 1 << 0,
    284	/* IB_QP_CREATE_INTEGRITY_EN is supported to implement T10-PI */
    285	IBK_INTEGRITY_HANDOVER = 1 << 1,
    286	/* IB_ACCESS_ON_DEMAND is supported during reg_user_mr() */
    287	IBK_ON_DEMAND_PAGING = 1 << 2,
    288	/* IB_MR_TYPE_SG_GAPS is supported */
    289	IBK_SG_GAPS_REG = 1 << 3,
    290	/* Driver supports RDMA_NLDEV_CMD_DELLINK */
    291	IBK_ALLOW_USER_UNREG = 1 << 4,
    292
    293	/* ipoib will use IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK */
    294	IBK_BLOCK_MULTICAST_LOOPBACK = 1 << 5,
     295	/* ipoib will use IB_QP_CREATE_IPOIB_UD_LSO for its QPs */
    296	IBK_UD_TSO = 1 << 6,
     297	/* ipoib will use the device ops:
    298	 *   get_vf_config
    299	 *   get_vf_guid
    300	 *   get_vf_stats
    301	 *   set_vf_guid
    302	 *   set_vf_link_state
    303	 */
    304	IBK_VIRTUAL_FUNCTION = 1 << 7,
    305	/* ipoib will use IB_QP_CREATE_NETDEV_USE for its QPs */
    306	IBK_RDMA_NETDEV_OPA = 1 << 8,
    307};
    308
    309enum ib_atomic_cap {
    310	IB_ATOMIC_NONE,
    311	IB_ATOMIC_HCA,
    312	IB_ATOMIC_GLOB
    313};
    314
    315enum ib_odp_general_cap_bits {
    316	IB_ODP_SUPPORT		= 1 << 0,
    317	IB_ODP_SUPPORT_IMPLICIT = 1 << 1,
    318};
    319
    320enum ib_odp_transport_cap_bits {
    321	IB_ODP_SUPPORT_SEND	= 1 << 0,
    322	IB_ODP_SUPPORT_RECV	= 1 << 1,
    323	IB_ODP_SUPPORT_WRITE	= 1 << 2,
    324	IB_ODP_SUPPORT_READ	= 1 << 3,
    325	IB_ODP_SUPPORT_ATOMIC	= 1 << 4,
    326	IB_ODP_SUPPORT_SRQ_RECV	= 1 << 5,
    327};
    328
    329struct ib_odp_caps {
    330	uint64_t general_caps;
    331	struct {
    332		uint32_t  rc_odp_caps;
    333		uint32_t  uc_odp_caps;
    334		uint32_t  ud_odp_caps;
    335		uint32_t  xrc_odp_caps;
    336	} per_transport_caps;
    337};
    338
    339struct ib_rss_caps {
    340	/* Corresponding bit will be set if qp type from
    341	 * 'enum ib_qp_type' is supported, e.g.
    342	 * supported_qpts |= 1 << IB_QPT_UD
    343	 */
    344	u32 supported_qpts;
    345	u32 max_rwq_indirection_tables;
    346	u32 max_rwq_indirection_table_size;
    347};
    348
    349enum ib_tm_cap_flags {
    350	/*  Support tag matching with rendezvous offload for RC transport */
    351	IB_TM_CAP_RNDV_RC = 1 << 0,
    352};
    353
    354struct ib_tm_caps {
    355	/* Max size of RNDV header */
    356	u32 max_rndv_hdr_size;
    357	/* Max number of entries in tag matching list */
    358	u32 max_num_tags;
    359	/* From enum ib_tm_cap_flags */
    360	u32 flags;
    361	/* Max number of outstanding list operations */
    362	u32 max_ops;
    363	/* Max number of SGE in tag matching entry */
    364	u32 max_sge;
    365};
    366
    367struct ib_cq_init_attr {
    368	unsigned int	cqe;
    369	u32		comp_vector;
    370	u32		flags;
    371};
    372
    373enum ib_cq_attr_mask {
    374	IB_CQ_MODERATE = 1 << 0,
    375};
    376
    377struct ib_cq_caps {
    378	u16     max_cq_moderation_count;
    379	u16     max_cq_moderation_period;
    380};
    381
    382struct ib_dm_mr_attr {
    383	u64		length;
    384	u64		offset;
    385	u32		access_flags;
    386};
    387
    388struct ib_dm_alloc_attr {
    389	u64	length;
    390	u32	alignment;
    391	u32	flags;
    392};
    393
    394struct ib_device_attr {
    395	u64			fw_ver;
    396	__be64			sys_image_guid;
    397	u64			max_mr_size;
    398	u64			page_size_cap;
    399	u32			vendor_id;
    400	u32			vendor_part_id;
    401	u32			hw_ver;
    402	int			max_qp;
    403	int			max_qp_wr;
    404	u64			device_cap_flags;
    405	u64			kernel_cap_flags;
    406	int			max_send_sge;
    407	int			max_recv_sge;
    408	int			max_sge_rd;
    409	int			max_cq;
    410	int			max_cqe;
    411	int			max_mr;
    412	int			max_pd;
    413	int			max_qp_rd_atom;
    414	int			max_ee_rd_atom;
    415	int			max_res_rd_atom;
    416	int			max_qp_init_rd_atom;
    417	int			max_ee_init_rd_atom;
    418	enum ib_atomic_cap	atomic_cap;
    419	enum ib_atomic_cap	masked_atomic_cap;
    420	int			max_ee;
    421	int			max_rdd;
    422	int			max_mw;
    423	int			max_raw_ipv6_qp;
    424	int			max_raw_ethy_qp;
    425	int			max_mcast_grp;
    426	int			max_mcast_qp_attach;
    427	int			max_total_mcast_qp_attach;
    428	int			max_ah;
    429	int			max_srq;
    430	int			max_srq_wr;
    431	int			max_srq_sge;
    432	unsigned int		max_fast_reg_page_list_len;
    433	unsigned int		max_pi_fast_reg_page_list_len;
    434	u16			max_pkeys;
    435	u8			local_ca_ack_delay;
    436	int			sig_prot_cap;
    437	int			sig_guard_cap;
    438	struct ib_odp_caps	odp_caps;
    439	uint64_t		timestamp_mask;
    440	uint64_t		hca_core_clock; /* in KHZ */
    441	struct ib_rss_caps	rss_caps;
    442	u32			max_wq_type_rq;
    443	u32			raw_packet_caps; /* Use ib_raw_packet_caps enum */
    444	struct ib_tm_caps	tm_caps;
    445	struct ib_cq_caps       cq_caps;
    446	u64			max_dm_size;
    447	/* Max entries for sgl for optimized performance per READ */
    448	u32			max_sgl_rd;
    449};
    450
    451enum ib_mtu {
    452	IB_MTU_256  = 1,
    453	IB_MTU_512  = 2,
    454	IB_MTU_1024 = 3,
    455	IB_MTU_2048 = 4,
    456	IB_MTU_4096 = 5
    457};
    458
    459enum opa_mtu {
    460	OPA_MTU_8192 = 6,
    461	OPA_MTU_10240 = 7
    462};
    463
    464static inline int ib_mtu_enum_to_int(enum ib_mtu mtu)
    465{
    466	switch (mtu) {
    467	case IB_MTU_256:  return  256;
    468	case IB_MTU_512:  return  512;
    469	case IB_MTU_1024: return 1024;
    470	case IB_MTU_2048: return 2048;
    471	case IB_MTU_4096: return 4096;
    472	default: 	  return -1;
    473	}
    474}
    475
    476static inline enum ib_mtu ib_mtu_int_to_enum(int mtu)
    477{
    478	if (mtu >= 4096)
    479		return IB_MTU_4096;
    480	else if (mtu >= 2048)
    481		return IB_MTU_2048;
    482	else if (mtu >= 1024)
    483		return IB_MTU_1024;
    484	else if (mtu >= 512)
    485		return IB_MTU_512;
    486	else
    487		return IB_MTU_256;
    488}
    489
    490static inline int opa_mtu_enum_to_int(enum opa_mtu mtu)
    491{
    492	switch (mtu) {
    493	case OPA_MTU_8192:
    494		return 8192;
    495	case OPA_MTU_10240:
    496		return 10240;
    497	default:
    498		return(ib_mtu_enum_to_int((enum ib_mtu)mtu));
    499	}
    500}
    501
    502static inline enum opa_mtu opa_mtu_int_to_enum(int mtu)
    503{
    504	if (mtu >= 10240)
    505		return OPA_MTU_10240;
    506	else if (mtu >= 8192)
    507		return OPA_MTU_8192;
    508	else
    509		return ((enum opa_mtu)ib_mtu_int_to_enum(mtu));
    510}
    511
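        /*
         * Illustrative example (not part of the original header): the
         * int-to-enum helpers above round down to the nearest supported MTU,
         * so a 9000 byte value maps to IB_MTU_4096:
         *
         *	enum ib_mtu mtu = ib_mtu_int_to_enum(9000);	// IB_MTU_4096
         *	int bytes = ib_mtu_enum_to_int(mtu);		// 4096
         */
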
    512enum ib_port_state {
    513	IB_PORT_NOP		= 0,
    514	IB_PORT_DOWN		= 1,
    515	IB_PORT_INIT		= 2,
    516	IB_PORT_ARMED		= 3,
    517	IB_PORT_ACTIVE		= 4,
    518	IB_PORT_ACTIVE_DEFER	= 5
    519};
    520
    521enum ib_port_phys_state {
    522	IB_PORT_PHYS_STATE_SLEEP = 1,
    523	IB_PORT_PHYS_STATE_POLLING = 2,
    524	IB_PORT_PHYS_STATE_DISABLED = 3,
    525	IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING = 4,
    526	IB_PORT_PHYS_STATE_LINK_UP = 5,
    527	IB_PORT_PHYS_STATE_LINK_ERROR_RECOVERY = 6,
    528	IB_PORT_PHYS_STATE_PHY_TEST = 7,
    529};
    530
    531enum ib_port_width {
    532	IB_WIDTH_1X	= 1,
    533	IB_WIDTH_2X	= 16,
    534	IB_WIDTH_4X	= 2,
    535	IB_WIDTH_8X	= 4,
    536	IB_WIDTH_12X	= 8
    537};
    538
    539static inline int ib_width_enum_to_int(enum ib_port_width width)
    540{
    541	switch (width) {
    542	case IB_WIDTH_1X:  return  1;
    543	case IB_WIDTH_2X:  return  2;
    544	case IB_WIDTH_4X:  return  4;
    545	case IB_WIDTH_8X:  return  8;
    546	case IB_WIDTH_12X: return 12;
    547	default: 	  return -1;
    548	}
    549}
    550
    551enum ib_port_speed {
    552	IB_SPEED_SDR	= 1,
    553	IB_SPEED_DDR	= 2,
    554	IB_SPEED_QDR	= 4,
    555	IB_SPEED_FDR10	= 8,
    556	IB_SPEED_FDR	= 16,
    557	IB_SPEED_EDR	= 32,
    558	IB_SPEED_HDR	= 64,
    559	IB_SPEED_NDR	= 128,
    560};
    561
    562enum ib_stat_flag {
    563	IB_STAT_FLAG_OPTIONAL = 1 << 0,
    564};
    565
    566/**
    567 * struct rdma_stat_desc
    568 * @name - The name of the counter
    569 * @flags - Flags of the counter; For example, IB_STAT_FLAG_OPTIONAL
    570 * @priv - Driver private information; Core code should not use
    571 */
    572struct rdma_stat_desc {
    573	const char *name;
    574	unsigned int flags;
    575	const void *priv;
    576};
    577
    578/**
    579 * struct rdma_hw_stats
    580 * @lock - Mutex to protect parallel write access to lifespan and values
     581 *    of counters, which are 64 bits and not guaranteed to be written
     582 *    atomically on 32-bit systems.
    583 * @timestamp - Used by the core code to track when the last update was
    584 * @lifespan - Used by the core code to determine how old the counters
    585 *   should be before being updated again.  Stored in jiffies, defaults
     586 *   to 10 milliseconds, drivers can override the default by specifying
    587 *   their own value during their allocation routine.
    588 * @descs - Array of pointers to static descriptors used for the counters
    589 *   in directory.
     590 * @is_disabled - A bitmap indicating whether each counter is currently
     591 *   disabled.
     592 * @num_counters - How many hardware counters there are.  If @descs is
     593 *   shorter than this number, a kernel oops will result.  Driver authors
     594 *   are encouraged to leave BUILD_BUG_ON(ARRAY_SIZE(@descs) < num_counters)
     595 *   in their code to prevent this.
    596 * @value - Array of u64 counters that are accessed by the sysfs code and
    597 *   filled in by the drivers get_stats routine
    598 */
    599struct rdma_hw_stats {
    600	struct mutex	lock; /* Protect lifespan and values[] */
    601	unsigned long	timestamp;
    602	unsigned long	lifespan;
    603	const struct rdma_stat_desc *descs;
    604	unsigned long	*is_disabled;
    605	int		num_counters;
    606	u64		value[];
    607};
    608
    609#define RDMA_HW_STATS_DEFAULT_LIFESPAN 10
    610
    611struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
    612	const struct rdma_stat_desc *descs, int num_counters,
    613	unsigned long lifespan);
    614
    615void rdma_free_hw_stats_struct(struct rdma_hw_stats *stats);
    616
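        /*
         * Illustrative allocation sketch (not part of the original header);
         * the descriptor array and its entries are assumed driver-local
         * names, the call signature is the one declared above:
         *
         *	static const struct rdma_stat_desc my_descs[] = {
         *		[0] = { .name = "rx_pkts" },
         *		[1] = { .name = "tx_pkts" },
         *	};
         *
         *	stats = rdma_alloc_hw_stats_struct(my_descs, ARRAY_SIZE(my_descs),
         *					   RDMA_HW_STATS_DEFAULT_LIFESPAN);
         */
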
    617/* Define bits for the various functionality this port needs to be supported by
    618 * the core.
    619 */
    620/* Management                           0x00000FFF */
    621#define RDMA_CORE_CAP_IB_MAD            0x00000001
    622#define RDMA_CORE_CAP_IB_SMI            0x00000002
    623#define RDMA_CORE_CAP_IB_CM             0x00000004
    624#define RDMA_CORE_CAP_IW_CM             0x00000008
    625#define RDMA_CORE_CAP_IB_SA             0x00000010
    626#define RDMA_CORE_CAP_OPA_MAD           0x00000020
    627
    628/* Address format                       0x000FF000 */
    629#define RDMA_CORE_CAP_AF_IB             0x00001000
    630#define RDMA_CORE_CAP_ETH_AH            0x00002000
    631#define RDMA_CORE_CAP_OPA_AH            0x00004000
    632#define RDMA_CORE_CAP_IB_GRH_REQUIRED   0x00008000
    633
    634/* Protocol                             0xFFF00000 */
    635#define RDMA_CORE_CAP_PROT_IB           0x00100000
    636#define RDMA_CORE_CAP_PROT_ROCE         0x00200000
    637#define RDMA_CORE_CAP_PROT_IWARP        0x00400000
    638#define RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP 0x00800000
    639#define RDMA_CORE_CAP_PROT_RAW_PACKET   0x01000000
    640#define RDMA_CORE_CAP_PROT_USNIC        0x02000000
    641
    642#define RDMA_CORE_PORT_IB_GRH_REQUIRED (RDMA_CORE_CAP_IB_GRH_REQUIRED \
    643					| RDMA_CORE_CAP_PROT_ROCE     \
    644					| RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP)
    645
    646#define RDMA_CORE_PORT_IBA_IB          (RDMA_CORE_CAP_PROT_IB  \
    647					| RDMA_CORE_CAP_IB_MAD \
    648					| RDMA_CORE_CAP_IB_SMI \
    649					| RDMA_CORE_CAP_IB_CM  \
    650					| RDMA_CORE_CAP_IB_SA  \
    651					| RDMA_CORE_CAP_AF_IB)
    652#define RDMA_CORE_PORT_IBA_ROCE        (RDMA_CORE_CAP_PROT_ROCE \
    653					| RDMA_CORE_CAP_IB_MAD  \
    654					| RDMA_CORE_CAP_IB_CM   \
    655					| RDMA_CORE_CAP_AF_IB   \
    656					| RDMA_CORE_CAP_ETH_AH)
    657#define RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP			\
    658					(RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP \
    659					| RDMA_CORE_CAP_IB_MAD  \
    660					| RDMA_CORE_CAP_IB_CM   \
    661					| RDMA_CORE_CAP_AF_IB   \
    662					| RDMA_CORE_CAP_ETH_AH)
    663#define RDMA_CORE_PORT_IWARP           (RDMA_CORE_CAP_PROT_IWARP \
    664					| RDMA_CORE_CAP_IW_CM)
    665#define RDMA_CORE_PORT_INTEL_OPA       (RDMA_CORE_PORT_IBA_IB  \
    666					| RDMA_CORE_CAP_OPA_MAD)
    667
    668#define RDMA_CORE_PORT_RAW_PACKET	(RDMA_CORE_CAP_PROT_RAW_PACKET)
    669
    670#define RDMA_CORE_PORT_USNIC		(RDMA_CORE_CAP_PROT_USNIC)
    671
    672struct ib_port_attr {
    673	u64			subnet_prefix;
    674	enum ib_port_state	state;
    675	enum ib_mtu		max_mtu;
    676	enum ib_mtu		active_mtu;
    677	u32                     phys_mtu;
    678	int			gid_tbl_len;
    679	unsigned int		ip_gids:1;
    680	/* This is the value from PortInfo CapabilityMask, defined by IBA */
    681	u32			port_cap_flags;
    682	u32			max_msg_sz;
    683	u32			bad_pkey_cntr;
    684	u32			qkey_viol_cntr;
    685	u16			pkey_tbl_len;
    686	u32			sm_lid;
    687	u32			lid;
    688	u8			lmc;
    689	u8			max_vl_num;
    690	u8			sm_sl;
    691	u8			subnet_timeout;
    692	u8			init_type_reply;
    693	u8			active_width;
    694	u16			active_speed;
    695	u8                      phys_state;
    696	u16			port_cap_flags2;
    697};
    698
    699enum ib_device_modify_flags {
    700	IB_DEVICE_MODIFY_SYS_IMAGE_GUID	= 1 << 0,
    701	IB_DEVICE_MODIFY_NODE_DESC	= 1 << 1
    702};
    703
    704#define IB_DEVICE_NODE_DESC_MAX 64
    705
    706struct ib_device_modify {
    707	u64	sys_image_guid;
    708	char	node_desc[IB_DEVICE_NODE_DESC_MAX];
    709};
    710
    711enum ib_port_modify_flags {
    712	IB_PORT_SHUTDOWN		= 1,
    713	IB_PORT_INIT_TYPE		= (1<<2),
    714	IB_PORT_RESET_QKEY_CNTR		= (1<<3),
    715	IB_PORT_OPA_MASK_CHG		= (1<<4)
    716};
    717
    718struct ib_port_modify {
    719	u32	set_port_cap_mask;
    720	u32	clr_port_cap_mask;
    721	u8	init_type;
    722};
    723
    724enum ib_event_type {
    725	IB_EVENT_CQ_ERR,
    726	IB_EVENT_QP_FATAL,
    727	IB_EVENT_QP_REQ_ERR,
    728	IB_EVENT_QP_ACCESS_ERR,
    729	IB_EVENT_COMM_EST,
    730	IB_EVENT_SQ_DRAINED,
    731	IB_EVENT_PATH_MIG,
    732	IB_EVENT_PATH_MIG_ERR,
    733	IB_EVENT_DEVICE_FATAL,
    734	IB_EVENT_PORT_ACTIVE,
    735	IB_EVENT_PORT_ERR,
    736	IB_EVENT_LID_CHANGE,
    737	IB_EVENT_PKEY_CHANGE,
    738	IB_EVENT_SM_CHANGE,
    739	IB_EVENT_SRQ_ERR,
    740	IB_EVENT_SRQ_LIMIT_REACHED,
    741	IB_EVENT_QP_LAST_WQE_REACHED,
    742	IB_EVENT_CLIENT_REREGISTER,
    743	IB_EVENT_GID_CHANGE,
    744	IB_EVENT_WQ_FATAL,
    745};
    746
    747const char *__attribute_const__ ib_event_msg(enum ib_event_type event);
    748
    749struct ib_event {
    750	struct ib_device	*device;
    751	union {
    752		struct ib_cq	*cq;
    753		struct ib_qp	*qp;
    754		struct ib_srq	*srq;
    755		struct ib_wq	*wq;
    756		u32		port_num;
    757	} element;
    758	enum ib_event_type	event;
    759};
    760
    761struct ib_event_handler {
    762	struct ib_device *device;
    763	void            (*handler)(struct ib_event_handler *, struct ib_event *);
    764	struct list_head  list;
    765};
    766
    767#define INIT_IB_EVENT_HANDLER(_ptr, _device, _handler)		\
    768	do {							\
    769		(_ptr)->device  = _device;			\
    770		(_ptr)->handler = _handler;			\
    771		INIT_LIST_HEAD(&(_ptr)->list);			\
    772	} while (0)
    773
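        /*
         * Illustrative sketch (not part of the original header): a handler
         * initialized with the macro above; "my_handler", "my_event_handler"
         * and "ibdev" are assumptions, ib_event_msg() is declared earlier in
         * this file:
         *
         *	static void my_event_handler(struct ib_event_handler *handler,
         *				     struct ib_event *event)
         *	{
         *		pr_info("got %s\n", ib_event_msg(event->event));
         *	}
         *
         *	INIT_IB_EVENT_HANDLER(&my_handler, ibdev, my_event_handler);
         */
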
    774struct ib_global_route {
    775	const struct ib_gid_attr *sgid_attr;
    776	union ib_gid	dgid;
    777	u32		flow_label;
    778	u8		sgid_index;
    779	u8		hop_limit;
    780	u8		traffic_class;
    781};
    782
    783struct ib_grh {
    784	__be32		version_tclass_flow;
    785	__be16		paylen;
    786	u8		next_hdr;
    787	u8		hop_limit;
    788	union ib_gid	sgid;
    789	union ib_gid	dgid;
    790};
    791
    792union rdma_network_hdr {
    793	struct ib_grh ibgrh;
    794	struct {
     795		/* The IB spec states that if it's IPv4, the IPv4 header
     796		 * is located in the last 20 bytes of the GRH.
     797		 */
    798		u8		reserved[20];
    799		struct iphdr	roce4grh;
    800	};
    801};
    802
    803#define IB_QPN_MASK		0xFFFFFF
    804
    805enum {
    806	IB_MULTICAST_QPN = 0xffffff
    807};
    808
    809#define IB_LID_PERMISSIVE	cpu_to_be16(0xFFFF)
    810#define IB_MULTICAST_LID_BASE	cpu_to_be16(0xC000)
    811
    812enum ib_ah_flags {
    813	IB_AH_GRH	= 1
    814};
    815
    816enum ib_rate {
    817	IB_RATE_PORT_CURRENT = 0,
    818	IB_RATE_2_5_GBPS = 2,
    819	IB_RATE_5_GBPS   = 5,
    820	IB_RATE_10_GBPS  = 3,
    821	IB_RATE_20_GBPS  = 6,
    822	IB_RATE_30_GBPS  = 4,
    823	IB_RATE_40_GBPS  = 7,
    824	IB_RATE_60_GBPS  = 8,
    825	IB_RATE_80_GBPS  = 9,
    826	IB_RATE_120_GBPS = 10,
    827	IB_RATE_14_GBPS  = 11,
    828	IB_RATE_56_GBPS  = 12,
    829	IB_RATE_112_GBPS = 13,
    830	IB_RATE_168_GBPS = 14,
    831	IB_RATE_25_GBPS  = 15,
    832	IB_RATE_100_GBPS = 16,
    833	IB_RATE_200_GBPS = 17,
    834	IB_RATE_300_GBPS = 18,
    835	IB_RATE_28_GBPS  = 19,
    836	IB_RATE_50_GBPS  = 20,
    837	IB_RATE_400_GBPS = 21,
    838	IB_RATE_600_GBPS = 22,
    839};
    840
    841/**
    842 * ib_rate_to_mult - Convert the IB rate enum to a multiple of the
    843 * base rate of 2.5 Gbit/sec.  For example, IB_RATE_5_GBPS will be
    844 * converted to 2, since 5 Gbit/sec is 2 * 2.5 Gbit/sec.
    845 * @rate: rate to convert.
    846 */
    847__attribute_const__ int ib_rate_to_mult(enum ib_rate rate);
    848
    849/**
    850 * ib_rate_to_mbps - Convert the IB rate enum to Mbps.
    851 * For example, IB_RATE_2_5_GBPS will be converted to 2500.
    852 * @rate: rate to convert.
    853 */
    854__attribute_const__ int ib_rate_to_mbps(enum ib_rate rate);
    855
    856
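        /*
         * Illustrative example (not part of the original header), matching
         * the kernel-doc above:
         *
         *	ib_rate_to_mult(IB_RATE_5_GBPS);	// 2 (i.e. 2 * 2.5 Gbit/sec)
         *	ib_rate_to_mbps(IB_RATE_2_5_GBPS);	// 2500
         */
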
    857/**
    858 * enum ib_mr_type - memory region type
    859 * @IB_MR_TYPE_MEM_REG:       memory region that is used for
    860 *                            normal registration
    861 * @IB_MR_TYPE_SG_GAPS:       memory region that is capable to
    862 *                            register any arbitrary sg lists (without
    863 *                            the normal mr constraints - see
    864 *                            ib_map_mr_sg)
    865 * @IB_MR_TYPE_DM:            memory region that is used for device
    866 *                            memory registration
    867 * @IB_MR_TYPE_USER:          memory region that is used for the user-space
    868 *                            application
    869 * @IB_MR_TYPE_DMA:           memory region that is used for DMA operations
    870 *                            without address translations (VA=PA)
    871 * @IB_MR_TYPE_INTEGRITY:     memory region that is used for
    872 *                            data integrity operations
    873 */
    874enum ib_mr_type {
    875	IB_MR_TYPE_MEM_REG,
    876	IB_MR_TYPE_SG_GAPS,
    877	IB_MR_TYPE_DM,
    878	IB_MR_TYPE_USER,
    879	IB_MR_TYPE_DMA,
    880	IB_MR_TYPE_INTEGRITY,
    881};
    882
    883enum ib_mr_status_check {
    884	IB_MR_CHECK_SIG_STATUS = 1,
    885};
    886
    887/**
    888 * struct ib_mr_status - Memory region status container
    889 *
    890 * @fail_status: Bitmask of MR checks status. For each
    891 *     failed check a corresponding status bit is set.
     892 * @sig_err: Additional info for IB_MR_CHECK_SIG_STATUS
    893 *     failure.
    894 */
    895struct ib_mr_status {
    896	u32		    fail_status;
    897	struct ib_sig_err   sig_err;
    898};
    899
    900/**
    901 * mult_to_ib_rate - Convert a multiple of 2.5 Gbit/sec to an IB rate
    902 * enum.
    903 * @mult: multiple to convert.
    904 */
    905__attribute_const__ enum ib_rate mult_to_ib_rate(int mult);
    906
    907struct rdma_ah_init_attr {
    908	struct rdma_ah_attr *ah_attr;
    909	u32 flags;
    910	struct net_device *xmit_slave;
    911};
    912
    913enum rdma_ah_attr_type {
    914	RDMA_AH_ATTR_TYPE_UNDEFINED,
    915	RDMA_AH_ATTR_TYPE_IB,
    916	RDMA_AH_ATTR_TYPE_ROCE,
    917	RDMA_AH_ATTR_TYPE_OPA,
    918};
    919
    920struct ib_ah_attr {
    921	u16			dlid;
    922	u8			src_path_bits;
    923};
    924
    925struct roce_ah_attr {
    926	u8			dmac[ETH_ALEN];
    927};
    928
    929struct opa_ah_attr {
    930	u32			dlid;
    931	u8			src_path_bits;
    932	bool			make_grd;
    933};
    934
    935struct rdma_ah_attr {
    936	struct ib_global_route	grh;
    937	u8			sl;
    938	u8			static_rate;
    939	u32			port_num;
    940	u8			ah_flags;
    941	enum rdma_ah_attr_type type;
    942	union {
    943		struct ib_ah_attr ib;
    944		struct roce_ah_attr roce;
    945		struct opa_ah_attr opa;
    946	};
    947};
    948
    949enum ib_wc_status {
    950	IB_WC_SUCCESS,
    951	IB_WC_LOC_LEN_ERR,
    952	IB_WC_LOC_QP_OP_ERR,
    953	IB_WC_LOC_EEC_OP_ERR,
    954	IB_WC_LOC_PROT_ERR,
    955	IB_WC_WR_FLUSH_ERR,
    956	IB_WC_MW_BIND_ERR,
    957	IB_WC_BAD_RESP_ERR,
    958	IB_WC_LOC_ACCESS_ERR,
    959	IB_WC_REM_INV_REQ_ERR,
    960	IB_WC_REM_ACCESS_ERR,
    961	IB_WC_REM_OP_ERR,
    962	IB_WC_RETRY_EXC_ERR,
    963	IB_WC_RNR_RETRY_EXC_ERR,
    964	IB_WC_LOC_RDD_VIOL_ERR,
    965	IB_WC_REM_INV_RD_REQ_ERR,
    966	IB_WC_REM_ABORT_ERR,
    967	IB_WC_INV_EECN_ERR,
    968	IB_WC_INV_EEC_STATE_ERR,
    969	IB_WC_FATAL_ERR,
    970	IB_WC_RESP_TIMEOUT_ERR,
    971	IB_WC_GENERAL_ERR
    972};
    973
    974const char *__attribute_const__ ib_wc_status_msg(enum ib_wc_status status);
    975
    976enum ib_wc_opcode {
    977	IB_WC_SEND = IB_UVERBS_WC_SEND,
    978	IB_WC_RDMA_WRITE = IB_UVERBS_WC_RDMA_WRITE,
    979	IB_WC_RDMA_READ = IB_UVERBS_WC_RDMA_READ,
    980	IB_WC_COMP_SWAP = IB_UVERBS_WC_COMP_SWAP,
    981	IB_WC_FETCH_ADD = IB_UVERBS_WC_FETCH_ADD,
    982	IB_WC_BIND_MW = IB_UVERBS_WC_BIND_MW,
    983	IB_WC_LOCAL_INV = IB_UVERBS_WC_LOCAL_INV,
    984	IB_WC_LSO = IB_UVERBS_WC_TSO,
    985	IB_WC_REG_MR,
    986	IB_WC_MASKED_COMP_SWAP,
    987	IB_WC_MASKED_FETCH_ADD,
    988/*
    989 * Set value of IB_WC_RECV so consumers can test if a completion is a
    990 * receive by testing (opcode & IB_WC_RECV).
    991 */
    992	IB_WC_RECV			= 1 << 7,
    993	IB_WC_RECV_RDMA_WITH_IMM
    994};
    995
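        /*
         * Illustrative example (not part of the original header): testing a
         * polled completion as described in the comment above; "wc" and
         * "handle_recv()" are assumptions:
         *
         *	if (wc->opcode & IB_WC_RECV)
         *		handle_recv(wc);
         */
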
    996enum ib_wc_flags {
    997	IB_WC_GRH		= 1,
    998	IB_WC_WITH_IMM		= (1<<1),
    999	IB_WC_WITH_INVALIDATE	= (1<<2),
   1000	IB_WC_IP_CSUM_OK	= (1<<3),
   1001	IB_WC_WITH_SMAC		= (1<<4),
   1002	IB_WC_WITH_VLAN		= (1<<5),
   1003	IB_WC_WITH_NETWORK_HDR_TYPE	= (1<<6),
   1004};
   1005
   1006struct ib_wc {
   1007	union {
   1008		u64		wr_id;
   1009		struct ib_cqe	*wr_cqe;
   1010	};
   1011	enum ib_wc_status	status;
   1012	enum ib_wc_opcode	opcode;
   1013	u32			vendor_err;
   1014	u32			byte_len;
   1015	struct ib_qp	       *qp;
   1016	union {
   1017		__be32		imm_data;
   1018		u32		invalidate_rkey;
   1019	} ex;
   1020	u32			src_qp;
   1021	u32			slid;
   1022	int			wc_flags;
   1023	u16			pkey_index;
   1024	u8			sl;
   1025	u8			dlid_path_bits;
   1026	u32 port_num; /* valid only for DR SMPs on switches */
   1027	u8			smac[ETH_ALEN];
   1028	u16			vlan_id;
   1029	u8			network_hdr_type;
   1030};
   1031
   1032enum ib_cq_notify_flags {
   1033	IB_CQ_SOLICITED			= 1 << 0,
   1034	IB_CQ_NEXT_COMP			= 1 << 1,
   1035	IB_CQ_SOLICITED_MASK		= IB_CQ_SOLICITED | IB_CQ_NEXT_COMP,
   1036	IB_CQ_REPORT_MISSED_EVENTS	= 1 << 2,
   1037};
   1038
   1039enum ib_srq_type {
   1040	IB_SRQT_BASIC = IB_UVERBS_SRQT_BASIC,
   1041	IB_SRQT_XRC = IB_UVERBS_SRQT_XRC,
   1042	IB_SRQT_TM = IB_UVERBS_SRQT_TM,
   1043};
   1044
   1045static inline bool ib_srq_has_cq(enum ib_srq_type srq_type)
   1046{
   1047	return srq_type == IB_SRQT_XRC ||
   1048	       srq_type == IB_SRQT_TM;
   1049}
   1050
   1051enum ib_srq_attr_mask {
   1052	IB_SRQ_MAX_WR	= 1 << 0,
   1053	IB_SRQ_LIMIT	= 1 << 1,
   1054};
   1055
   1056struct ib_srq_attr {
   1057	u32	max_wr;
   1058	u32	max_sge;
   1059	u32	srq_limit;
   1060};
   1061
   1062struct ib_srq_init_attr {
   1063	void		      (*event_handler)(struct ib_event *, void *);
   1064	void		       *srq_context;
   1065	struct ib_srq_attr	attr;
   1066	enum ib_srq_type	srq_type;
   1067
   1068	struct {
   1069		struct ib_cq   *cq;
   1070		union {
   1071			struct {
   1072				struct ib_xrcd *xrcd;
   1073			} xrc;
   1074
   1075			struct {
   1076				u32		max_num_tags;
   1077			} tag_matching;
   1078		};
   1079	} ext;
   1080};
   1081
   1082struct ib_qp_cap {
   1083	u32	max_send_wr;
   1084	u32	max_recv_wr;
   1085	u32	max_send_sge;
   1086	u32	max_recv_sge;
   1087	u32	max_inline_data;
   1088
   1089	/*
   1090	 * Maximum number of rdma_rw_ctx structures in flight at a time.
    1091	 * ib_create_qp() will calculate the right number of needed WRs
   1092	 * and MRs based on this.
   1093	 */
   1094	u32	max_rdma_ctxs;
   1095};
   1096
   1097enum ib_sig_type {
   1098	IB_SIGNAL_ALL_WR,
   1099	IB_SIGNAL_REQ_WR
   1100};
   1101
   1102enum ib_qp_type {
   1103	/*
   1104	 * IB_QPT_SMI and IB_QPT_GSI have to be the first two entries
   1105	 * here (and in that order) since the MAD layer uses them as
   1106	 * indices into a 2-entry table.
   1107	 */
   1108	IB_QPT_SMI,
   1109	IB_QPT_GSI,
   1110
   1111	IB_QPT_RC = IB_UVERBS_QPT_RC,
   1112	IB_QPT_UC = IB_UVERBS_QPT_UC,
   1113	IB_QPT_UD = IB_UVERBS_QPT_UD,
   1114	IB_QPT_RAW_IPV6,
   1115	IB_QPT_RAW_ETHERTYPE,
   1116	IB_QPT_RAW_PACKET = IB_UVERBS_QPT_RAW_PACKET,
   1117	IB_QPT_XRC_INI = IB_UVERBS_QPT_XRC_INI,
   1118	IB_QPT_XRC_TGT = IB_UVERBS_QPT_XRC_TGT,
   1119	IB_QPT_MAX,
   1120	IB_QPT_DRIVER = IB_UVERBS_QPT_DRIVER,
   1121	/* Reserve a range for qp types internal to the low level driver.
   1122	 * These qp types will not be visible at the IB core layer, so the
   1123	 * IB_QPT_MAX usages should not be affected in the core layer
   1124	 */
   1125	IB_QPT_RESERVED1 = 0x1000,
   1126	IB_QPT_RESERVED2,
   1127	IB_QPT_RESERVED3,
   1128	IB_QPT_RESERVED4,
   1129	IB_QPT_RESERVED5,
   1130	IB_QPT_RESERVED6,
   1131	IB_QPT_RESERVED7,
   1132	IB_QPT_RESERVED8,
   1133	IB_QPT_RESERVED9,
   1134	IB_QPT_RESERVED10,
   1135};
   1136
   1137enum ib_qp_create_flags {
   1138	IB_QP_CREATE_IPOIB_UD_LSO		= 1 << 0,
   1139	IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK	=
   1140		IB_UVERBS_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
   1141	IB_QP_CREATE_CROSS_CHANNEL              = 1 << 2,
   1142	IB_QP_CREATE_MANAGED_SEND               = 1 << 3,
   1143	IB_QP_CREATE_MANAGED_RECV               = 1 << 4,
   1144	IB_QP_CREATE_NETIF_QP			= 1 << 5,
   1145	IB_QP_CREATE_INTEGRITY_EN		= 1 << 6,
   1146	IB_QP_CREATE_NETDEV_USE			= 1 << 7,
   1147	IB_QP_CREATE_SCATTER_FCS		=
   1148		IB_UVERBS_QP_CREATE_SCATTER_FCS,
   1149	IB_QP_CREATE_CVLAN_STRIPPING		=
   1150		IB_UVERBS_QP_CREATE_CVLAN_STRIPPING,
   1151	IB_QP_CREATE_SOURCE_QPN			= 1 << 10,
   1152	IB_QP_CREATE_PCI_WRITE_END_PADDING	=
   1153		IB_UVERBS_QP_CREATE_PCI_WRITE_END_PADDING,
   1154	/* reserve bits 26-31 for low level drivers' internal use */
   1155	IB_QP_CREATE_RESERVED_START		= 1 << 26,
   1156	IB_QP_CREATE_RESERVED_END		= 1 << 31,
   1157};
   1158
   1159/*
   1160 * Note: users may not call ib_close_qp or ib_destroy_qp from the event_handler
   1161 * callback to destroy the passed in QP.
   1162 */
   1163
   1164struct ib_qp_init_attr {
   1165	/* Consumer's event_handler callback must not block */
   1166	void                  (*event_handler)(struct ib_event *, void *);
   1167
   1168	void		       *qp_context;
   1169	struct ib_cq	       *send_cq;
   1170	struct ib_cq	       *recv_cq;
   1171	struct ib_srq	       *srq;
   1172	struct ib_xrcd	       *xrcd;     /* XRC TGT QPs only */
   1173	struct ib_qp_cap	cap;
   1174	enum ib_sig_type	sq_sig_type;
   1175	enum ib_qp_type		qp_type;
   1176	u32			create_flags;
   1177
   1178	/*
   1179	 * Only needed for special QP types, or when using the RW API.
   1180	 */
   1181	u32			port_num;
   1182	struct ib_rwq_ind_table *rwq_ind_tbl;
   1183	u32			source_qpn;
   1184};
   1185
   1186struct ib_qp_open_attr {
   1187	void                  (*event_handler)(struct ib_event *, void *);
   1188	void		       *qp_context;
   1189	u32			qp_num;
   1190	enum ib_qp_type		qp_type;
   1191};
   1192
   1193enum ib_rnr_timeout {
   1194	IB_RNR_TIMER_655_36 =  0,
   1195	IB_RNR_TIMER_000_01 =  1,
   1196	IB_RNR_TIMER_000_02 =  2,
   1197	IB_RNR_TIMER_000_03 =  3,
   1198	IB_RNR_TIMER_000_04 =  4,
   1199	IB_RNR_TIMER_000_06 =  5,
   1200	IB_RNR_TIMER_000_08 =  6,
   1201	IB_RNR_TIMER_000_12 =  7,
   1202	IB_RNR_TIMER_000_16 =  8,
   1203	IB_RNR_TIMER_000_24 =  9,
   1204	IB_RNR_TIMER_000_32 = 10,
   1205	IB_RNR_TIMER_000_48 = 11,
   1206	IB_RNR_TIMER_000_64 = 12,
   1207	IB_RNR_TIMER_000_96 = 13,
   1208	IB_RNR_TIMER_001_28 = 14,
   1209	IB_RNR_TIMER_001_92 = 15,
   1210	IB_RNR_TIMER_002_56 = 16,
   1211	IB_RNR_TIMER_003_84 = 17,
   1212	IB_RNR_TIMER_005_12 = 18,
   1213	IB_RNR_TIMER_007_68 = 19,
   1214	IB_RNR_TIMER_010_24 = 20,
   1215	IB_RNR_TIMER_015_36 = 21,
   1216	IB_RNR_TIMER_020_48 = 22,
   1217	IB_RNR_TIMER_030_72 = 23,
   1218	IB_RNR_TIMER_040_96 = 24,
   1219	IB_RNR_TIMER_061_44 = 25,
   1220	IB_RNR_TIMER_081_92 = 26,
   1221	IB_RNR_TIMER_122_88 = 27,
   1222	IB_RNR_TIMER_163_84 = 28,
   1223	IB_RNR_TIMER_245_76 = 29,
   1224	IB_RNR_TIMER_327_68 = 30,
   1225	IB_RNR_TIMER_491_52 = 31
   1226};
   1227
   1228enum ib_qp_attr_mask {
   1229	IB_QP_STATE			= 1,
   1230	IB_QP_CUR_STATE			= (1<<1),
   1231	IB_QP_EN_SQD_ASYNC_NOTIFY	= (1<<2),
   1232	IB_QP_ACCESS_FLAGS		= (1<<3),
   1233	IB_QP_PKEY_INDEX		= (1<<4),
   1234	IB_QP_PORT			= (1<<5),
   1235	IB_QP_QKEY			= (1<<6),
   1236	IB_QP_AV			= (1<<7),
   1237	IB_QP_PATH_MTU			= (1<<8),
   1238	IB_QP_TIMEOUT			= (1<<9),
   1239	IB_QP_RETRY_CNT			= (1<<10),
   1240	IB_QP_RNR_RETRY			= (1<<11),
   1241	IB_QP_RQ_PSN			= (1<<12),
   1242	IB_QP_MAX_QP_RD_ATOMIC		= (1<<13),
   1243	IB_QP_ALT_PATH			= (1<<14),
   1244	IB_QP_MIN_RNR_TIMER		= (1<<15),
   1245	IB_QP_SQ_PSN			= (1<<16),
   1246	IB_QP_MAX_DEST_RD_ATOMIC	= (1<<17),
   1247	IB_QP_PATH_MIG_STATE		= (1<<18),
   1248	IB_QP_CAP			= (1<<19),
   1249	IB_QP_DEST_QPN			= (1<<20),
   1250	IB_QP_RESERVED1			= (1<<21),
   1251	IB_QP_RESERVED2			= (1<<22),
   1252	IB_QP_RESERVED3			= (1<<23),
   1253	IB_QP_RESERVED4			= (1<<24),
   1254	IB_QP_RATE_LIMIT		= (1<<25),
   1255
   1256	IB_QP_ATTR_STANDARD_BITS = GENMASK(20, 0),
   1257};
   1258
   1259enum ib_qp_state {
   1260	IB_QPS_RESET,
   1261	IB_QPS_INIT,
   1262	IB_QPS_RTR,
   1263	IB_QPS_RTS,
   1264	IB_QPS_SQD,
   1265	IB_QPS_SQE,
   1266	IB_QPS_ERR
   1267};
   1268
   1269enum ib_mig_state {
   1270	IB_MIG_MIGRATED,
   1271	IB_MIG_REARM,
   1272	IB_MIG_ARMED
   1273};
   1274
   1275enum ib_mw_type {
   1276	IB_MW_TYPE_1 = 1,
   1277	IB_MW_TYPE_2 = 2
   1278};
   1279
   1280struct ib_qp_attr {
   1281	enum ib_qp_state	qp_state;
   1282	enum ib_qp_state	cur_qp_state;
   1283	enum ib_mtu		path_mtu;
   1284	enum ib_mig_state	path_mig_state;
   1285	u32			qkey;
   1286	u32			rq_psn;
   1287	u32			sq_psn;
   1288	u32			dest_qp_num;
   1289	int			qp_access_flags;
   1290	struct ib_qp_cap	cap;
   1291	struct rdma_ah_attr	ah_attr;
   1292	struct rdma_ah_attr	alt_ah_attr;
   1293	u16			pkey_index;
   1294	u16			alt_pkey_index;
   1295	u8			en_sqd_async_notify;
   1296	u8			sq_draining;
   1297	u8			max_rd_atomic;
   1298	u8			max_dest_rd_atomic;
   1299	u8			min_rnr_timer;
   1300	u32			port_num;
   1301	u8			timeout;
   1302	u8			retry_cnt;
   1303	u8			rnr_retry;
   1304	u32			alt_port_num;
   1305	u8			alt_timeout;
   1306	u32			rate_limit;
   1307	struct net_device	*xmit_slave;
   1308};
   1309
   1310enum ib_wr_opcode {
   1311	/* These are shared with userspace */
   1312	IB_WR_RDMA_WRITE = IB_UVERBS_WR_RDMA_WRITE,
   1313	IB_WR_RDMA_WRITE_WITH_IMM = IB_UVERBS_WR_RDMA_WRITE_WITH_IMM,
   1314	IB_WR_SEND = IB_UVERBS_WR_SEND,
   1315	IB_WR_SEND_WITH_IMM = IB_UVERBS_WR_SEND_WITH_IMM,
   1316	IB_WR_RDMA_READ = IB_UVERBS_WR_RDMA_READ,
   1317	IB_WR_ATOMIC_CMP_AND_SWP = IB_UVERBS_WR_ATOMIC_CMP_AND_SWP,
   1318	IB_WR_ATOMIC_FETCH_AND_ADD = IB_UVERBS_WR_ATOMIC_FETCH_AND_ADD,
   1319	IB_WR_BIND_MW = IB_UVERBS_WR_BIND_MW,
   1320	IB_WR_LSO = IB_UVERBS_WR_TSO,
   1321	IB_WR_SEND_WITH_INV = IB_UVERBS_WR_SEND_WITH_INV,
   1322	IB_WR_RDMA_READ_WITH_INV = IB_UVERBS_WR_RDMA_READ_WITH_INV,
   1323	IB_WR_LOCAL_INV = IB_UVERBS_WR_LOCAL_INV,
   1324	IB_WR_MASKED_ATOMIC_CMP_AND_SWP =
   1325		IB_UVERBS_WR_MASKED_ATOMIC_CMP_AND_SWP,
   1326	IB_WR_MASKED_ATOMIC_FETCH_AND_ADD =
   1327		IB_UVERBS_WR_MASKED_ATOMIC_FETCH_AND_ADD,
   1328
   1329	/* These are kernel only and can not be issued by userspace */
   1330	IB_WR_REG_MR = 0x20,
   1331	IB_WR_REG_MR_INTEGRITY,
   1332
   1333	/* reserve values for low level drivers' internal use.
   1334	 * These values will not be used at all in the ib core layer.
   1335	 */
   1336	IB_WR_RESERVED1 = 0xf0,
   1337	IB_WR_RESERVED2,
   1338	IB_WR_RESERVED3,
   1339	IB_WR_RESERVED4,
   1340	IB_WR_RESERVED5,
   1341	IB_WR_RESERVED6,
   1342	IB_WR_RESERVED7,
   1343	IB_WR_RESERVED8,
   1344	IB_WR_RESERVED9,
   1345	IB_WR_RESERVED10,
   1346};
   1347
   1348enum ib_send_flags {
   1349	IB_SEND_FENCE		= 1,
   1350	IB_SEND_SIGNALED	= (1<<1),
   1351	IB_SEND_SOLICITED	= (1<<2),
   1352	IB_SEND_INLINE		= (1<<3),
   1353	IB_SEND_IP_CSUM		= (1<<4),
   1354
   1355	/* reserve bits 26-31 for low level drivers' internal use */
   1356	IB_SEND_RESERVED_START	= (1 << 26),
   1357	IB_SEND_RESERVED_END	= (1 << 31),
   1358};
   1359
   1360struct ib_sge {
   1361	u64	addr;
   1362	u32	length;
   1363	u32	lkey;
   1364};
   1365
   1366struct ib_cqe {
   1367	void (*done)(struct ib_cq *cq, struct ib_wc *wc);
   1368};
   1369
   1370struct ib_send_wr {
   1371	struct ib_send_wr      *next;
   1372	union {
   1373		u64		wr_id;
   1374		struct ib_cqe	*wr_cqe;
   1375	};
   1376	struct ib_sge	       *sg_list;
   1377	int			num_sge;
   1378	enum ib_wr_opcode	opcode;
   1379	int			send_flags;
   1380	union {
   1381		__be32		imm_data;
   1382		u32		invalidate_rkey;
   1383	} ex;
   1384};
   1385
   1386struct ib_rdma_wr {
   1387	struct ib_send_wr	wr;
   1388	u64			remote_addr;
   1389	u32			rkey;
   1390};
   1391
   1392static inline const struct ib_rdma_wr *rdma_wr(const struct ib_send_wr *wr)
   1393{
   1394	return container_of(wr, struct ib_rdma_wr, wr);
   1395}
   1396
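        /*
         * Illustrative sketch (not part of the original header): inside a
         * driver's post-send path, the opcode-specific fields are recovered
         * from the generic ib_send_wr with helpers like the one above;
         * "wr" and "remote_addr" are assumed driver-local names:
         *
         *	if (wr->opcode == IB_WR_RDMA_WRITE)
         *		remote_addr = rdma_wr(wr)->remote_addr;
         */
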
   1397struct ib_atomic_wr {
   1398	struct ib_send_wr	wr;
   1399	u64			remote_addr;
   1400	u64			compare_add;
   1401	u64			swap;
   1402	u64			compare_add_mask;
   1403	u64			swap_mask;
   1404	u32			rkey;
   1405};
   1406
   1407static inline const struct ib_atomic_wr *atomic_wr(const struct ib_send_wr *wr)
   1408{
   1409	return container_of(wr, struct ib_atomic_wr, wr);
   1410}
   1411
   1412struct ib_ud_wr {
   1413	struct ib_send_wr	wr;
   1414	struct ib_ah		*ah;
   1415	void			*header;
   1416	int			hlen;
   1417	int			mss;
   1418	u32			remote_qpn;
   1419	u32			remote_qkey;
   1420	u16			pkey_index; /* valid for GSI only */
   1421	u32			port_num; /* valid for DR SMPs on switch only */
   1422};
   1423
   1424static inline const struct ib_ud_wr *ud_wr(const struct ib_send_wr *wr)
   1425{
   1426	return container_of(wr, struct ib_ud_wr, wr);
   1427}
   1428
   1429struct ib_reg_wr {
   1430	struct ib_send_wr	wr;
   1431	struct ib_mr		*mr;
   1432	u32			key;
   1433	int			access;
   1434};
   1435
   1436static inline const struct ib_reg_wr *reg_wr(const struct ib_send_wr *wr)
   1437{
   1438	return container_of(wr, struct ib_reg_wr, wr);
   1439}
   1440
   1441struct ib_recv_wr {
   1442	struct ib_recv_wr      *next;
   1443	union {
   1444		u64		wr_id;
   1445		struct ib_cqe	*wr_cqe;
   1446	};
   1447	struct ib_sge	       *sg_list;
   1448	int			num_sge;
   1449};
   1450
   1451enum ib_access_flags {
   1452	IB_ACCESS_LOCAL_WRITE = IB_UVERBS_ACCESS_LOCAL_WRITE,
   1453	IB_ACCESS_REMOTE_WRITE = IB_UVERBS_ACCESS_REMOTE_WRITE,
   1454	IB_ACCESS_REMOTE_READ = IB_UVERBS_ACCESS_REMOTE_READ,
   1455	IB_ACCESS_REMOTE_ATOMIC = IB_UVERBS_ACCESS_REMOTE_ATOMIC,
   1456	IB_ACCESS_MW_BIND = IB_UVERBS_ACCESS_MW_BIND,
   1457	IB_ZERO_BASED = IB_UVERBS_ACCESS_ZERO_BASED,
   1458	IB_ACCESS_ON_DEMAND = IB_UVERBS_ACCESS_ON_DEMAND,
   1459	IB_ACCESS_HUGETLB = IB_UVERBS_ACCESS_HUGETLB,
   1460	IB_ACCESS_RELAXED_ORDERING = IB_UVERBS_ACCESS_RELAXED_ORDERING,
   1461
   1462	IB_ACCESS_OPTIONAL = IB_UVERBS_ACCESS_OPTIONAL_RANGE,
   1463	IB_ACCESS_SUPPORTED =
   1464		((IB_ACCESS_HUGETLB << 1) - 1) | IB_ACCESS_OPTIONAL,
   1465};
   1466
   1467/*
   1468 * XXX: these are apparently used for ->rereg_user_mr, no idea why they
   1469 * are hidden here instead of a uapi header!
   1470 */
   1471enum ib_mr_rereg_flags {
   1472	IB_MR_REREG_TRANS	= 1,
   1473	IB_MR_REREG_PD		= (1<<1),
   1474	IB_MR_REREG_ACCESS	= (1<<2),
   1475	IB_MR_REREG_SUPPORTED	= ((IB_MR_REREG_ACCESS << 1) - 1)
   1476};
   1477
   1478struct ib_umem;
   1479
   1480enum rdma_remove_reason {
   1481	/*
   1482	 * Userspace requested uobject deletion or initial try
   1483	 * to remove uobject via cleanup. Call could fail
   1484	 */
   1485	RDMA_REMOVE_DESTROY,
   1486	/* Context deletion. This call should delete the actual object itself */
   1487	RDMA_REMOVE_CLOSE,
   1488	/* Driver is being hot-unplugged. This call should delete the actual object itself */
   1489	RDMA_REMOVE_DRIVER_REMOVE,
   1490	/* uobj is being cleaned-up before being committed */
   1491	RDMA_REMOVE_ABORT,
   1492	/* The driver failed to destroy the uobject and is being disconnected */
   1493	RDMA_REMOVE_DRIVER_FAILURE,
   1494};
   1495
   1496struct ib_rdmacg_object {
   1497#ifdef CONFIG_CGROUP_RDMA
   1498	struct rdma_cgroup	*cg;		/* owner rdma cgroup */
   1499#endif
   1500};
   1501
   1502struct ib_ucontext {
   1503	struct ib_device       *device;
   1504	struct ib_uverbs_file  *ufile;
   1505
   1506	struct ib_rdmacg_object	cg_obj;
   1507	/*
   1508	 * Implementation details of the RDMA core, don't use in drivers:
   1509	 */
   1510	struct rdma_restrack_entry res;
   1511	struct xarray mmap_xa;
   1512};
   1513
   1514struct ib_uobject {
   1515	u64			user_handle;	/* handle given to us by userspace */
   1516	/* ufile & ucontext owning this object */
   1517	struct ib_uverbs_file  *ufile;
   1518	/* FIXME, save memory: ufile->context == context */
   1519	struct ib_ucontext     *context;	/* associated user context */
   1520	void		       *object;		/* containing object */
   1521	struct list_head	list;		/* link to context's list */
   1522	struct ib_rdmacg_object	cg_obj;		/* rdmacg object */
   1523	int			id;		/* index into kernel idr */
   1524	struct kref		ref;
   1525	atomic_t		usecnt;		/* protects exclusive access */
   1526	struct rcu_head		rcu;		/* kfree_rcu() overhead */
   1527
   1528	const struct uverbs_api_object *uapi_object;
   1529};
   1530
   1531struct ib_udata {
   1532	const void __user *inbuf;
   1533	void __user *outbuf;
   1534	size_t       inlen;
   1535	size_t       outlen;
   1536};
   1537
   1538struct ib_pd {
   1539	u32			local_dma_lkey;
   1540	u32			flags;
   1541	struct ib_device       *device;
   1542	struct ib_uobject      *uobject;
   1543	atomic_t          	usecnt; /* count all resources */
   1544
   1545	u32			unsafe_global_rkey;
   1546
   1547	/*
   1548	 * Implementation details of the RDMA core, don't use in drivers:
   1549	 */
   1550	struct ib_mr	       *__internal_mr;
   1551	struct rdma_restrack_entry res;
   1552};
   1553
   1554struct ib_xrcd {
   1555	struct ib_device       *device;
   1556	atomic_t		usecnt; /* count all exposed resources */
   1557	struct inode	       *inode;
   1558	struct rw_semaphore	tgt_qps_rwsem;
   1559	struct xarray		tgt_qps;
   1560};
   1561
   1562struct ib_ah {
   1563	struct ib_device	*device;
   1564	struct ib_pd		*pd;
   1565	struct ib_uobject	*uobject;
   1566	const struct ib_gid_attr *sgid_attr;
   1567	enum rdma_ah_attr_type	type;
   1568};
   1569
   1570typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context);
   1571
   1572enum ib_poll_context {
   1573	IB_POLL_SOFTIRQ,	   /* poll from softirq context */
   1574	IB_POLL_WORKQUEUE,	   /* poll from workqueue */
   1575	IB_POLL_UNBOUND_WORKQUEUE, /* poll from unbound workqueue */
   1576	IB_POLL_LAST_POOL_TYPE = IB_POLL_UNBOUND_WORKQUEUE,
   1577
   1578	IB_POLL_DIRECT,		   /* caller context, no hw completions */
   1579};
   1580
   1581struct ib_cq {
   1582	struct ib_device       *device;
   1583	struct ib_ucq_object   *uobject;
   1584	ib_comp_handler   	comp_handler;
   1585	void                  (*event_handler)(struct ib_event *, void *);
   1586	void                   *cq_context;
   1587	int               	cqe;
   1588	unsigned int		cqe_used;
   1589	atomic_t          	usecnt; /* count number of work queues */
   1590	enum ib_poll_context	poll_ctx;
   1591	struct ib_wc		*wc;
   1592	struct list_head        pool_entry;
   1593	union {
   1594		struct irq_poll		iop;
   1595		struct work_struct	work;
   1596	};
   1597	struct workqueue_struct *comp_wq;
   1598	struct dim *dim;
   1599
   1600	/* updated only by trace points */
   1601	ktime_t timestamp;
   1602	u8 interrupt:1;
   1603	u8 shared:1;
   1604	unsigned int comp_vector;
   1605
   1606	/*
   1607	 * Implementation details of the RDMA core, don't use in drivers:
   1608	 */
   1609	struct rdma_restrack_entry res;
   1610};
   1611
   1612struct ib_srq {
   1613	struct ib_device       *device;
   1614	struct ib_pd	       *pd;
   1615	struct ib_usrq_object  *uobject;
   1616	void		      (*event_handler)(struct ib_event *, void *);
   1617	void		       *srq_context;
   1618	enum ib_srq_type	srq_type;
   1619	atomic_t		usecnt;
   1620
   1621	struct {
   1622		struct ib_cq   *cq;
   1623		union {
   1624			struct {
   1625				struct ib_xrcd *xrcd;
   1626				u32		srq_num;
   1627			} xrc;
   1628		};
   1629	} ext;
   1630
   1631	/*
   1632	 * Implementation details of the RDMA core, don't use in drivers:
   1633	 */
   1634	struct rdma_restrack_entry res;
   1635};
   1636
   1637enum ib_raw_packet_caps {
   1638	/*
    1639	 * Stripping the cvlan from an incoming packet and reporting it in the
    1640	 * matching work completion is supported.
    1641	 */
   1642	IB_RAW_PACKET_CAP_CVLAN_STRIPPING =
   1643		IB_UVERBS_RAW_PACKET_CAP_CVLAN_STRIPPING,
   1644	/*
    1645	 * Scattering the FCS field of an incoming packet to host memory is supported.
   1646	 */
   1647	IB_RAW_PACKET_CAP_SCATTER_FCS = IB_UVERBS_RAW_PACKET_CAP_SCATTER_FCS,
   1648	/* Checksum offloads are supported (for both send and receive). */
   1649	IB_RAW_PACKET_CAP_IP_CSUM = IB_UVERBS_RAW_PACKET_CAP_IP_CSUM,
   1650	/*
   1651	 * When a packet is received for an RQ with no receive WQEs, the
   1652	 * packet processing is delayed.
   1653	 */
   1654	IB_RAW_PACKET_CAP_DELAY_DROP = IB_UVERBS_RAW_PACKET_CAP_DELAY_DROP,
   1655};
   1656
   1657enum ib_wq_type {
   1658	IB_WQT_RQ = IB_UVERBS_WQT_RQ,
   1659};
   1660
   1661enum ib_wq_state {
   1662	IB_WQS_RESET,
   1663	IB_WQS_RDY,
   1664	IB_WQS_ERR
   1665};
   1666
   1667struct ib_wq {
   1668	struct ib_device       *device;
   1669	struct ib_uwq_object   *uobject;
   1670	void		    *wq_context;
   1671	void		    (*event_handler)(struct ib_event *, void *);
   1672	struct ib_pd	       *pd;
   1673	struct ib_cq	       *cq;
   1674	u32		wq_num;
   1675	enum ib_wq_state       state;
   1676	enum ib_wq_type	wq_type;
   1677	atomic_t		usecnt;
   1678};
   1679
   1680enum ib_wq_flags {
   1681	IB_WQ_FLAGS_CVLAN_STRIPPING	= IB_UVERBS_WQ_FLAGS_CVLAN_STRIPPING,
   1682	IB_WQ_FLAGS_SCATTER_FCS		= IB_UVERBS_WQ_FLAGS_SCATTER_FCS,
   1683	IB_WQ_FLAGS_DELAY_DROP		= IB_UVERBS_WQ_FLAGS_DELAY_DROP,
   1684	IB_WQ_FLAGS_PCI_WRITE_END_PADDING =
   1685				IB_UVERBS_WQ_FLAGS_PCI_WRITE_END_PADDING,
   1686};
   1687
   1688struct ib_wq_init_attr {
   1689	void		       *wq_context;
   1690	enum ib_wq_type	wq_type;
   1691	u32		max_wr;
   1692	u32		max_sge;
   1693	struct	ib_cq	       *cq;
   1694	void		    (*event_handler)(struct ib_event *, void *);
   1695	u32		create_flags; /* Use enum ib_wq_flags */
   1696};
   1697
   1698enum ib_wq_attr_mask {
   1699	IB_WQ_STATE		= 1 << 0,
   1700	IB_WQ_CUR_STATE		= 1 << 1,
   1701	IB_WQ_FLAGS		= 1 << 2,
   1702};
   1703
   1704struct ib_wq_attr {
   1705	enum	ib_wq_state	wq_state;
   1706	enum	ib_wq_state	curr_wq_state;
   1707	u32			flags; /* Use enum ib_wq_flags */
   1708	u32			flags_mask; /* Use enum ib_wq_flags */
   1709};
   1710
   1711struct ib_rwq_ind_table {
   1712	struct ib_device	*device;
   1713	struct ib_uobject      *uobject;
   1714	atomic_t		usecnt;
   1715	u32		ind_tbl_num;
   1716	u32		log_ind_tbl_size;
   1717	struct ib_wq	**ind_tbl;
   1718};
   1719
   1720struct ib_rwq_ind_table_init_attr {
   1721	u32		log_ind_tbl_size;
   1722	/* Each entry is a pointer to Receive Work Queue */
   1723	struct ib_wq	**ind_tbl;
   1724};
   1725
   1726enum port_pkey_state {
   1727	IB_PORT_PKEY_NOT_VALID = 0,
   1728	IB_PORT_PKEY_VALID = 1,
   1729	IB_PORT_PKEY_LISTED = 2,
   1730};
   1731
   1732struct ib_qp_security;
   1733
   1734struct ib_port_pkey {
   1735	enum port_pkey_state	state;
   1736	u16			pkey_index;
   1737	u32			port_num;
   1738	struct list_head	qp_list;
   1739	struct list_head	to_error_list;
   1740	struct ib_qp_security  *sec;
   1741};
   1742
   1743struct ib_ports_pkeys {
   1744	struct ib_port_pkey	main;
   1745	struct ib_port_pkey	alt;
   1746};
   1747
   1748struct ib_qp_security {
   1749	struct ib_qp	       *qp;
   1750	struct ib_device       *dev;
   1751	/* Hold this mutex when changing port and pkey settings. */
   1752	struct mutex		mutex;
   1753	struct ib_ports_pkeys  *ports_pkeys;
   1754	/* A list of all open shared QP handles.  Required to enforce security
   1755	 * properly for all users of a shared QP.
   1756	 */
   1757	struct list_head        shared_qp_list;
   1758	void                   *security;
   1759	bool			destroying;
   1760	atomic_t		error_list_count;
   1761	struct completion	error_complete;
   1762	int			error_comps_pending;
   1763};
   1764
   1765/*
   1766 * @max_write_sge: Maximum SGE elements per RDMA WRITE request.
   1767 * @max_read_sge:  Maximum SGE elements per RDMA READ request.
   1768 */
   1769struct ib_qp {
   1770	struct ib_device       *device;
   1771	struct ib_pd	       *pd;
   1772	struct ib_cq	       *send_cq;
   1773	struct ib_cq	       *recv_cq;
   1774	spinlock_t		mr_lock;
   1775	int			mrs_used;
   1776	struct list_head	rdma_mrs;
   1777	struct list_head	sig_mrs;
   1778	struct ib_srq	       *srq;
   1779	struct ib_xrcd	       *xrcd; /* XRC TGT QPs only */
   1780	struct list_head	xrcd_list;
   1781
   1782	/* count times opened, mcast attaches, flow attaches */
   1783	atomic_t		usecnt;
   1784	struct list_head	open_list;
   1785	struct ib_qp           *real_qp;
   1786	struct ib_uqp_object   *uobject;
   1787	void                  (*event_handler)(struct ib_event *, void *);
   1788	void		       *qp_context;
    1789	/* sgid_attrs associated with the AVs */
   1790	const struct ib_gid_attr *av_sgid_attr;
   1791	const struct ib_gid_attr *alt_path_sgid_attr;
   1792	u32			qp_num;
   1793	u32			max_write_sge;
   1794	u32			max_read_sge;
   1795	enum ib_qp_type		qp_type;
   1796	struct ib_rwq_ind_table *rwq_ind_tbl;
   1797	struct ib_qp_security  *qp_sec;
   1798	u32			port;
   1799
   1800	bool			integrity_en;
   1801	/*
   1802	 * Implementation details of the RDMA core, don't use in drivers:
   1803	 */
   1804	struct rdma_restrack_entry     res;
   1805
    1806	/* The counter the qp is bound to */
   1807	struct rdma_counter    *counter;
   1808};
   1809
   1810struct ib_dm {
   1811	struct ib_device  *device;
   1812	u32		   length;
   1813	u32		   flags;
   1814	struct ib_uobject *uobject;
   1815	atomic_t	   usecnt;
   1816};
   1817
   1818struct ib_mr {
   1819	struct ib_device  *device;
   1820	struct ib_pd	  *pd;
   1821	u32		   lkey;
   1822	u32		   rkey;
   1823	u64		   iova;
   1824	u64		   length;
   1825	unsigned int	   page_size;
   1826	enum ib_mr_type	   type;
   1827	bool		   need_inval;
   1828	union {
   1829		struct ib_uobject	*uobject;	/* user */
   1830		struct list_head	qp_entry;	/* FR */
   1831	};
   1832
   1833	struct ib_dm      *dm;
   1834	struct ib_sig_attrs *sig_attrs; /* only for IB_MR_TYPE_INTEGRITY MRs */
   1835	/*
   1836	 * Implementation details of the RDMA core, don't use in drivers:
   1837	 */
   1838	struct rdma_restrack_entry res;
   1839};
   1840
   1841struct ib_mw {
   1842	struct ib_device	*device;
   1843	struct ib_pd		*pd;
   1844	struct ib_uobject	*uobject;
   1845	u32			rkey;
   1846	enum ib_mw_type         type;
   1847};
   1848
   1849/* Supported steering options */
   1850enum ib_flow_attr_type {
   1851	/* steering according to rule specifications */
   1852	IB_FLOW_ATTR_NORMAL		= 0x0,
   1853	/* default unicast and multicast rule -
   1854	 * receive all Eth traffic which isn't steered to any QP
   1855	 */
   1856	IB_FLOW_ATTR_ALL_DEFAULT	= 0x1,
   1857	/* default multicast rule -
   1858	 * receive all Eth multicast traffic which isn't steered to any QP
   1859	 */
   1860	IB_FLOW_ATTR_MC_DEFAULT		= 0x2,
   1861	/* sniffer rule - receive all port traffic */
   1862	IB_FLOW_ATTR_SNIFFER		= 0x3
   1863};
   1864
   1865/* Supported steering header types */
   1866enum ib_flow_spec_type {
   1867	/* L2 headers*/
   1868	IB_FLOW_SPEC_ETH		= 0x20,
   1869	IB_FLOW_SPEC_IB			= 0x22,
   1870	/* L3 header*/
   1871	IB_FLOW_SPEC_IPV4		= 0x30,
   1872	IB_FLOW_SPEC_IPV6		= 0x31,
   1873	IB_FLOW_SPEC_ESP                = 0x34,
   1874	/* L4 headers*/
   1875	IB_FLOW_SPEC_TCP		= 0x40,
   1876	IB_FLOW_SPEC_UDP		= 0x41,
   1877	IB_FLOW_SPEC_VXLAN_TUNNEL	= 0x50,
   1878	IB_FLOW_SPEC_GRE		= 0x51,
   1879	IB_FLOW_SPEC_MPLS		= 0x60,
   1880	IB_FLOW_SPEC_INNER		= 0x100,
   1881	/* Actions */
   1882	IB_FLOW_SPEC_ACTION_TAG         = 0x1000,
   1883	IB_FLOW_SPEC_ACTION_DROP        = 0x1001,
   1884	IB_FLOW_SPEC_ACTION_HANDLE	= 0x1002,
   1885	IB_FLOW_SPEC_ACTION_COUNT       = 0x1003,
   1886};
   1887#define IB_FLOW_SPEC_LAYER_MASK	0xF0
   1888#define IB_FLOW_SPEC_SUPPORT_LAYERS 10
   1889
   1890enum ib_flow_flags {
   1891	IB_FLOW_ATTR_FLAGS_DONT_TRAP = 1UL << 1, /* Continue match, no steal */
   1892	IB_FLOW_ATTR_FLAGS_EGRESS = 1UL << 2, /* Egress flow */
   1893	IB_FLOW_ATTR_FLAGS_RESERVED  = 1UL << 3  /* Must be last */
   1894};
   1895
   1896struct ib_flow_eth_filter {
   1897	u8	dst_mac[6];
   1898	u8	src_mac[6];
   1899	__be16	ether_type;
   1900	__be16	vlan_tag;
   1901	/* Must be last */
   1902	u8	real_sz[];
   1903};
   1904
   1905struct ib_flow_spec_eth {
   1906	u32			  type;
   1907	u16			  size;
   1908	struct ib_flow_eth_filter val;
   1909	struct ib_flow_eth_filter mask;
   1910};
   1911
   1912struct ib_flow_ib_filter {
   1913	__be16 dlid;
   1914	__u8   sl;
   1915	/* Must be last */
   1916	u8	real_sz[];
   1917};
   1918
   1919struct ib_flow_spec_ib {
   1920	u32			 type;
   1921	u16			 size;
   1922	struct ib_flow_ib_filter val;
   1923	struct ib_flow_ib_filter mask;
   1924};
   1925
   1926/* IPv4 header flags */
   1927enum ib_ipv4_flags {
   1928	IB_IPV4_DONT_FRAG = 0x2, /* Don't enable packet fragmentation */
    1929	IB_IPV4_MORE_FRAG = 0x4  /* All fragmented packets except the
    1930				    last have this flag set */
   1931};
   1932
   1933struct ib_flow_ipv4_filter {
   1934	__be32	src_ip;
   1935	__be32	dst_ip;
   1936	u8	proto;
   1937	u8	tos;
   1938	u8	ttl;
   1939	u8	flags;
   1940	/* Must be last */
   1941	u8	real_sz[];
   1942};
   1943
   1944struct ib_flow_spec_ipv4 {
   1945	u32			   type;
   1946	u16			   size;
   1947	struct ib_flow_ipv4_filter val;
   1948	struct ib_flow_ipv4_filter mask;
   1949};
   1950
   1951struct ib_flow_ipv6_filter {
   1952	u8	src_ip[16];
   1953	u8	dst_ip[16];
   1954	__be32	flow_label;
   1955	u8	next_hdr;
   1956	u8	traffic_class;
   1957	u8	hop_limit;
   1958	/* Must be last */
   1959	u8	real_sz[];
   1960};
   1961
   1962struct ib_flow_spec_ipv6 {
   1963	u32			   type;
   1964	u16			   size;
   1965	struct ib_flow_ipv6_filter val;
   1966	struct ib_flow_ipv6_filter mask;
   1967};
   1968
   1969struct ib_flow_tcp_udp_filter {
   1970	__be16	dst_port;
   1971	__be16	src_port;
   1972	/* Must be last */
   1973	u8	real_sz[];
   1974};
   1975
   1976struct ib_flow_spec_tcp_udp {
   1977	u32			      type;
   1978	u16			      size;
   1979	struct ib_flow_tcp_udp_filter val;
   1980	struct ib_flow_tcp_udp_filter mask;
   1981};
   1982
   1983struct ib_flow_tunnel_filter {
   1984	__be32	tunnel_id;
   1985	u8	real_sz[];
   1986};
   1987
    1988/* ib_flow_spec_tunnel describes the VXLAN tunnel;
    1989 * the tunnel_id from val holds the VNI value.
   1990 */
   1991struct ib_flow_spec_tunnel {
   1992	u32			      type;
   1993	u16			      size;
   1994	struct ib_flow_tunnel_filter  val;
   1995	struct ib_flow_tunnel_filter  mask;
   1996};
   1997
   1998struct ib_flow_esp_filter {
   1999	__be32	spi;
   2000	__be32  seq;
   2001	/* Must be last */
   2002	u8	real_sz[];
   2003};
   2004
   2005struct ib_flow_spec_esp {
   2006	u32                           type;
   2007	u16			      size;
   2008	struct ib_flow_esp_filter     val;
   2009	struct ib_flow_esp_filter     mask;
   2010};
   2011
   2012struct ib_flow_gre_filter {
   2013	__be16 c_ks_res0_ver;
   2014	__be16 protocol;
   2015	__be32 key;
   2016	/* Must be last */
   2017	u8	real_sz[];
   2018};
   2019
   2020struct ib_flow_spec_gre {
   2021	u32                           type;
   2022	u16			      size;
   2023	struct ib_flow_gre_filter     val;
   2024	struct ib_flow_gre_filter     mask;
   2025};
   2026
   2027struct ib_flow_mpls_filter {
   2028	__be32 tag;
   2029	/* Must be last */
   2030	u8	real_sz[];
   2031};
   2032
   2033struct ib_flow_spec_mpls {
   2034	u32                           type;
   2035	u16			      size;
   2036	struct ib_flow_mpls_filter     val;
   2037	struct ib_flow_mpls_filter     mask;
   2038};
   2039
   2040struct ib_flow_spec_action_tag {
   2041	enum ib_flow_spec_type	      type;
   2042	u16			      size;
   2043	u32                           tag_id;
   2044};
   2045
   2046struct ib_flow_spec_action_drop {
   2047	enum ib_flow_spec_type	      type;
   2048	u16			      size;
   2049};
   2050
   2051struct ib_flow_spec_action_handle {
   2052	enum ib_flow_spec_type	      type;
   2053	u16			      size;
   2054	struct ib_flow_action	     *act;
   2055};
   2056
   2057enum ib_counters_description {
   2058	IB_COUNTER_PACKETS,
   2059	IB_COUNTER_BYTES,
   2060};
   2061
   2062struct ib_flow_spec_action_count {
   2063	enum ib_flow_spec_type type;
   2064	u16 size;
   2065	struct ib_counters *counters;
   2066};
   2067
   2068union ib_flow_spec {
   2069	struct {
   2070		u32			type;
   2071		u16			size;
   2072	};
   2073	struct ib_flow_spec_eth		eth;
   2074	struct ib_flow_spec_ib		ib;
   2075	struct ib_flow_spec_ipv4        ipv4;
   2076	struct ib_flow_spec_tcp_udp	tcp_udp;
   2077	struct ib_flow_spec_ipv6        ipv6;
   2078	struct ib_flow_spec_tunnel      tunnel;
   2079	struct ib_flow_spec_esp		esp;
   2080	struct ib_flow_spec_gre		gre;
   2081	struct ib_flow_spec_mpls	mpls;
   2082	struct ib_flow_spec_action_tag  flow_tag;
   2083	struct ib_flow_spec_action_drop drop;
   2084	struct ib_flow_spec_action_handle action;
   2085	struct ib_flow_spec_action_count flow_count;
   2086};
   2087
   2088struct ib_flow_attr {
   2089	enum ib_flow_attr_type type;
   2090	u16	     size;
   2091	u16	     priority;
   2092	u32	     flags;
   2093	u8	     num_of_specs;
   2094	u32	     port;
   2095	union ib_flow_spec flows[];
   2096};
   2097
   2098struct ib_flow {
   2099	struct ib_qp		*qp;
   2100	struct ib_device	*device;
   2101	struct ib_uobject	*uobject;
   2102};
   2103
   2104enum ib_flow_action_type {
   2105	IB_FLOW_ACTION_UNSPECIFIED,
   2106	IB_FLOW_ACTION_ESP = 1,
   2107};
   2108
   2109struct ib_flow_action_attrs_esp_keymats {
   2110	enum ib_uverbs_flow_action_esp_keymat			protocol;
   2111	union {
   2112		struct ib_uverbs_flow_action_esp_keymat_aes_gcm aes_gcm;
   2113	} keymat;
   2114};
   2115
   2116struct ib_flow_action_attrs_esp_replays {
   2117	enum ib_uverbs_flow_action_esp_replay			protocol;
   2118	union {
   2119		struct ib_uverbs_flow_action_esp_replay_bmp	bmp;
   2120	} replay;
   2121};
   2122
   2123enum ib_flow_action_attrs_esp_flags {
   2124	/* All user-space flags at the top: Use enum ib_uverbs_flow_action_esp_flags
   2125	 * This is done in order to share the same flags between user-space and
   2126	 * kernel and spare an unnecessary translation.
   2127	 */
   2128
   2129	/* Kernel flags */
   2130	IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED	= 1ULL << 32,
   2131	IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS	= 1ULL << 33,
   2132};
   2133
   2134struct ib_flow_spec_list {
   2135	struct ib_flow_spec_list	*next;
   2136	union ib_flow_spec		spec;
   2137};
   2138
   2139struct ib_flow_action_attrs_esp {
   2140	struct ib_flow_action_attrs_esp_keymats		*keymat;
   2141	struct ib_flow_action_attrs_esp_replays		*replay;
   2142	struct ib_flow_spec_list			*encap;
   2143	/* Used only if IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED is enabled.
    2144	 * A value of 0 is valid.
   2145	 */
   2146	u32						esn;
   2147	u32						spi;
   2148	u32						seq;
   2149	u32						tfc_pad;
   2150	/* Use enum ib_flow_action_attrs_esp_flags */
   2151	u64						flags;
   2152	u64						hard_limit_pkts;
   2153};
   2154
   2155struct ib_flow_action {
   2156	struct ib_device		*device;
   2157	struct ib_uobject		*uobject;
   2158	enum ib_flow_action_type	type;
   2159	atomic_t			usecnt;
   2160};
   2161
   2162struct ib_mad;
   2163
   2164enum ib_process_mad_flags {
   2165	IB_MAD_IGNORE_MKEY	= 1,
   2166	IB_MAD_IGNORE_BKEY	= 2,
   2167	IB_MAD_IGNORE_ALL	= IB_MAD_IGNORE_MKEY | IB_MAD_IGNORE_BKEY
   2168};
   2169
   2170enum ib_mad_result {
   2171	IB_MAD_RESULT_FAILURE  = 0,      /* (!SUCCESS is the important flag) */
   2172	IB_MAD_RESULT_SUCCESS  = 1 << 0, /* MAD was successfully processed   */
   2173	IB_MAD_RESULT_REPLY    = 1 << 1, /* Reply packet needs to be sent    */
   2174	IB_MAD_RESULT_CONSUMED = 1 << 2  /* Packet consumed: stop processing */
   2175};
   2176
   2177struct ib_port_cache {
   2178	u64		      subnet_prefix;
   2179	struct ib_pkey_cache  *pkey;
   2180	struct ib_gid_table   *gid;
   2181	u8                     lmc;
   2182	enum ib_port_state     port_state;
   2183};
   2184
   2185struct ib_port_immutable {
   2186	int                           pkey_tbl_len;
   2187	int                           gid_tbl_len;
   2188	u32                           core_cap_flags;
   2189	u32                           max_mad_size;
   2190};
   2191
   2192struct ib_port_data {
   2193	struct ib_device *ib_dev;
   2194
   2195	struct ib_port_immutable immutable;
   2196
   2197	spinlock_t pkey_list_lock;
   2198
   2199	spinlock_t netdev_lock;
   2200
   2201	struct list_head pkey_list;
   2202
   2203	struct ib_port_cache cache;
   2204
   2205	struct net_device __rcu *netdev;
   2206	struct hlist_node ndev_hash_link;
   2207	struct rdma_port_counter port_counter;
   2208	struct ib_port *sysfs;
   2209};
   2210
   2211/* rdma netdev type - specifies protocol type */
   2212enum rdma_netdev_t {
   2213	RDMA_NETDEV_OPA_VNIC,
   2214	RDMA_NETDEV_IPOIB,
   2215};
   2216
   2217/**
   2218 * struct rdma_netdev - rdma netdev
   2219 * For cases where netstack interfacing is required.
   2220 */
   2221struct rdma_netdev {
   2222	void              *clnt_priv;
   2223	struct ib_device  *hca;
   2224	u32		   port_num;
   2225	int                mtu;
   2226
   2227	/*
   2228	 * cleanup function must be specified.
   2229	 * FIXME: This is only used for OPA_VNIC and that usage should be
   2230	 * removed too.
   2231	 */
   2232	void (*free_rdma_netdev)(struct net_device *netdev);
   2233
   2234	/* control functions */
   2235	void (*set_id)(struct net_device *netdev, int id);
   2236	/* send packet */
   2237	int (*send)(struct net_device *dev, struct sk_buff *skb,
   2238		    struct ib_ah *address, u32 dqpn);
   2239	/* multicast */
   2240	int (*attach_mcast)(struct net_device *dev, struct ib_device *hca,
   2241			    union ib_gid *gid, u16 mlid,
   2242			    int set_qkey, u32 qkey);
   2243	int (*detach_mcast)(struct net_device *dev, struct ib_device *hca,
   2244			    union ib_gid *gid, u16 mlid);
   2245	/* timeout */
   2246	void (*tx_timeout)(struct net_device *dev, unsigned int txqueue);
   2247};
   2248
   2249struct rdma_netdev_alloc_params {
   2250	size_t sizeof_priv;
   2251	unsigned int txqs;
   2252	unsigned int rxqs;
   2253	void *param;
   2254
   2255	int (*initialize_rdma_netdev)(struct ib_device *device, u32 port_num,
   2256				      struct net_device *netdev, void *param);
   2257};
   2258
   2259struct ib_odp_counters {
   2260	atomic64_t faults;
   2261	atomic64_t invalidations;
   2262	atomic64_t prefetch;
   2263};
   2264
   2265struct ib_counters {
   2266	struct ib_device	*device;
   2267	struct ib_uobject	*uobject;
   2268	/* num of objects attached */
   2269	atomic_t	usecnt;
   2270};
   2271
   2272struct ib_counters_read_attr {
   2273	u64	*counters_buff;
   2274	u32	ncounters;
   2275	u32	flags; /* use enum ib_read_counters_flags */
   2276};
   2277
   2278struct uverbs_attr_bundle;
   2279struct iw_cm_id;
   2280struct iw_cm_conn_param;
   2281
   2282#define INIT_RDMA_OBJ_SIZE(ib_struct, drv_struct, member)                      \
   2283	.size_##ib_struct =                                                    \
   2284		(sizeof(struct drv_struct) +                                   \
   2285		 BUILD_BUG_ON_ZERO(offsetof(struct drv_struct, member)) +      \
   2286		 BUILD_BUG_ON_ZERO(                                            \
   2287			 !__same_type(((struct drv_struct *)NULL)->member,     \
   2288				      struct ib_struct)))
   2289
   2290#define rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, gfp)                          \
   2291	((struct ib_type *)rdma_zalloc_obj(ib_dev, ib_dev->ops.size_##ib_type, \
   2292					   gfp, false))
   2293
   2294#define rdma_zalloc_drv_obj_numa(ib_dev, ib_type)                              \
   2295	((struct ib_type *)rdma_zalloc_obj(ib_dev, ib_dev->ops.size_##ib_type, \
   2296					   GFP_KERNEL, true))
   2297
   2298#define rdma_zalloc_drv_obj(ib_dev, ib_type)                                   \
   2299	rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, GFP_KERNEL)
   2300
   2301#define DECLARE_RDMA_OBJ_SIZE(ib_struct) size_t size_##ib_struct
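/*
 * Illustrative sketch (not part of this header's API surface): a provider
 * embeds the core object as the *first* member of its own structure and
 * announces the full size via INIT_RDMA_OBJ_SIZE() in its ib_device_ops;
 * the core then allocates the driver object with rdma_zalloc_drv_obj().
 * The "mydrv" names below are hypothetical.
 *
 *	struct mydrv_pd {
 *		struct ib_pd ibpd;	// must be at offset 0
 *		u32 pdn;		// driver-private state
 *	};
 *
 *	static const struct ib_device_ops mydrv_dev_ops = {
 *		.alloc_pd = mydrv_alloc_pd,
 *		.dealloc_pd = mydrv_dealloc_pd,
 *		INIT_RDMA_OBJ_SIZE(ib_pd, mydrv_pd, ibpd),
 *	};
 *
 * The core passes the embedded struct ib_pd to .alloc_pd(); the driver
 * recovers its wrapper with container_of(ibpd, struct mydrv_pd, ibpd).
 */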
   2302
   2303struct rdma_user_mmap_entry {
   2304	struct kref ref;
   2305	struct ib_ucontext *ucontext;
   2306	unsigned long start_pgoff;
   2307	size_t npages;
   2308	bool driver_removed;
   2309};
   2310
   2311/* Return the offset (in bytes) the user should pass to libc's mmap() */
   2312static inline u64
   2313rdma_user_mmap_get_offset(const struct rdma_user_mmap_entry *entry)
   2314{
   2315	return (u64)entry->start_pgoff << PAGE_SHIFT;
   2316}
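/*
 * Illustrative sketch, not a definitive flow: a driver typically embeds a
 * struct rdma_user_mmap_entry in a private structure, inserts it with
 * rdma_user_mmap_entry_insert() (declared below) and hands the resulting
 * offset to userspace, which passes it back through mmap() on the uverbs
 * device fd.  "ent" and "resp.db_offset" are hypothetical names.
 *
 *	err = rdma_user_mmap_entry_insert(ucontext, &ent->rdma_entry, PAGE_SIZE);
 *	if (err)
 *		goto err_free;
 *	resp.db_offset = rdma_user_mmap_get_offset(&ent->rdma_entry);
 *	// userspace: mmap(NULL, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED,
 *	//                 cmd_fd, resp.db_offset);
 *
 * The driver's .mmap handler later looks the entry back up with
 * rdma_user_mmap_entry_get() and maps it with rdma_user_mmap_io().
 */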
   2317
   2318/**
   2319 * struct ib_device_ops - InfiniBand device operations
    2320 * This structure defines all the InfiniBand device operations. Providers
    2321 * need to define the operations they support; the rest are left NULL.
   2322 */
   2323struct ib_device_ops {
   2324	struct module *owner;
   2325	enum rdma_driver_id driver_id;
   2326	u32 uverbs_abi_ver;
   2327	unsigned int uverbs_no_driver_id_binding:1;
   2328
   2329	/*
    2330	 * NOTE: New drivers should not make use of device_group; instead, new
    2331	 * device parameters should be exposed via netlink commands. This
   2332	 * mechanism exists only for existing drivers.
   2333	 */
   2334	const struct attribute_group *device_group;
   2335	const struct attribute_group **port_groups;
   2336
   2337	int (*post_send)(struct ib_qp *qp, const struct ib_send_wr *send_wr,
   2338			 const struct ib_send_wr **bad_send_wr);
   2339	int (*post_recv)(struct ib_qp *qp, const struct ib_recv_wr *recv_wr,
   2340			 const struct ib_recv_wr **bad_recv_wr);
   2341	void (*drain_rq)(struct ib_qp *qp);
   2342	void (*drain_sq)(struct ib_qp *qp);
   2343	int (*poll_cq)(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
   2344	int (*peek_cq)(struct ib_cq *cq, int wc_cnt);
   2345	int (*req_notify_cq)(struct ib_cq *cq, enum ib_cq_notify_flags flags);
   2346	int (*post_srq_recv)(struct ib_srq *srq,
   2347			     const struct ib_recv_wr *recv_wr,
   2348			     const struct ib_recv_wr **bad_recv_wr);
   2349	int (*process_mad)(struct ib_device *device, int process_mad_flags,
   2350			   u32 port_num, const struct ib_wc *in_wc,
   2351			   const struct ib_grh *in_grh,
   2352			   const struct ib_mad *in_mad, struct ib_mad *out_mad,
   2353			   size_t *out_mad_size, u16 *out_mad_pkey_index);
   2354	int (*query_device)(struct ib_device *device,
   2355			    struct ib_device_attr *device_attr,
   2356			    struct ib_udata *udata);
   2357	int (*modify_device)(struct ib_device *device, int device_modify_mask,
   2358			     struct ib_device_modify *device_modify);
   2359	void (*get_dev_fw_str)(struct ib_device *device, char *str);
   2360	const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev,
   2361						     int comp_vector);
   2362	int (*query_port)(struct ib_device *device, u32 port_num,
   2363			  struct ib_port_attr *port_attr);
   2364	int (*modify_port)(struct ib_device *device, u32 port_num,
   2365			   int port_modify_mask,
   2366			   struct ib_port_modify *port_modify);
   2367	/**
   2368	 * The following mandatory functions are used only at device
   2369	 * registration.  Keep functions such as these at the end of this
   2370	 * structure to avoid cache line misses when accessing struct ib_device
   2371	 * in fast paths.
   2372	 */
   2373	int (*get_port_immutable)(struct ib_device *device, u32 port_num,
   2374				  struct ib_port_immutable *immutable);
   2375	enum rdma_link_layer (*get_link_layer)(struct ib_device *device,
   2376					       u32 port_num);
   2377	/**
   2378	 * When calling get_netdev, the HW vendor's driver should return the
   2379	 * net device of device @device at port @port_num or NULL if such
   2380	 * a net device doesn't exist. The vendor driver should call dev_hold
   2381	 * on this net device. The HW vendor's device driver must guarantee
   2382	 * that this function returns NULL before the net device has finished
   2383	 * NETDEV_UNREGISTER state.
   2384	 */
   2385	struct net_device *(*get_netdev)(struct ib_device *device,
   2386					 u32 port_num);
   2387	/**
   2388	 * rdma netdev operation
   2389	 *
    2390	 * A driver implementing alloc_rdma_netdev or rdma_netdev_get_params
   2391	 * must return -EOPNOTSUPP if it doesn't support the specified type.
   2392	 */
   2393	struct net_device *(*alloc_rdma_netdev)(
   2394		struct ib_device *device, u32 port_num, enum rdma_netdev_t type,
   2395		const char *name, unsigned char name_assign_type,
   2396		void (*setup)(struct net_device *));
   2397
   2398	int (*rdma_netdev_get_params)(struct ib_device *device, u32 port_num,
   2399				      enum rdma_netdev_t type,
   2400				      struct rdma_netdev_alloc_params *params);
   2401	/**
    2402	 * query_gid should return the GID value for @device when the @port_num
    2403	 * link layer is either IB or iWARP. It is a no-op if the @port_num port
    2404	 * uses the RoCE link layer.
   2405	 */
   2406	int (*query_gid)(struct ib_device *device, u32 port_num, int index,
   2407			 union ib_gid *gid);
   2408	/**
   2409	 * When calling add_gid, the HW vendor's driver should add the gid
    2410	 * of the device port at the gid index available in @attr. Meta-info of
   2411	 * that gid (for example, the network device related to this gid) is
   2412	 * available at @attr. @context allows the HW vendor driver to store
   2413	 * extra information together with a GID entry. The HW vendor driver may
   2414	 * allocate memory to contain this information and store it in @context
   2415	 * when a new GID entry is written to. Params are consistent until the
    2416	 * next call of add_gid or del_gid. The function should return 0 on
   2417	 * success or error otherwise. The function could be called
   2418	 * concurrently for different ports. This function is only called when
   2419	 * roce_gid_table is used.
   2420	 */
   2421	int (*add_gid)(const struct ib_gid_attr *attr, void **context);
   2422	/**
   2423	 * When calling del_gid, the HW vendor's driver should delete the
   2424	 * gid of device @device at gid index gid_index of port port_num
   2425	 * available in @attr.
   2426	 * Upon the deletion of a GID entry, the HW vendor must free any
   2427	 * allocated memory. The caller will clear @context afterwards.
   2428	 * This function is only called when roce_gid_table is used.
   2429	 */
   2430	int (*del_gid)(const struct ib_gid_attr *attr, void **context);
   2431	int (*query_pkey)(struct ib_device *device, u32 port_num, u16 index,
   2432			  u16 *pkey);
   2433	int (*alloc_ucontext)(struct ib_ucontext *context,
   2434			      struct ib_udata *udata);
   2435	void (*dealloc_ucontext)(struct ib_ucontext *context);
   2436	int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma);
   2437	/**
    2438	 * This will be called once the refcount of an entry in mmap_xa reaches
    2439	 * zero. The type of the memory that was mapped may differ between
    2440	 * entries and is opaque to the rdma_user_mmap interface.
    2441	 * Therefore the free must be implemented by the driver in mmap_free.
   2442	 */
   2443	void (*mmap_free)(struct rdma_user_mmap_entry *entry);
   2444	void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
   2445	int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
   2446	int (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
   2447	int (*create_ah)(struct ib_ah *ah, struct rdma_ah_init_attr *attr,
   2448			 struct ib_udata *udata);
   2449	int (*create_user_ah)(struct ib_ah *ah, struct rdma_ah_init_attr *attr,
   2450			      struct ib_udata *udata);
   2451	int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
   2452	int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
   2453	int (*destroy_ah)(struct ib_ah *ah, u32 flags);
   2454	int (*create_srq)(struct ib_srq *srq,
   2455			  struct ib_srq_init_attr *srq_init_attr,
   2456			  struct ib_udata *udata);
   2457	int (*modify_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr,
   2458			  enum ib_srq_attr_mask srq_attr_mask,
   2459			  struct ib_udata *udata);
   2460	int (*query_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
   2461	int (*destroy_srq)(struct ib_srq *srq, struct ib_udata *udata);
   2462	int (*create_qp)(struct ib_qp *qp, struct ib_qp_init_attr *qp_init_attr,
   2463			 struct ib_udata *udata);
   2464	int (*modify_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
   2465			 int qp_attr_mask, struct ib_udata *udata);
   2466	int (*query_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
   2467			int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
   2468	int (*destroy_qp)(struct ib_qp *qp, struct ib_udata *udata);
   2469	int (*create_cq)(struct ib_cq *cq, const struct ib_cq_init_attr *attr,
   2470			 struct ib_udata *udata);
   2471	int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period);
   2472	int (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata);
   2473	int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata);
   2474	struct ib_mr *(*get_dma_mr)(struct ib_pd *pd, int mr_access_flags);
   2475	struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length,
   2476				     u64 virt_addr, int mr_access_flags,
   2477				     struct ib_udata *udata);
   2478	struct ib_mr *(*reg_user_mr_dmabuf)(struct ib_pd *pd, u64 offset,
   2479					    u64 length, u64 virt_addr, int fd,
   2480					    int mr_access_flags,
   2481					    struct ib_udata *udata);
   2482	struct ib_mr *(*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start,
   2483				       u64 length, u64 virt_addr,
   2484				       int mr_access_flags, struct ib_pd *pd,
   2485				       struct ib_udata *udata);
   2486	int (*dereg_mr)(struct ib_mr *mr, struct ib_udata *udata);
   2487	struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type,
   2488				  u32 max_num_sg);
   2489	struct ib_mr *(*alloc_mr_integrity)(struct ib_pd *pd,
   2490					    u32 max_num_data_sg,
   2491					    u32 max_num_meta_sg);
   2492	int (*advise_mr)(struct ib_pd *pd,
   2493			 enum ib_uverbs_advise_mr_advice advice, u32 flags,
   2494			 struct ib_sge *sg_list, u32 num_sge,
   2495			 struct uverbs_attr_bundle *attrs);
   2496
   2497	/*
   2498	 * Kernel users should universally support relaxed ordering (RO), as
   2499	 * they are designed to read data only after observing the CQE and use
   2500	 * the DMA API correctly.
   2501	 *
   2502	 * Some drivers implicitly enable RO if platform supports it.
   2503	 */
   2504	int (*map_mr_sg)(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
   2505			 unsigned int *sg_offset);
   2506	int (*check_mr_status)(struct ib_mr *mr, u32 check_mask,
   2507			       struct ib_mr_status *mr_status);
   2508	int (*alloc_mw)(struct ib_mw *mw, struct ib_udata *udata);
   2509	int (*dealloc_mw)(struct ib_mw *mw);
   2510	int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
   2511	int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
   2512	int (*alloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata);
   2513	int (*dealloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata);
   2514	struct ib_flow *(*create_flow)(struct ib_qp *qp,
   2515				       struct ib_flow_attr *flow_attr,
   2516				       struct ib_udata *udata);
   2517	int (*destroy_flow)(struct ib_flow *flow_id);
   2518	int (*destroy_flow_action)(struct ib_flow_action *action);
   2519	int (*set_vf_link_state)(struct ib_device *device, int vf, u32 port,
   2520				 int state);
   2521	int (*get_vf_config)(struct ib_device *device, int vf, u32 port,
   2522			     struct ifla_vf_info *ivf);
   2523	int (*get_vf_stats)(struct ib_device *device, int vf, u32 port,
   2524			    struct ifla_vf_stats *stats);
   2525	int (*get_vf_guid)(struct ib_device *device, int vf, u32 port,
   2526			    struct ifla_vf_guid *node_guid,
   2527			    struct ifla_vf_guid *port_guid);
   2528	int (*set_vf_guid)(struct ib_device *device, int vf, u32 port, u64 guid,
   2529			   int type);
   2530	struct ib_wq *(*create_wq)(struct ib_pd *pd,
   2531				   struct ib_wq_init_attr *init_attr,
   2532				   struct ib_udata *udata);
   2533	int (*destroy_wq)(struct ib_wq *wq, struct ib_udata *udata);
   2534	int (*modify_wq)(struct ib_wq *wq, struct ib_wq_attr *attr,
   2535			 u32 wq_attr_mask, struct ib_udata *udata);
   2536	int (*create_rwq_ind_table)(struct ib_rwq_ind_table *ib_rwq_ind_table,
   2537				    struct ib_rwq_ind_table_init_attr *init_attr,
   2538				    struct ib_udata *udata);
   2539	int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table);
   2540	struct ib_dm *(*alloc_dm)(struct ib_device *device,
   2541				  struct ib_ucontext *context,
   2542				  struct ib_dm_alloc_attr *attr,
   2543				  struct uverbs_attr_bundle *attrs);
   2544	int (*dealloc_dm)(struct ib_dm *dm, struct uverbs_attr_bundle *attrs);
   2545	struct ib_mr *(*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm,
   2546				   struct ib_dm_mr_attr *attr,
   2547				   struct uverbs_attr_bundle *attrs);
   2548	int (*create_counters)(struct ib_counters *counters,
   2549			       struct uverbs_attr_bundle *attrs);
   2550	int (*destroy_counters)(struct ib_counters *counters);
   2551	int (*read_counters)(struct ib_counters *counters,
   2552			     struct ib_counters_read_attr *counters_read_attr,
   2553			     struct uverbs_attr_bundle *attrs);
   2554	int (*map_mr_sg_pi)(struct ib_mr *mr, struct scatterlist *data_sg,
   2555			    int data_sg_nents, unsigned int *data_sg_offset,
   2556			    struct scatterlist *meta_sg, int meta_sg_nents,
   2557			    unsigned int *meta_sg_offset);
   2558
   2559	/**
   2560	 * alloc_hw_[device,port]_stats - Allocate a struct rdma_hw_stats and
   2561	 *   fill in the driver initialized data.  The struct is kfree()'ed by
   2562	 *   the sysfs core when the device is removed.  A lifespan of -1 in the
   2563	 *   return struct tells the core to set a default lifespan.
   2564	 */
   2565	struct rdma_hw_stats *(*alloc_hw_device_stats)(struct ib_device *device);
   2566	struct rdma_hw_stats *(*alloc_hw_port_stats)(struct ib_device *device,
   2567						     u32 port_num);
   2568	/**
   2569	 * get_hw_stats - Fill in the counter value(s) in the stats struct.
   2570	 * @index - The index in the value array we wish to have updated, or
   2571	 *   num_counters if we want all stats updated
   2572	 * Return codes -
   2573	 *   < 0 - Error, no counters updated
   2574	 *   index - Updated the single counter pointed to by index
   2575	 *   num_counters - Updated all counters (will reset the timestamp
   2576	 *     and prevent further calls for lifespan milliseconds)
    2577	 * Drivers are allowed to update all counters in lieu of just the
   2578	 *   one given in index at their option
   2579	 */
   2580	int (*get_hw_stats)(struct ib_device *device,
   2581			    struct rdma_hw_stats *stats, u32 port, int index);
   2582
   2583	/**
   2584	 * modify_hw_stat - Modify the counter configuration
    2585	 * @enable: true to enable the counter, false to disable it
   2586	 * Return codes - 0 on success or error code otherwise.
   2587	 */
   2588	int (*modify_hw_stat)(struct ib_device *device, u32 port,
   2589			      unsigned int counter_index, bool enable);
   2590	/**
   2591	 * Allows rdma drivers to add their own restrack attributes.
   2592	 */
   2593	int (*fill_res_mr_entry)(struct sk_buff *msg, struct ib_mr *ibmr);
   2594	int (*fill_res_mr_entry_raw)(struct sk_buff *msg, struct ib_mr *ibmr);
   2595	int (*fill_res_cq_entry)(struct sk_buff *msg, struct ib_cq *ibcq);
   2596	int (*fill_res_cq_entry_raw)(struct sk_buff *msg, struct ib_cq *ibcq);
   2597	int (*fill_res_qp_entry)(struct sk_buff *msg, struct ib_qp *ibqp);
   2598	int (*fill_res_qp_entry_raw)(struct sk_buff *msg, struct ib_qp *ibqp);
   2599	int (*fill_res_cm_id_entry)(struct sk_buff *msg, struct rdma_cm_id *id);
   2600
   2601	/* Device lifecycle callbacks */
   2602	/*
   2603	 * Called after the device becomes registered, before clients are
   2604	 * attached
   2605	 */
   2606	int (*enable_driver)(struct ib_device *dev);
   2607	/*
   2608	 * This is called as part of ib_dealloc_device().
   2609	 */
   2610	void (*dealloc_driver)(struct ib_device *dev);
   2611
   2612	/* iWarp CM callbacks */
   2613	void (*iw_add_ref)(struct ib_qp *qp);
   2614	void (*iw_rem_ref)(struct ib_qp *qp);
   2615	struct ib_qp *(*iw_get_qp)(struct ib_device *device, int qpn);
   2616	int (*iw_connect)(struct iw_cm_id *cm_id,
   2617			  struct iw_cm_conn_param *conn_param);
   2618	int (*iw_accept)(struct iw_cm_id *cm_id,
   2619			 struct iw_cm_conn_param *conn_param);
   2620	int (*iw_reject)(struct iw_cm_id *cm_id, const void *pdata,
   2621			 u8 pdata_len);
   2622	int (*iw_create_listen)(struct iw_cm_id *cm_id, int backlog);
   2623	int (*iw_destroy_listen)(struct iw_cm_id *cm_id);
   2624	/**
   2625	 * counter_bind_qp - Bind a QP to a counter.
   2626	 * @counter - The counter to be bound. If counter->id is zero then
   2627	 *   the driver needs to allocate a new counter and set counter->id
   2628	 */
   2629	int (*counter_bind_qp)(struct rdma_counter *counter, struct ib_qp *qp);
   2630	/**
   2631	 * counter_unbind_qp - Unbind the qp from the dynamically-allocated
   2632	 *   counter and bind it onto the default one
   2633	 */
   2634	int (*counter_unbind_qp)(struct ib_qp *qp);
   2635	/**
    2636	 * counter_dealloc - De-allocate the hw counter
   2637	 */
   2638	int (*counter_dealloc)(struct rdma_counter *counter);
   2639	/**
   2640	 * counter_alloc_stats - Allocate a struct rdma_hw_stats and fill in
   2641	 * the driver initialized data.
   2642	 */
   2643	struct rdma_hw_stats *(*counter_alloc_stats)(
   2644		struct rdma_counter *counter);
   2645	/**
   2646	 * counter_update_stats - Query the stats value of this counter
   2647	 */
   2648	int (*counter_update_stats)(struct rdma_counter *counter);
   2649
   2650	/**
   2651	 * Allows rdma drivers to add their own restrack attributes
   2652	 * dumped via 'rdma stat' iproute2 command.
   2653	 */
   2654	int (*fill_stat_mr_entry)(struct sk_buff *msg, struct ib_mr *ibmr);
   2655
   2656	/* query driver for its ucontext properties */
   2657	int (*query_ucontext)(struct ib_ucontext *context,
   2658			      struct uverbs_attr_bundle *attrs);
   2659
   2660	/*
   2661	 * Provide NUMA node. This API exists for rdmavt/hfi1 only.
   2662	 * Everyone else relies on Linux memory management model.
   2663	 */
   2664	int (*get_numa_node)(struct ib_device *dev);
   2665
   2666	DECLARE_RDMA_OBJ_SIZE(ib_ah);
   2667	DECLARE_RDMA_OBJ_SIZE(ib_counters);
   2668	DECLARE_RDMA_OBJ_SIZE(ib_cq);
   2669	DECLARE_RDMA_OBJ_SIZE(ib_mw);
   2670	DECLARE_RDMA_OBJ_SIZE(ib_pd);
   2671	DECLARE_RDMA_OBJ_SIZE(ib_qp);
   2672	DECLARE_RDMA_OBJ_SIZE(ib_rwq_ind_table);
   2673	DECLARE_RDMA_OBJ_SIZE(ib_srq);
   2674	DECLARE_RDMA_OBJ_SIZE(ib_ucontext);
   2675	DECLARE_RDMA_OBJ_SIZE(ib_xrcd);
   2676};
   2677
   2678struct ib_core_device {
    2679	/* device must be the first element in the structure until the
    2680	 * union of ib_core_device and device exists in ib_device.
   2681	 */
   2682	struct device dev;
   2683	possible_net_t rdma_net;
   2684	struct kobject *ports_kobj;
   2685	struct list_head port_list;
   2686	struct ib_device *owner; /* reach back to owner ib_device */
   2687};
   2688
   2689struct rdma_restrack_root;
   2690struct ib_device {
   2691	/* Do not access @dma_device directly from ULP nor from HW drivers. */
   2692	struct device                *dma_device;
   2693	struct ib_device_ops	     ops;
   2694	char                          name[IB_DEVICE_NAME_MAX];
   2695	struct rcu_head rcu_head;
   2696
   2697	struct list_head              event_handler_list;
   2698	/* Protects event_handler_list */
   2699	struct rw_semaphore event_handler_rwsem;
   2700
   2701	/* Protects QP's event_handler calls and open_qp list */
   2702	spinlock_t qp_open_list_lock;
   2703
   2704	struct rw_semaphore	      client_data_rwsem;
   2705	struct xarray                 client_data;
   2706	struct mutex                  unregistration_lock;
   2707
   2708	/* Synchronize GID, Pkey cache entries, subnet prefix, LMC */
   2709	rwlock_t cache_lock;
   2710	/**
   2711	 * port_data is indexed by port number
   2712	 */
   2713	struct ib_port_data *port_data;
   2714
   2715	int			      num_comp_vectors;
   2716
   2717	union {
   2718		struct device		dev;
   2719		struct ib_core_device	coredev;
   2720	};
   2721
    2722	/* First group is for device attributes.
    2723	 * Second group is for driver provided attributes (optional).
    2724	 * Third group is for the hw_stats.
    2725	 * It is a NULL terminated array.
   2726	 */
   2727	const struct attribute_group	*groups[4];
   2728
   2729	u64			     uverbs_cmd_mask;
   2730
   2731	char			     node_desc[IB_DEVICE_NODE_DESC_MAX];
   2732	__be64			     node_guid;
   2733	u32			     local_dma_lkey;
   2734	u16                          is_switch:1;
   2735	/* Indicates kernel verbs support, should not be used in drivers */
   2736	u16                          kverbs_provider:1;
   2737	/* CQ adaptive moderation (RDMA DIM) */
   2738	u16                          use_cq_dim:1;
   2739	u8                           node_type;
   2740	u32			     phys_port_cnt;
   2741	struct ib_device_attr        attrs;
   2742	struct hw_stats_device_data *hw_stats_data;
   2743
   2744#ifdef CONFIG_CGROUP_RDMA
   2745	struct rdmacg_device         cg_device;
   2746#endif
   2747
   2748	u32                          index;
   2749
   2750	spinlock_t                   cq_pools_lock;
   2751	struct list_head             cq_pools[IB_POLL_LAST_POOL_TYPE + 1];
   2752
   2753	struct rdma_restrack_root *res;
   2754
   2755	const struct uapi_definition   *driver_def;
   2756
   2757	/*
   2758	 * Positive refcount indicates that the device is currently
   2759	 * registered and cannot be unregistered.
   2760	 */
   2761	refcount_t refcount;
   2762	struct completion unreg_completion;
   2763	struct work_struct unregistration_work;
   2764
   2765	const struct rdma_link_ops *link_ops;
   2766
   2767	/* Protects compat_devs xarray modifications */
   2768	struct mutex compat_devs_mutex;
   2769	/* Maintains compat devices for each net namespace */
   2770	struct xarray compat_devs;
   2771
   2772	/* Used by iWarp CM */
   2773	char iw_ifname[IFNAMSIZ];
   2774	u32 iw_driver_flags;
   2775	u32 lag_flags;
   2776};
   2777
   2778static inline void *rdma_zalloc_obj(struct ib_device *dev, size_t size,
   2779				    gfp_t gfp, bool is_numa_aware)
   2780{
   2781	if (is_numa_aware && dev->ops.get_numa_node)
   2782		return kzalloc_node(size, gfp, dev->ops.get_numa_node(dev));
   2783
   2784	return kzalloc(size, gfp);
   2785}
   2786
   2787struct ib_client_nl_info;
   2788struct ib_client {
   2789	const char *name;
   2790	int (*add)(struct ib_device *ibdev);
   2791	void (*remove)(struct ib_device *, void *client_data);
   2792	void (*rename)(struct ib_device *dev, void *client_data);
   2793	int (*get_nl_info)(struct ib_device *ibdev, void *client_data,
   2794			   struct ib_client_nl_info *res);
   2795	int (*get_global_nl_info)(struct ib_client_nl_info *res);
   2796
   2797	/* Returns the net_dev belonging to this ib_client and matching the
   2798	 * given parameters.
    2799	 * @dev:	 An RDMA device that the net_dev uses for communication.
   2800	 * @port:	 A physical port number on the RDMA device.
   2801	 * @pkey:	 P_Key that the net_dev uses if applicable.
   2802	 * @gid:	 A GID that the net_dev uses to communicate.
   2803	 * @addr:	 An IP address the net_dev is configured with.
   2804	 * @client_data: The device's client data set by ib_set_client_data().
   2805	 *
   2806	 * An ib_client that implements a net_dev on top of RDMA devices
   2807	 * (such as IP over IB) should implement this callback, allowing the
   2808	 * rdma_cm module to find the right net_dev for a given request.
   2809	 *
   2810	 * The caller is responsible for calling dev_put on the returned
   2811	 * netdev. */
   2812	struct net_device *(*get_net_dev_by_params)(
   2813			struct ib_device *dev,
   2814			u32 port,
   2815			u16 pkey,
   2816			const union ib_gid *gid,
   2817			const struct sockaddr *addr,
   2818			void *client_data);
   2819
   2820	refcount_t uses;
   2821	struct completion uses_zero;
   2822	u32 client_id;
   2823
   2824	/* kverbs are not required by the client */
   2825	u8 no_kverbs_req:1;
   2826};
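/*
 * Illustrative sketch of the client model above (hypothetical "myclient"
 * names, not a definitive implementation): a client registers once and its
 * add()/remove() callbacks run for every ib_device that appears or goes
 * away; per-device state is stashed with ib_set_client_data() (declared
 * further below).
 *
 *	static struct ib_client myclient;
 *
 *	static int myclient_add(struct ib_device *ibdev)
 *	{
 *		struct myclient_state *st = kzalloc(sizeof(*st), GFP_KERNEL);
 *
 *		if (!st)
 *			return -ENOMEM;
 *		ib_set_client_data(ibdev, &myclient, st);
 *		return 0;
 *	}
 *
 *	static void myclient_remove(struct ib_device *ibdev, void *client_data)
 *	{
 *		kfree(client_data);
 *	}
 *
 *	static struct ib_client myclient = {
 *		.name	= "myclient",
 *		.add	= myclient_add,
 *		.remove	= myclient_remove,
 *	};
 *
 * The client is registered from module init with ib_register_client(&myclient)
 * and torn down with ib_unregister_client(&myclient).
 */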
   2827
   2828/*
   2829 * IB block DMA iterator
   2830 *
   2831 * Iterates the DMA-mapped SGL in contiguous memory blocks aligned
   2832 * to a HW supported page size.
   2833 */
   2834struct ib_block_iter {
   2835	/* internal states */
   2836	struct scatterlist *__sg;	/* sg holding the current aligned block */
   2837	dma_addr_t __dma_addr;		/* unaligned DMA address of this block */
   2838	unsigned int __sg_nents;	/* number of SG entries */
   2839	unsigned int __sg_advance;	/* number of bytes to advance in sg in next step */
   2840	unsigned int __pg_bit;		/* alignment of current block */
   2841};
   2842
   2843struct ib_device *_ib_alloc_device(size_t size);
   2844#define ib_alloc_device(drv_struct, member)                                    \
   2845	container_of(_ib_alloc_device(sizeof(struct drv_struct) +              \
   2846				      BUILD_BUG_ON_ZERO(offsetof(              \
   2847					      struct drv_struct, member))),    \
   2848		     struct drv_struct, member)
   2849
   2850void ib_dealloc_device(struct ib_device *device);
   2851
   2852void ib_get_device_fw_str(struct ib_device *device, char *str);
   2853
   2854int ib_register_device(struct ib_device *device, const char *name,
   2855		       struct device *dma_device);
   2856void ib_unregister_device(struct ib_device *device);
   2857void ib_unregister_driver(enum rdma_driver_id driver_id);
   2858void ib_unregister_device_and_put(struct ib_device *device);
   2859void ib_unregister_device_queued(struct ib_device *ib_dev);
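/*
 * Illustrative provider-side sketch (hypothetical "mydrv" names): the driver
 * embeds struct ib_device in its own device structure, allocates it with
 * ib_alloc_device(), installs its ops and then registers it.
 *
 *	struct mydrv_dev {
 *		struct ib_device ibdev;
 *		void __iomem *regs;	// driver-private state
 *	};
 *
 *	struct mydrv_dev *dev = ib_alloc_device(mydrv_dev, ibdev);
 *
 *	if (!dev)
 *		return -ENOMEM;
 *	ib_set_device_ops(&dev->ibdev, &mydrv_dev_ops);
 *	err = ib_register_device(&dev->ibdev, "mydrv%d", &pdev->dev);
 *	if (err)
 *		ib_dealloc_device(&dev->ibdev);
 *
 * ib_set_device_ops() is declared further below; &pdev->dev stands in for
 * whatever DMA-capable struct device backs the adapter.
 */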
   2860
   2861int ib_register_client   (struct ib_client *client);
   2862void ib_unregister_client(struct ib_client *client);
   2863
   2864void __rdma_block_iter_start(struct ib_block_iter *biter,
   2865			     struct scatterlist *sglist,
   2866			     unsigned int nents,
   2867			     unsigned long pgsz);
   2868bool __rdma_block_iter_next(struct ib_block_iter *biter);
   2869
   2870/**
   2871 * rdma_block_iter_dma_address - get the aligned dma address of the current
   2872 * block held by the block iterator.
   2873 * @biter: block iterator holding the memory block
   2874 */
   2875static inline dma_addr_t
   2876rdma_block_iter_dma_address(struct ib_block_iter *biter)
   2877{
   2878	return biter->__dma_addr & ~(BIT_ULL(biter->__pg_bit) - 1);
   2879}
   2880
   2881/**
   2882 * rdma_for_each_block - iterate over contiguous memory blocks of the sg list
   2883 * @sglist: sglist to iterate over
   2884 * @biter: block iterator holding the memory block
   2885 * @nents: maximum number of sg entries to iterate over
   2886 * @pgsz: best HW supported page size to use
   2887 *
   2888 * Callers may use rdma_block_iter_dma_address() to get each
    2889 * block's aligned DMA address.
   2890 */
   2891#define rdma_for_each_block(sglist, biter, nents, pgsz)		\
   2892	for (__rdma_block_iter_start(biter, sglist, nents,	\
   2893				     pgsz);			\
   2894	     __rdma_block_iter_next(biter);)
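/*
 * Illustrative sketch: walking a DMA-mapped scatterlist in HW-page-sized
 * blocks to build an address array.  "sglist", "nents", "page_size" and
 * "pas" are assumed to be set up by the caller and are not defined here.
 *
 *	struct ib_block_iter biter;
 *	unsigned int i = 0;
 *
 *	rdma_for_each_block(sglist, &biter, nents, page_size)
 *		pas[i++] = rdma_block_iter_dma_address(&biter);
 */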
   2895
   2896/**
   2897 * ib_get_client_data - Get IB client context
   2898 * @device:Device to get context for
   2899 * @client:Client to get context for
   2900 *
   2901 * ib_get_client_data() returns the client context data set with
   2902 * ib_set_client_data(). This can only be called while the client is
   2903 * registered to the device, once the ib_client remove() callback returns this
   2904 * cannot be called.
   2905 */
   2906static inline void *ib_get_client_data(struct ib_device *device,
   2907				       struct ib_client *client)
   2908{
   2909	return xa_load(&device->client_data, client->client_id);
   2910}
   2911void  ib_set_client_data(struct ib_device *device, struct ib_client *client,
   2912			 void *data);
   2913void ib_set_device_ops(struct ib_device *device,
   2914		       const struct ib_device_ops *ops);
   2915
   2916int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
   2917		      unsigned long pfn, unsigned long size, pgprot_t prot,
   2918		      struct rdma_user_mmap_entry *entry);
   2919int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
   2920				struct rdma_user_mmap_entry *entry,
   2921				size_t length);
   2922int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
   2923				      struct rdma_user_mmap_entry *entry,
   2924				      size_t length, u32 min_pgoff,
   2925				      u32 max_pgoff);
   2926
   2927static inline int
   2928rdma_user_mmap_entry_insert_exact(struct ib_ucontext *ucontext,
   2929				  struct rdma_user_mmap_entry *entry,
   2930				  size_t length, u32 pgoff)
   2931{
   2932	return rdma_user_mmap_entry_insert_range(ucontext, entry, length, pgoff,
   2933						 pgoff);
   2934}
   2935
   2936struct rdma_user_mmap_entry *
   2937rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext,
   2938			       unsigned long pgoff);
   2939struct rdma_user_mmap_entry *
   2940rdma_user_mmap_entry_get(struct ib_ucontext *ucontext,
   2941			 struct vm_area_struct *vma);
   2942void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry);
   2943
   2944void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry);
   2945
   2946static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len)
   2947{
   2948	return copy_from_user(dest, udata->inbuf, len) ? -EFAULT : 0;
   2949}
   2950
   2951static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len)
   2952{
   2953	return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0;
   2954}
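/*
 * Illustrative sketch of the usual verb-handler pattern (the "mydrv" command
 * and response structures are hypothetical): copy the user's request in, act
 * on it, then copy the driver's response back out, never reading or writing
 * past what userspace actually provided.
 *
 *	struct mydrv_create_cq_cmd cmd = {};
 *	struct mydrv_create_cq_resp resp = {};
 *	int err;
 *
 *	err = ib_copy_from_udata(&cmd, udata, min(udata->inlen, sizeof(cmd)));
 *	if (err)
 *		return err;
 *	// ... create the CQ according to cmd ...
 *	resp.cqn = cq_number;
 *	err = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp)));
 *	if (err)
 *		return err;
 */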
   2955
   2956static inline bool ib_is_buffer_cleared(const void __user *p,
   2957					size_t len)
   2958{
   2959	bool ret;
   2960	u8 *buf;
   2961
   2962	if (len > USHRT_MAX)
   2963		return false;
   2964
   2965	buf = memdup_user(p, len);
   2966	if (IS_ERR(buf))
   2967		return false;
   2968
   2969	ret = !memchr_inv(buf, 0, len);
   2970	kfree(buf);
   2971	return ret;
   2972}
   2973
   2974static inline bool ib_is_udata_cleared(struct ib_udata *udata,
   2975				       size_t offset,
   2976				       size_t len)
   2977{
   2978	return ib_is_buffer_cleared(udata->inbuf + offset, len);
   2979}
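/*
 * Illustrative sketch: a driver that only understands sizeof(cmd) bytes of
 * input can accept a larger request from a newer userspace only if the
 * trailing, unknown bytes are all zero ("cmd" is the driver's hypothetical
 * command structure).
 *
 *	if (udata->inlen > sizeof(cmd) &&
 *	    !ib_is_udata_cleared(udata, sizeof(cmd),
 *				 udata->inlen - sizeof(cmd)))
 *		return -EOPNOTSUPP;
 */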
   2980
   2981/**
   2982 * ib_modify_qp_is_ok - Check that the supplied attribute mask
   2983 * contains all required attributes and no attributes not allowed for
   2984 * the given QP state transition.
   2985 * @cur_state: Current QP state
   2986 * @next_state: Next QP state
   2987 * @type: QP type
   2988 * @mask: Mask of supplied QP attributes
   2989 *
   2990 * This function is a helper function that a low-level driver's
   2991 * modify_qp method can use to validate the consumer's input.  It
   2992 * checks that cur_state and next_state are valid QP states, that a
   2993 * transition from cur_state to next_state is allowed by the IB spec,
   2994 * and that the attribute mask supplied is allowed for the transition.
   2995 */
   2996bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
   2997			enum ib_qp_type type, enum ib_qp_attr_mask mask);
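/*
 * Illustrative sketch of how a driver's modify_qp handler might use this
 * helper ("to_mydrv_qp()" and the driver-tracked QP state are hypothetical):
 *
 *	enum ib_qp_state cur_state, new_state;
 *
 *	cur_state = (attr_mask & IB_QP_CUR_STATE) ? attr->cur_qp_state :
 *						    to_mydrv_qp(ibqp)->state;
 *	new_state = (attr_mask & IB_QP_STATE) ? attr->qp_state : cur_state;
 *
 *	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
 *		return -EINVAL;
 */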
   2998
   2999void ib_register_event_handler(struct ib_event_handler *event_handler);
   3000void ib_unregister_event_handler(struct ib_event_handler *event_handler);
   3001void ib_dispatch_event(const struct ib_event *event);
   3002
   3003int ib_query_port(struct ib_device *device,
   3004		  u32 port_num, struct ib_port_attr *port_attr);
   3005
   3006enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device,
   3007					       u32 port_num);
   3008
   3009/**
   3010 * rdma_cap_ib_switch - Check if the device is IB switch
   3011 * @device: Device to check
   3012 *
    3013 * The device driver is responsible for setting the is_switch bit in the
    3014 * ib_device structure at init time.
    3015 *
    3016 * Return: true if the device is an IB switch.
   3017 */
   3018static inline bool rdma_cap_ib_switch(const struct ib_device *device)
   3019{
   3020	return device->is_switch;
   3021}
   3022
   3023/**
   3024 * rdma_start_port - Return the first valid port number for the device
   3025 * specified
   3026 *
   3027 * @device: Device to be checked
   3028 *
   3029 * Return start port number
   3030 */
   3031static inline u32 rdma_start_port(const struct ib_device *device)
   3032{
   3033	return rdma_cap_ib_switch(device) ? 0 : 1;
   3034}
   3035
   3036/**
   3037 * rdma_for_each_port - Iterate over all valid port numbers of the IB device
   3038 * @device - The struct ib_device * to iterate over
   3039 * @iter - The unsigned int to store the port number
   3040 */
   3041#define rdma_for_each_port(device, iter)                                       \
   3042	for (iter = rdma_start_port(device +				       \
   3043				    BUILD_BUG_ON_ZERO(!__same_type(u32,	       \
   3044								   iter)));    \
   3045	     iter <= rdma_end_port(device); iter++)
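/*
 * Illustrative sketch: port numbers are not zero-based on non-switch devices,
 * so callers should iterate with rdma_for_each_port() rather than a plain
 * 0..N-1 loop.
 *
 *	struct ib_port_attr attr;
 *	u32 port;
 *
 *	rdma_for_each_port(ibdev, port) {
 *		if (ib_query_port(ibdev, port, &attr))
 *			continue;
 *		// use attr.state, attr.max_mtu, ...
 *	}
 */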
   3046
   3047/**
   3048 * rdma_end_port - Return the last valid port number for the device
   3049 * specified
   3050 *
   3051 * @device: Device to be checked
   3052 *
   3053 * Return last port number
   3054 */
   3055static inline u32 rdma_end_port(const struct ib_device *device)
   3056{
   3057	return rdma_cap_ib_switch(device) ? 0 : device->phys_port_cnt;
   3058}
   3059
   3060static inline int rdma_is_port_valid(const struct ib_device *device,
   3061				     unsigned int port)
   3062{
   3063	return (port >= rdma_start_port(device) &&
   3064		port <= rdma_end_port(device));
   3065}
   3066
   3067static inline bool rdma_is_grh_required(const struct ib_device *device,
   3068					u32 port_num)
   3069{
   3070	return device->port_data[port_num].immutable.core_cap_flags &
   3071	       RDMA_CORE_PORT_IB_GRH_REQUIRED;
   3072}
   3073
   3074static inline bool rdma_protocol_ib(const struct ib_device *device,
   3075				    u32 port_num)
   3076{
   3077	return device->port_data[port_num].immutable.core_cap_flags &
   3078	       RDMA_CORE_CAP_PROT_IB;
   3079}
   3080
   3081static inline bool rdma_protocol_roce(const struct ib_device *device,
   3082				      u32 port_num)
   3083{
   3084	return device->port_data[port_num].immutable.core_cap_flags &
   3085	       (RDMA_CORE_CAP_PROT_ROCE | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP);
   3086}
   3087
   3088static inline bool rdma_protocol_roce_udp_encap(const struct ib_device *device,
   3089						u32 port_num)
   3090{
   3091	return device->port_data[port_num].immutable.core_cap_flags &
   3092	       RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
   3093}
   3094
   3095static inline bool rdma_protocol_roce_eth_encap(const struct ib_device *device,
   3096						u32 port_num)
   3097{
   3098	return device->port_data[port_num].immutable.core_cap_flags &
   3099	       RDMA_CORE_CAP_PROT_ROCE;
   3100}
   3101
   3102static inline bool rdma_protocol_iwarp(const struct ib_device *device,
   3103				       u32 port_num)
   3104{
   3105	return device->port_data[port_num].immutable.core_cap_flags &
   3106	       RDMA_CORE_CAP_PROT_IWARP;
   3107}
   3108
   3109static inline bool rdma_ib_or_roce(const struct ib_device *device,
   3110				   u32 port_num)
   3111{
   3112	return rdma_protocol_ib(device, port_num) ||
   3113		rdma_protocol_roce(device, port_num);
   3114}
   3115
   3116static inline bool rdma_protocol_raw_packet(const struct ib_device *device,
   3117					    u32 port_num)
   3118{
   3119	return device->port_data[port_num].immutable.core_cap_flags &
   3120	       RDMA_CORE_CAP_PROT_RAW_PACKET;
   3121}
   3122
   3123static inline bool rdma_protocol_usnic(const struct ib_device *device,
   3124				       u32 port_num)
   3125{
   3126	return device->port_data[port_num].immutable.core_cap_flags &
   3127	       RDMA_CORE_CAP_PROT_USNIC;
   3128}
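/*
 * Illustrative sketch: core and ULP code commonly branches on the per-port
 * protocol helpers above instead of inspecting core_cap_flags directly.
 *
 *	if (rdma_protocol_roce(ibdev, port)) {
 *		// RoCE: GIDs are derived from the associated netdev addresses
 *	} else if (rdma_protocol_ib(ibdev, port)) {
 *		// native IB: GIDs/paths come from the SM and the port GID table
 *	} else if (rdma_protocol_iwarp(ibdev, port)) {
 *		// iWARP: connection setup goes through the iw_cm callbacks
 *	}
 */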
   3129
   3130/**
   3131 * rdma_cap_ib_mad - Check if the port of a device supports Infiniband
   3132 * Management Datagrams.
   3133 * @device: Device to check
   3134 * @port_num: Port number to check
   3135 *
   3136 * Management Datagrams (MAD) are a required part of the InfiniBand
   3137 * specification and are supported on all InfiniBand devices.  A slightly
    3138 * extended version is also supported on OPA interfaces.
   3139 *
   3140 * Return: true if the port supports sending/receiving of MAD packets.
   3141 */
   3142static inline bool rdma_cap_ib_mad(const struct ib_device *device, u32 port_num)
   3143{
   3144	return device->port_data[port_num].immutable.core_cap_flags &
   3145	       RDMA_CORE_CAP_IB_MAD;
   3146}
   3147
   3148/**
   3149 * rdma_cap_opa_mad - Check if the port of device provides support for OPA
   3150 * Management Datagrams.
   3151 * @device: Device to check
   3152 * @port_num: Port number to check
   3153 *
   3154 * Intel OmniPath devices extend and/or replace the InfiniBand Management
   3155 * datagrams with their own versions.  These OPA MADs share many but not all of
   3156 * the characteristics of InfiniBand MADs.
   3157 *
   3158 * OPA MADs differ in the following ways:
   3159 *
   3160 *    1) MADs are variable size up to 2K
   3161 *       IBTA defined MADs remain fixed at 256 bytes
   3162 *    2) OPA SMPs must carry valid PKeys
   3163 *    3) OPA SMP packets are a different format
   3164 *
   3165 * Return: true if the port supports OPA MAD packet formats.
   3166 */
   3167static inline bool rdma_cap_opa_mad(struct ib_device *device, u32 port_num)
   3168{
   3169	return device->port_data[port_num].immutable.core_cap_flags &
   3170		RDMA_CORE_CAP_OPA_MAD;
   3171}
   3172
   3173/**
   3174 * rdma_cap_ib_smi - Check if the port of a device provides an Infiniband
   3175 * Subnet Management Agent (SMA) on the Subnet Management Interface (SMI).
   3176 * @device: Device to check
   3177 * @port_num: Port number to check
   3178 *
   3179 * Each InfiniBand node is required to provide a Subnet Management Agent
   3180 * that the subnet manager can access.  Prior to the fabric being fully
   3181 * configured by the subnet manager, the SMA is accessed via a well known
   3182 * interface called the Subnet Management Interface (SMI).  This interface
   3183 * uses directed route packets to communicate with the SM to get around the
   3184 * chicken and egg problem of the SM needing to know what's on the fabric
   3185 * in order to configure the fabric, and needing to configure the fabric in
   3186 * order to send packets to the devices on the fabric.  These directed
   3187 * route packets do not need the fabric fully configured in order to reach
   3188 * their destination.  The SMI is the only method allowed to send
   3189 * directed route packets on an InfiniBand fabric.
   3190 *
   3191 * Return: true if the port provides an SMI.
   3192 */
   3193static inline bool rdma_cap_ib_smi(const struct ib_device *device, u32 port_num)
   3194{
   3195	return device->port_data[port_num].immutable.core_cap_flags &
   3196	       RDMA_CORE_CAP_IB_SMI;
   3197}
   3198
   3199/**
    3200 * rdma_cap_ib_cm - Check if the port of a device supports the InfiniBand
    3201 * Communication Manager.
   3202 * @device: Device to check
   3203 * @port_num: Port number to check
   3204 *
   3205 * The InfiniBand Communication Manager is one of many pre-defined General
   3206 * Service Agents (GSA) that are accessed via the General Service
    3207 * Interface (GSI).  Its role is to facilitate the establishment of connections
   3208 * between nodes as well as other management related tasks for established
   3209 * connections.
   3210 *
   3211 * Return: true if the port supports an IB CM (this does not guarantee that
   3212 * a CM is actually running however).
   3213 */
   3214static inline bool rdma_cap_ib_cm(const struct ib_device *device, u32 port_num)
   3215{
   3216	return device->port_data[port_num].immutable.core_cap_flags &
   3217	       RDMA_CORE_CAP_IB_CM;
   3218}
   3219
   3220/**
    3221 * rdma_cap_iw_cm - Check if the port of a device supports the iWARP
   3222 * Communication Manager.
   3223 * @device: Device to check
   3224 * @port_num: Port number to check
   3225 *
    3226 * Similar to the above, but specific to iWARP connections, which use a
    3227 * different management protocol than InfiniBand.
   3228 *
   3229 * Return: true if the port supports an iWARP CM (this does not guarantee that
   3230 * a CM is actually running however).
   3231 */
   3232static inline bool rdma_cap_iw_cm(const struct ib_device *device, u32 port_num)
   3233{
   3234	return device->port_data[port_num].immutable.core_cap_flags &
   3235	       RDMA_CORE_CAP_IW_CM;
   3236}
   3237
   3238/**
    3239 * rdma_cap_ib_sa - Check if the port of a device supports InfiniBand
   3240 * Subnet Administration.
   3241 * @device: Device to check
   3242 * @port_num: Port number to check
   3243 *
   3244 * An InfiniBand Subnet Administration (SA) service is a pre-defined General
   3245 * Service Agent (GSA) provided by the Subnet Manager (SM).  On InfiniBand
   3246 * fabrics, devices should resolve routes to other hosts by contacting the
   3247 * SA to query the proper route.
   3248 *
   3249 * Return: true if the port should act as a client to the fabric Subnet
   3250 * Administration interface.  This does not imply that the SA service is
   3251 * running locally.
   3252 */
   3253static inline bool rdma_cap_ib_sa(const struct ib_device *device, u32 port_num)
   3254{
   3255	return device->port_data[port_num].immutable.core_cap_flags &
   3256	       RDMA_CORE_CAP_IB_SA;
   3257}
   3258
   3259/**
    3260 * rdma_cap_ib_mcast - Check if the port of a device supports InfiniBand
   3261 * Multicast.
   3262 * @device: Device to check
   3263 * @port_num: Port number to check
   3264 *
   3265 * InfiniBand multicast registration is more complex than normal IPv4 or
   3266 * IPv6 multicast registration.  Each Host Channel Adapter must register
   3267 * with the Subnet Manager when it wishes to join a multicast group.  It
   3268 * should do so only once regardless of how many queue pairs it subscribes
   3269 * to this group.  And it should leave the group only after all queue pairs
   3270 * attached to the group have been detached.
   3271 *
    3272 * Return: true if the port must undertake the additional administrative
   3273 * overhead of registering/unregistering with the SM and tracking of the
   3274 * total number of queue pairs attached to the multicast group.
   3275 */
   3276static inline bool rdma_cap_ib_mcast(const struct ib_device *device,
   3277				     u32 port_num)
   3278{
   3279	return rdma_cap_ib_sa(device, port_num);
   3280}
   3281
   3282/**
    3283 * rdma_cap_af_ib - Check if the port of a device supports
    3284 * native InfiniBand addressing.
   3285 * @device: Device to check
   3286 * @port_num: Port number to check
   3287 *
   3288 * InfiniBand addressing uses a port's GUID + Subnet Prefix to make a default
   3289 * GID.  RoCE uses a different mechanism, but still generates a GID via
   3290 * a prescribed mechanism and port specific data.
   3291 *
   3292 * Return: true if the port uses a GID address to identify devices on the
   3293 * network.
   3294 */
   3295static inline bool rdma_cap_af_ib(const struct ib_device *device, u32 port_num)
   3296{
   3297	return device->port_data[port_num].immutable.core_cap_flags &
   3298	       RDMA_CORE_CAP_AF_IB;
   3299}
   3300
   3301/**
    3302 * rdma_cap_eth_ah - Check if the port of a device supports
    3303 * Ethernet Address Handles.
   3304 * @device: Device to check
   3305 * @port_num: Port number to check
   3306 *
   3307 * RoCE is InfiniBand over Ethernet, and it uses a well defined technique
   3308 * to fabricate GIDs over Ethernet/IP specific addresses native to the
   3309 * port.  Normally, packet headers are generated by the sending host
   3310 * adapter, but when sending connectionless datagrams, we must manually
   3311 * inject the proper headers for the fabric we are communicating over.
   3312 *
   3313 * Return: true if we are running as a RoCE port and must force the
   3314 * addition of a Global Route Header built from our Ethernet Address
   3315 * Handle into our header list for connectionless packets.
   3316 */
   3317static inline bool rdma_cap_eth_ah(const struct ib_device *device, u32 port_num)
   3318{
   3319	return device->port_data[port_num].immutable.core_cap_flags &
   3320	       RDMA_CORE_CAP_ETH_AH;
   3321}
   3322
   3323/**
    3324 * rdma_cap_opa_ah - Check if the port of a device supports
    3325 * OPA Address Handles.
   3326 * @device: Device to check
   3327 * @port_num: Port number to check
   3328 *
   3329 * Return: true if we are running on an OPA device which supports
   3330 * the extended OPA addressing.
   3331 */
   3332static inline bool rdma_cap_opa_ah(struct ib_device *device, u32 port_num)
   3333{
   3334	return (device->port_data[port_num].immutable.core_cap_flags &
   3335		RDMA_CORE_CAP_OPA_AH) == RDMA_CORE_CAP_OPA_AH;
   3336}
   3337
   3338/**
   3339 * rdma_max_mad_size - Return the max MAD size required by this RDMA Port.
   3340 *
   3341 * @device: Device
   3342 * @port_num: Port number
   3343 *
   3344 * This MAD size includes the MAD headers and MAD payload.  No other headers
   3345 * are included.
   3346 *
    3347 * Return the max MAD size required by the port.  Will return 0 if the port
    3348 * does not support MADs.
   3349 */
   3350static inline size_t rdma_max_mad_size(const struct ib_device *device,
   3351				       u32 port_num)
   3352{
   3353	return device->port_data[port_num].immutable.max_mad_size;
   3354}
   3355
   3356/**
   3357 * rdma_cap_roce_gid_table - Check if the port of device uses roce_gid_table
   3358 * @device: Device to check
   3359 * @port_num: Port number to check
   3360 *
    3361 * The RoCE GID table mechanism manages the various GIDs for a device.
   3362 *
   3363 * NOTE: if allocating the port's GID table has failed, this call will still
   3364 * return true, but any RoCE GID table API will fail.
   3365 *
   3366 * Return: true if the port uses RoCE GID table mechanism in order to manage
   3367 * its GIDs.
   3368 */
   3369static inline bool rdma_cap_roce_gid_table(const struct ib_device *device,
   3370					   u32 port_num)
   3371{
   3372	return rdma_protocol_roce(device, port_num) &&
   3373		device->ops.add_gid && device->ops.del_gid;
   3374}
   3375
   3376/*
   3377 * Check if the device supports READ W/ INVALIDATE.
   3378 */
   3379static inline bool rdma_cap_read_inv(struct ib_device *dev, u32 port_num)
   3380{
   3381	/*
   3382	 * iWarp drivers must support READ W/ INVALIDATE.  No other protocol
   3383	 * has support for it yet.
   3384	 */
   3385	return rdma_protocol_iwarp(dev, port_num);
   3386}
   3387
   3388/**
   3389 * rdma_core_cap_opa_port - Return whether the RDMA Port is OPA or not.
   3390 * @device: Device
   3391 * @port_num: 1 based Port number
   3392 *
    3393 * Return true if the port is an Intel OPA port, false if not.
   3394 */
   3395static inline bool rdma_core_cap_opa_port(struct ib_device *device,
   3396					  u32 port_num)
   3397{
   3398	return (device->port_data[port_num].immutable.core_cap_flags &
   3399		RDMA_CORE_PORT_INTEL_OPA) == RDMA_CORE_PORT_INTEL_OPA;
   3400}
   3401
   3402/**
   3403 * rdma_mtu_enum_to_int - Return the mtu of the port as an integer value.
   3404 * @device: Device
    3405 * @port: Port number
   3406 * @mtu: enum value of MTU
   3407 *
   3408 * Return the MTU size supported by the port as an integer value. Will return
   3409 * -1 if enum value of mtu is not supported.
   3410 */
   3411static inline int rdma_mtu_enum_to_int(struct ib_device *device, u32 port,
   3412				       int mtu)
   3413{
   3414	if (rdma_core_cap_opa_port(device, port))
   3415		return opa_mtu_enum_to_int((enum opa_mtu)mtu);
   3416	else
   3417		return ib_mtu_enum_to_int((enum ib_mtu)mtu);
   3418}
   3419
   3420/**
   3421 * rdma_mtu_from_attr - Return the mtu of the port from the port attribute.
   3422 * @device: Device
    3423 * @port: Port number
   3424 * @attr: port attribute
   3425 *
   3426 * Return the MTU size supported by the port as an integer value.
   3427 */
   3428static inline int rdma_mtu_from_attr(struct ib_device *device, u32 port,
   3429				     struct ib_port_attr *attr)
   3430{
   3431	if (rdma_core_cap_opa_port(device, port))
   3432		return attr->phys_mtu;
   3433	else
   3434		return ib_mtu_enum_to_int(attr->max_mtu);
   3435}
   3436
   3437int ib_set_vf_link_state(struct ib_device *device, int vf, u32 port,
   3438			 int state);
   3439int ib_get_vf_config(struct ib_device *device, int vf, u32 port,
   3440		     struct ifla_vf_info *info);
   3441int ib_get_vf_stats(struct ib_device *device, int vf, u32 port,
   3442		    struct ifla_vf_stats *stats);
   3443int ib_get_vf_guid(struct ib_device *device, int vf, u32 port,
   3444		    struct ifla_vf_guid *node_guid,
   3445		    struct ifla_vf_guid *port_guid);
   3446int ib_set_vf_guid(struct ib_device *device, int vf, u32 port, u64 guid,
   3447		   int type);
   3448
   3449int ib_query_pkey(struct ib_device *device,
   3450		  u32 port_num, u16 index, u16 *pkey);
   3451
   3452int ib_modify_device(struct ib_device *device,
   3453		     int device_modify_mask,
   3454		     struct ib_device_modify *device_modify);
   3455
   3456int ib_modify_port(struct ib_device *device,
   3457		   u32 port_num, int port_modify_mask,
   3458		   struct ib_port_modify *port_modify);
   3459
   3460int ib_find_gid(struct ib_device *device, union ib_gid *gid,
   3461		u32 *port_num, u16 *index);
   3462
   3463int ib_find_pkey(struct ib_device *device,
   3464		 u32 port_num, u16 pkey, u16 *index);
   3465
   3466enum ib_pd_flags {
   3467	/*
   3468	 * Create a memory registration for all memory in the system and place
   3469	 * the rkey for it into pd->unsafe_global_rkey.  This can be used by
   3470	 * ULPs to avoid the overhead of dynamic MRs.
   3471	 *
   3472	 * This flag is generally considered unsafe and must only be used in
    3473	 * extremely trusted environments.  Every use of it will log a warning
   3474	 * in the kernel log.
   3475	 */
   3476	IB_PD_UNSAFE_GLOBAL_RKEY	= 0x01,
   3477};
   3478
   3479struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
   3480		const char *caller);
   3481
   3482/**
   3483 * ib_alloc_pd - Allocates an unused protection domain.
   3484 * @device: The device on which to allocate the protection domain.
   3485 * @flags: protection domain flags
   3486 *
   3487 * A protection domain object provides an association between QPs, shared
   3488 * receive queues, address handles, memory regions, and memory windows.
   3489 *
   3490 * Every PD has a local_dma_lkey which can be used as the lkey value for local
   3491 * memory operations.
   3492 */
   3493#define ib_alloc_pd(device, flags) \
   3494	__ib_alloc_pd((device), (flags), KBUILD_MODNAME)
   3495
   3496int ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata);
   3497
   3498/**
   3499 * ib_dealloc_pd - Deallocate kernel PD
   3500 * @pd: The protection domain
   3501 *
   3502 * NOTE: for user PD use ib_dealloc_pd_user with valid udata!
   3503 */
   3504static inline void ib_dealloc_pd(struct ib_pd *pd)
   3505{
   3506	int ret = ib_dealloc_pd_user(pd, NULL);
   3507
   3508	WARN_ONCE(ret, "Destroy of kernel PD shouldn't fail");
   3509}
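
/*
 * Usage sketch: my_pd_example() is a hypothetical ULP function showing the
 * kernel PD lifecycle.  ib_alloc_pd() returns an ERR_PTR() on failure.
 */
static inline int my_pd_example(struct ib_device *device)
{
	struct ib_pd *pd;

	pd = ib_alloc_pd(device, 0);
	if (IS_ERR(pd))
		return PTR_ERR(pd);

	/* ... create CQs, QPs and MRs under this PD ... */

	ib_dealloc_pd(pd);
	return 0;
}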
   3510
   3511enum rdma_create_ah_flags {
   3512	/* In a sleepable context */
   3513	RDMA_CREATE_AH_SLEEPABLE = BIT(0),
   3514};
   3515
   3516/**
   3517 * rdma_create_ah - Creates an address handle for the given address vector.
   3518 * @pd: The protection domain associated with the address handle.
   3519 * @ah_attr: The attributes of the address vector.
   3520 * @flags: Create address handle flags (see enum rdma_create_ah_flags).
   3521 *
   3522 * The address handle is used to reference a local or global destination
   3523 * in all UD QP post sends.
   3524 */
   3525struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
   3526			     u32 flags);
   3527
   3528/**
   3529 * rdma_create_user_ah - Creates an address handle for the given address vector.
    3530 * It resolves the destination MAC address for RoCE-type ah attributes.
   3531 * @pd: The protection domain associated with the address handle.
   3532 * @ah_attr: The attributes of the address vector.
    3533 * @udata: pointer to the user's input/output buffer information needed by
    3534 *         the provider driver.
   3535 *
   3536 * It returns 0 on success and returns appropriate error code on error.
   3537 * The address handle is used to reference a local or global destination
   3538 * in all UD QP post sends.
   3539 */
   3540struct ib_ah *rdma_create_user_ah(struct ib_pd *pd,
   3541				  struct rdma_ah_attr *ah_attr,
   3542				  struct ib_udata *udata);
   3543/**
   3544 * ib_get_gids_from_rdma_hdr - Get sgid and dgid from GRH or IPv4 header
   3545 *   work completion.
   3546 * @hdr: the L3 header to parse
   3547 * @net_type: type of header to parse
   3548 * @sgid: place to store source gid
   3549 * @dgid: place to store destination gid
   3550 */
   3551int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr,
   3552			      enum rdma_network_type net_type,
   3553			      union ib_gid *sgid, union ib_gid *dgid);
   3554
   3555/**
   3556 * ib_get_rdma_header_version - Get the header version
   3557 * @hdr: the L3 header to parse
   3558 */
   3559int ib_get_rdma_header_version(const union rdma_network_hdr *hdr);
   3560
   3561/**
   3562 * ib_init_ah_attr_from_wc - Initializes address handle attributes from a
   3563 *   work completion.
   3564 * @device: Device on which the received message arrived.
   3565 * @port_num: Port on which the received message arrived.
   3566 * @wc: Work completion associated with the received message.
   3567 * @grh: References the received global route header.  This parameter is
   3568 *   ignored unless the work completion indicates that the GRH is valid.
   3569 * @ah_attr: Returned attributes that can be used when creating an address
   3570 *   handle for replying to the message.
    3571 * When ib_init_ah_attr_from_wc() returns success, ah_attr:
    3572 * (a) for the IB link layer optionally contains a reference to the SGID
    3573 * attribute when a GRH is present.
    3574 * (b) for the RoCE link layer contains a reference to the SGID attribute.
    3575 * The user must invoke rdma_cleanup_ah_attr_gid_attr() to release the reference
    3576 * to SGID attributes initialized by ib_init_ah_attr_from_wc().
   3577 *
   3578 */
   3579int ib_init_ah_attr_from_wc(struct ib_device *device, u32 port_num,
   3580			    const struct ib_wc *wc, const struct ib_grh *grh,
   3581			    struct rdma_ah_attr *ah_attr);
   3582
   3583/**
   3584 * ib_create_ah_from_wc - Creates an address handle associated with the
   3585 *   sender of the specified work completion.
   3586 * @pd: The protection domain associated with the address handle.
   3587 * @wc: Work completion information associated with a received message.
   3588 * @grh: References the received global route header.  This parameter is
   3589 *   ignored unless the work completion indicates that the GRH is valid.
   3590 * @port_num: The outbound port number to associate with the address.
   3591 *
   3592 * The address handle is used to reference a local or global destination
   3593 * in all UD QP post sends.
   3594 */
   3595struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
   3596				   const struct ib_grh *grh, u32 port_num);
   3597
   3598/**
   3599 * rdma_modify_ah - Modifies the address vector associated with an address
   3600 *   handle.
   3601 * @ah: The address handle to modify.
   3602 * @ah_attr: The new address vector attributes to associate with the
   3603 *   address handle.
   3604 */
   3605int rdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
   3606
   3607/**
   3608 * rdma_query_ah - Queries the address vector associated with an address
   3609 *   handle.
   3610 * @ah: The address handle to query.
   3611 * @ah_attr: The address vector attributes associated with the address
   3612 *   handle.
   3613 */
   3614int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
   3615
   3616enum rdma_destroy_ah_flags {
   3617	/* In a sleepable context */
   3618	RDMA_DESTROY_AH_SLEEPABLE = BIT(0),
   3619};
   3620
   3621/**
   3622 * rdma_destroy_ah_user - Destroys an address handle.
   3623 * @ah: The address handle to destroy.
   3624 * @flags: Destroy address handle flags (see enum rdma_destroy_ah_flags).
   3625 * @udata: Valid user data or NULL for kernel objects
   3626 */
   3627int rdma_destroy_ah_user(struct ib_ah *ah, u32 flags, struct ib_udata *udata);
   3628
   3629/**
    3630 * rdma_destroy_ah - Destroys a kernel address handle.
   3631 * @ah: The address handle to destroy.
   3632 * @flags: Destroy address handle flags (see enum rdma_destroy_ah_flags).
   3633 *
   3634 * NOTE: for user ah use rdma_destroy_ah_user with valid udata!
   3635 */
   3636static inline void rdma_destroy_ah(struct ib_ah *ah, u32 flags)
   3637{
   3638	int ret = rdma_destroy_ah_user(ah, flags, NULL);
   3639
   3640	WARN_ONCE(ret, "Destroy of kernel AH shouldn't fail");
   3641}
   3642
   3643struct ib_srq *ib_create_srq_user(struct ib_pd *pd,
   3644				  struct ib_srq_init_attr *srq_init_attr,
   3645				  struct ib_usrq_object *uobject,
   3646				  struct ib_udata *udata);
   3647static inline struct ib_srq *
   3648ib_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *srq_init_attr)
   3649{
   3650	if (!pd->device->ops.create_srq)
   3651		return ERR_PTR(-EOPNOTSUPP);
   3652
   3653	return ib_create_srq_user(pd, srq_init_attr, NULL, NULL);
   3654}
   3655
   3656/**
   3657 * ib_modify_srq - Modifies the attributes for the specified SRQ.
   3658 * @srq: The SRQ to modify.
   3659 * @srq_attr: On input, specifies the SRQ attributes to modify.  On output,
   3660 *   the current values of selected SRQ attributes are returned.
   3661 * @srq_attr_mask: A bit-mask used to specify which attributes of the SRQ
   3662 *   are being modified.
   3663 *
   3664 * The mask may contain IB_SRQ_MAX_WR to resize the SRQ and/or
   3665 * IB_SRQ_LIMIT to set the SRQ's limit and request notification when
   3666 * the number of receives queued drops below the limit.
   3667 */
   3668int ib_modify_srq(struct ib_srq *srq,
   3669		  struct ib_srq_attr *srq_attr,
   3670		  enum ib_srq_attr_mask srq_attr_mask);
   3671
   3672/**
   3673 * ib_query_srq - Returns the attribute list and current values for the
   3674 *   specified SRQ.
   3675 * @srq: The SRQ to query.
   3676 * @srq_attr: The attributes of the specified SRQ.
   3677 */
   3678int ib_query_srq(struct ib_srq *srq,
   3679		 struct ib_srq_attr *srq_attr);
   3680
   3681/**
   3682 * ib_destroy_srq_user - Destroys the specified SRQ.
   3683 * @srq: The SRQ to destroy.
   3684 * @udata: Valid user data or NULL for kernel objects
   3685 */
   3686int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata);
   3687
   3688/**
   3689 * ib_destroy_srq - Destroys the specified kernel SRQ.
   3690 * @srq: The SRQ to destroy.
   3691 *
   3692 * NOTE: for user srq use ib_destroy_srq_user with valid udata!
   3693 */
   3694static inline void ib_destroy_srq(struct ib_srq *srq)
   3695{
   3696	int ret = ib_destroy_srq_user(srq, NULL);
   3697
   3698	WARN_ONCE(ret, "Destroy of kernel SRQ shouldn't fail");
   3699}
   3700
   3701/**
   3702 * ib_post_srq_recv - Posts a list of work requests to the specified SRQ.
   3703 * @srq: The SRQ to post the work request on.
   3704 * @recv_wr: A list of work requests to post on the receive queue.
   3705 * @bad_recv_wr: On an immediate failure, this parameter will reference
    3706 *   the work request that failed to be posted on the SRQ.
   3707 */
   3708static inline int ib_post_srq_recv(struct ib_srq *srq,
   3709				   const struct ib_recv_wr *recv_wr,
   3710				   const struct ib_recv_wr **bad_recv_wr)
   3711{
   3712	const struct ib_recv_wr *dummy;
   3713
   3714	return srq->device->ops.post_srq_recv(srq, recv_wr,
   3715					      bad_recv_wr ? : &dummy);
   3716}
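
/*
 * Usage sketch: my_post_one_srq_recv() is a hypothetical helper that posts a
 * single receive buffer to an SRQ.  The DMA address and lkey are assumed to
 * have been set up by the caller.
 */
static inline int my_post_one_srq_recv(struct ib_srq *srq, u64 dma_addr,
				       u32 length, u32 lkey, u64 wr_id)
{
	struct ib_sge sge = {
		.addr	= dma_addr,
		.length	= length,
		.lkey	= lkey,
	};
	struct ib_recv_wr wr = {
		.wr_id	 = wr_id,
		.sg_list = &sge,
		.num_sge = 1,
	};

	/* A NULL bad_recv_wr is fine; the wrapper substitutes a dummy. */
	return ib_post_srq_recv(srq, &wr, NULL);
}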
   3717
   3718struct ib_qp *ib_create_qp_kernel(struct ib_pd *pd,
   3719				  struct ib_qp_init_attr *qp_init_attr,
   3720				  const char *caller);
   3721/**
   3722 * ib_create_qp - Creates a kernel QP associated with the specific protection
   3723 * domain.
   3724 * @pd: The protection domain associated with the QP.
   3725 * @init_attr: A list of initial attributes required to create the
   3726 *   QP.  If QP creation succeeds, then the attributes are updated to
   3727 *   the actual capabilities of the created QP.
   3728 */
   3729static inline struct ib_qp *ib_create_qp(struct ib_pd *pd,
   3730					 struct ib_qp_init_attr *init_attr)
   3731{
   3732	return ib_create_qp_kernel(pd, init_attr, KBUILD_MODNAME);
   3733}
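
/*
 * Usage sketch: my_create_rc_qp() is a hypothetical helper that creates a
 * kernel RC QP bound to existing send/recv CQs.  The queue depths are
 * illustrative values only.
 */
static inline struct ib_qp *my_create_rc_qp(struct ib_pd *pd,
					    struct ib_cq *send_cq,
					    struct ib_cq *recv_cq)
{
	struct ib_qp_init_attr init_attr = {
		.send_cq     = send_cq,
		.recv_cq     = recv_cq,
		.qp_type     = IB_QPT_RC,
		.sq_sig_type = IB_SIGNAL_REQ_WR,
		.cap = {
			.max_send_wr  = 64,
			.max_recv_wr  = 64,
			.max_send_sge = 1,
			.max_recv_sge = 1,
		},
	};

	/* Returns an ERR_PTR() on failure, like ib_alloc_pd(). */
	return ib_create_qp(pd, &init_attr);
}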
   3734
   3735/**
   3736 * ib_modify_qp_with_udata - Modifies the attributes for the specified QP.
   3737 * @qp: The QP to modify.
   3738 * @attr: On input, specifies the QP attributes to modify.  On output,
   3739 *   the current values of selected QP attributes are returned.
   3740 * @attr_mask: A bit-mask used to specify which attributes of the QP
   3741 *   are being modified.
    3742 * @udata: pointer to the user's input/output buffer information.
    3743 *
   3744 * It returns 0 on success and returns appropriate error code on error.
   3745 */
   3746int ib_modify_qp_with_udata(struct ib_qp *qp,
   3747			    struct ib_qp_attr *attr,
   3748			    int attr_mask,
   3749			    struct ib_udata *udata);
   3750
   3751/**
   3752 * ib_modify_qp - Modifies the attributes for the specified QP and then
   3753 *   transitions the QP to the given state.
   3754 * @qp: The QP to modify.
   3755 * @qp_attr: On input, specifies the QP attributes to modify.  On output,
   3756 *   the current values of selected QP attributes are returned.
   3757 * @qp_attr_mask: A bit-mask used to specify which attributes of the QP
   3758 *   are being modified.
   3759 */
   3760int ib_modify_qp(struct ib_qp *qp,
   3761		 struct ib_qp_attr *qp_attr,
   3762		 int qp_attr_mask);
   3763
   3764/**
   3765 * ib_query_qp - Returns the attribute list and current values for the
   3766 *   specified QP.
   3767 * @qp: The QP to query.
   3768 * @qp_attr: The attributes of the specified QP.
   3769 * @qp_attr_mask: A bit-mask used to select specific attributes to query.
   3770 * @qp_init_attr: Additional attributes of the selected QP.
   3771 *
   3772 * The qp_attr_mask may be used to limit the query to gathering only the
   3773 * selected attributes.
   3774 */
   3775int ib_query_qp(struct ib_qp *qp,
   3776		struct ib_qp_attr *qp_attr,
   3777		int qp_attr_mask,
   3778		struct ib_qp_init_attr *qp_init_attr);
   3779
   3780/**
    3781 * ib_destroy_qp_user - Destroys the specified QP.
   3782 * @qp: The QP to destroy.
   3783 * @udata: Valid udata or NULL for kernel objects
   3784 */
   3785int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata);
   3786
   3787/**
   3788 * ib_destroy_qp - Destroys the specified kernel QP.
   3789 * @qp: The QP to destroy.
   3790 *
   3791 * NOTE: for user qp use ib_destroy_qp_user with valid udata!
   3792 */
   3793static inline int ib_destroy_qp(struct ib_qp *qp)
   3794{
   3795	return ib_destroy_qp_user(qp, NULL);
   3796}
   3797
   3798/**
   3799 * ib_open_qp - Obtain a reference to an existing sharable QP.
    3800 * @xrcd: XRC domain
   3801 * @qp_open_attr: Attributes identifying the QP to open.
   3802 *
   3803 * Returns a reference to a sharable QP.
   3804 */
   3805struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd,
   3806			 struct ib_qp_open_attr *qp_open_attr);
   3807
   3808/**
   3809 * ib_close_qp - Release an external reference to a QP.
   3810 * @qp: The QP handle to release
   3811 *
   3812 * The opened QP handle is released by the caller.  The underlying
   3813 * shared QP is not destroyed until all internal references are released.
   3814 */
   3815int ib_close_qp(struct ib_qp *qp);
   3816
   3817/**
   3818 * ib_post_send - Posts a list of work requests to the send queue of
   3819 *   the specified QP.
   3820 * @qp: The QP to post the work request on.
   3821 * @send_wr: A list of work requests to post on the send queue.
   3822 * @bad_send_wr: On an immediate failure, this parameter will reference
   3823 *   the work request that failed to be posted on the QP.
   3824 *
   3825 * While IBA Vol. 1 section 11.4.1.1 specifies that if an immediate
   3826 * error is returned, the QP state shall not be affected,
   3827 * ib_post_send() will return an immediate error after queueing any
   3828 * earlier work requests in the list.
   3829 */
   3830static inline int ib_post_send(struct ib_qp *qp,
   3831			       const struct ib_send_wr *send_wr,
   3832			       const struct ib_send_wr **bad_send_wr)
   3833{
   3834	const struct ib_send_wr *dummy;
   3835
   3836	return qp->device->ops.post_send(qp, send_wr, bad_send_wr ? : &dummy);
   3837}
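
/*
 * Usage sketch: my_post_one_send() is a hypothetical helper that posts a
 * single signalled SEND carrying one SGE.  The DMA address and lkey come
 * from a mapping owned by the caller.
 */
static inline int my_post_one_send(struct ib_qp *qp, u64 dma_addr, u32 length,
				   u32 lkey, u64 wr_id)
{
	struct ib_sge sge = {
		.addr	= dma_addr,
		.length	= length,
		.lkey	= lkey,
	};
	struct ib_send_wr wr = {
		.wr_id	    = wr_id,
		.sg_list    = &sge,
		.num_sge    = 1,
		.opcode	    = IB_WR_SEND,
		.send_flags = IB_SEND_SIGNALED,
	};

	return ib_post_send(qp, &wr, NULL);
}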
   3838
   3839/**
   3840 * ib_post_recv - Posts a list of work requests to the receive queue of
   3841 *   the specified QP.
   3842 * @qp: The QP to post the work request on.
   3843 * @recv_wr: A list of work requests to post on the receive queue.
   3844 * @bad_recv_wr: On an immediate failure, this parameter will reference
   3845 *   the work request that failed to be posted on the QP.
   3846 */
   3847static inline int ib_post_recv(struct ib_qp *qp,
   3848			       const struct ib_recv_wr *recv_wr,
   3849			       const struct ib_recv_wr **bad_recv_wr)
   3850{
   3851	const struct ib_recv_wr *dummy;
   3852
   3853	return qp->device->ops.post_recv(qp, recv_wr, bad_recv_wr ? : &dummy);
   3854}
   3855
   3856struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, int nr_cqe,
   3857			    int comp_vector, enum ib_poll_context poll_ctx,
   3858			    const char *caller);
   3859static inline struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
   3860					int nr_cqe, int comp_vector,
   3861					enum ib_poll_context poll_ctx)
   3862{
   3863	return __ib_alloc_cq(dev, private, nr_cqe, comp_vector, poll_ctx,
   3864			     KBUILD_MODNAME);
   3865}
   3866
   3867struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private,
   3868				int nr_cqe, enum ib_poll_context poll_ctx,
   3869				const char *caller);
   3870
   3871/**
    3872 * ib_alloc_cq_any - Allocate kernel CQ
   3873 * @dev: The IB device
   3874 * @private: Private data attached to the CQE
   3875 * @nr_cqe: Number of CQEs in the CQ
   3876 * @poll_ctx: Context used for polling the CQ
   3877 */
   3878static inline struct ib_cq *ib_alloc_cq_any(struct ib_device *dev,
   3879					    void *private, int nr_cqe,
   3880					    enum ib_poll_context poll_ctx)
   3881{
   3882	return __ib_alloc_cq_any(dev, private, nr_cqe, poll_ctx,
   3883				 KBUILD_MODNAME);
   3884}
   3885
   3886void ib_free_cq(struct ib_cq *cq);
   3887int ib_process_cq_direct(struct ib_cq *cq, int budget);
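
/*
 * Usage sketch: my_cq_example() is a hypothetical helper showing the kernel
 * CQ lifecycle with a softirq polling context.
 */
static inline int my_cq_example(struct ib_device *dev)
{
	struct ib_cq *cq;

	cq = ib_alloc_cq_any(dev, NULL, 128, IB_POLL_SOFTIRQ);
	if (IS_ERR(cq))
		return PTR_ERR(cq);

	/* ... attach the CQ to QPs and run traffic ... */

	ib_free_cq(cq);
	return 0;
}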
   3888
   3889/**
   3890 * ib_create_cq - Creates a CQ on the specified device.
   3891 * @device: The device on which to create the CQ.
   3892 * @comp_handler: A user-specified callback that is invoked when a
   3893 *   completion event occurs on the CQ.
   3894 * @event_handler: A user-specified callback that is invoked when an
   3895 *   asynchronous event not associated with a completion occurs on the CQ.
   3896 * @cq_context: Context associated with the CQ returned to the user via
   3897 *   the associated completion and event handlers.
   3898 * @cq_attr: The attributes the CQ should be created upon.
   3899 *
   3900 * Users can examine the cq structure to determine the actual CQ size.
   3901 */
   3902struct ib_cq *__ib_create_cq(struct ib_device *device,
   3903			     ib_comp_handler comp_handler,
   3904			     void (*event_handler)(struct ib_event *, void *),
   3905			     void *cq_context,
   3906			     const struct ib_cq_init_attr *cq_attr,
   3907			     const char *caller);
   3908#define ib_create_cq(device, cmp_hndlr, evt_hndlr, cq_ctxt, cq_attr) \
   3909	__ib_create_cq((device), (cmp_hndlr), (evt_hndlr), (cq_ctxt), (cq_attr), KBUILD_MODNAME)
   3910
   3911/**
   3912 * ib_resize_cq - Modifies the capacity of the CQ.
   3913 * @cq: The CQ to resize.
   3914 * @cqe: The minimum size of the CQ.
   3915 *
   3916 * Users can examine the cq structure to determine the actual CQ size.
   3917 */
   3918int ib_resize_cq(struct ib_cq *cq, int cqe);
   3919
   3920/**
   3921 * rdma_set_cq_moderation - Modifies moderation params of the CQ
   3922 * @cq: The CQ to modify.
   3923 * @cq_count: number of CQEs that will trigger an event
   3924 * @cq_period: max period of time in usec before triggering an event
   3925 *
   3926 */
   3927int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period);
   3928
   3929/**
   3930 * ib_destroy_cq_user - Destroys the specified CQ.
   3931 * @cq: The CQ to destroy.
   3932 * @udata: Valid user data or NULL for kernel objects
   3933 */
   3934int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata);
   3935
   3936/**
   3937 * ib_destroy_cq - Destroys the specified kernel CQ.
   3938 * @cq: The CQ to destroy.
   3939 *
   3940 * NOTE: for user cq use ib_destroy_cq_user with valid udata!
   3941 */
   3942static inline void ib_destroy_cq(struct ib_cq *cq)
   3943{
   3944	int ret = ib_destroy_cq_user(cq, NULL);
   3945
   3946	WARN_ONCE(ret, "Destroy of kernel CQ shouldn't fail");
   3947}
   3948
   3949/**
   3950 * ib_poll_cq - poll a CQ for completion(s)
    3951 * @cq: the CQ being polled
    3952 * @num_entries: maximum number of completions to return
    3953 * @wc: array of at least @num_entries &struct ib_wc where completions
   3954 *   will be returned
   3955 *
   3956 * Poll a CQ for (possibly multiple) completions.  If the return value
   3957 * is < 0, an error occurred.  If the return value is >= 0, it is the
   3958 * number of completions returned.  If the return value is
   3959 * non-negative and < num_entries, then the CQ was emptied.
   3960 */
   3961static inline int ib_poll_cq(struct ib_cq *cq, int num_entries,
   3962			     struct ib_wc *wc)
   3963{
   3964	return cq->device->ops.poll_cq(cq, num_entries, wc);
   3965}
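
/*
 * Usage sketch: my_drain_completions() is a hypothetical helper that empties
 * whatever is currently queued on a CQ, handing each work completion to a
 * caller-supplied handler.
 */
static inline int my_drain_completions(struct ib_cq *cq,
				       void (*handle_wc)(struct ib_wc *wc))
{
	struct ib_wc wc[16];
	int total = 0, n, i;

	while ((n = ib_poll_cq(cq, ARRAY_SIZE(wc), wc)) > 0) {
		for (i = 0; i < n; i++)
			handle_wc(&wc[i]);
		total += n;
	}

	return n < 0 ? n : total;
}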
   3966
   3967/**
   3968 * ib_req_notify_cq - Request completion notification on a CQ.
   3969 * @cq: The CQ to generate an event for.
   3970 * @flags:
   3971 *   Must contain exactly one of %IB_CQ_SOLICITED or %IB_CQ_NEXT_COMP
   3972 *   to request an event on the next solicited event or next work
    3973 *   completion of any type, respectively. %IB_CQ_REPORT_MISSED_EVENTS
   3974 *   may also be |ed in to request a hint about missed events, as
   3975 *   described below.
   3976 *
   3977 * Return Value:
   3978 *    < 0 means an error occurred while requesting notification
   3979 *   == 0 means notification was requested successfully, and if
   3980 *        IB_CQ_REPORT_MISSED_EVENTS was passed in, then no events
   3981 *        were missed and it is safe to wait for another event.  In
    3982 *        this case it is guaranteed that any work completions added
   3983 *        to the CQ since the last CQ poll will trigger a completion
   3984 *        notification event.
   3985 *    > 0 is only returned if IB_CQ_REPORT_MISSED_EVENTS was passed
   3986 *        in.  It means that the consumer must poll the CQ again to
   3987 *        make sure it is empty to avoid missing an event because of a
   3988 *        race between requesting notification and an entry being
   3989 *        added to the CQ.  This return value means it is possible
   3990 *        (but not guaranteed) that a work completion has been added
   3991 *        to the CQ since the last poll without triggering a
   3992 *        completion notification event.
   3993 */
   3994static inline int ib_req_notify_cq(struct ib_cq *cq,
   3995				   enum ib_cq_notify_flags flags)
   3996{
   3997	return cq->device->ops.req_notify_cq(cq, flags);
   3998}
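
/*
 * Usage sketch: my_arm_cq() is a hypothetical helper using the classic
 * "arm, then re-poll" pattern.  Passing IB_CQ_REPORT_MISSED_EVENTS lets the
 * caller detect the race described above and poll once more instead of
 * waiting for a notification that may never come.
 */
static inline bool my_arm_cq(struct ib_cq *cq)
{
	int ret = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP |
				       IB_CQ_REPORT_MISSED_EVENTS);

	/* ret > 0: a completion may already be queued; poll again now. */
	return ret > 0;
}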
   3999
   4000struct ib_cq *ib_cq_pool_get(struct ib_device *dev, unsigned int nr_cqe,
   4001			     int comp_vector_hint,
   4002			     enum ib_poll_context poll_ctx);
   4003
   4004void ib_cq_pool_put(struct ib_cq *cq, unsigned int nr_cqe);
   4005
   4006/*
    4007 * Drivers that don't need a DMA mapping at the RDMA layer set dma_device to
   4008 * NULL. This causes the ib_dma* helpers to just stash the kernel virtual
   4009 * address into the dma address.
   4010 */
   4011static inline bool ib_uses_virt_dma(struct ib_device *dev)
   4012{
   4013	return IS_ENABLED(CONFIG_INFINIBAND_VIRT_DMA) && !dev->dma_device;
   4014}
   4015
   4016/**
   4017 * ib_dma_mapping_error - check a DMA addr for error
   4018 * @dev: The device for which the dma_addr was created
   4019 * @dma_addr: The DMA address to check
   4020 */
   4021static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
   4022{
   4023	if (ib_uses_virt_dma(dev))
   4024		return 0;
   4025	return dma_mapping_error(dev->dma_device, dma_addr);
   4026}
   4027
   4028/**
   4029 * ib_dma_map_single - Map a kernel virtual address to DMA address
   4030 * @dev: The device for which the dma_addr is to be created
   4031 * @cpu_addr: The kernel virtual address
   4032 * @size: The size of the region in bytes
   4033 * @direction: The direction of the DMA
   4034 */
   4035static inline u64 ib_dma_map_single(struct ib_device *dev,
   4036				    void *cpu_addr, size_t size,
   4037				    enum dma_data_direction direction)
   4038{
   4039	if (ib_uses_virt_dma(dev))
   4040		return (uintptr_t)cpu_addr;
   4041	return dma_map_single(dev->dma_device, cpu_addr, size, direction);
   4042}
   4043
   4044/**
   4045 * ib_dma_unmap_single - Destroy a mapping created by ib_dma_map_single()
   4046 * @dev: The device for which the DMA address was created
   4047 * @addr: The DMA address
   4048 * @size: The size of the region in bytes
   4049 * @direction: The direction of the DMA
   4050 */
   4051static inline void ib_dma_unmap_single(struct ib_device *dev,
   4052				       u64 addr, size_t size,
   4053				       enum dma_data_direction direction)
   4054{
   4055	if (!ib_uses_virt_dma(dev))
   4056		dma_unmap_single(dev->dma_device, addr, size, direction);
   4057}
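
/*
 * Usage sketch: my_map_buffer() is a hypothetical helper showing the
 * map / check / unmap pattern for a single kernel buffer.
 */
static inline int my_map_buffer(struct ib_device *dev, void *buf, size_t len)
{
	u64 dma_addr;

	dma_addr = ib_dma_map_single(dev, buf, len, DMA_TO_DEVICE);
	if (ib_dma_mapping_error(dev, dma_addr))
		return -ENOMEM;

	/* ... use dma_addr as the SGE address of a posted work request ... */

	ib_dma_unmap_single(dev, dma_addr, len, DMA_TO_DEVICE);
	return 0;
}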
   4058
   4059/**
   4060 * ib_dma_map_page - Map a physical page to DMA address
   4061 * @dev: The device for which the dma_addr is to be created
   4062 * @page: The page to be mapped
   4063 * @offset: The offset within the page
   4064 * @size: The size of the region in bytes
   4065 * @direction: The direction of the DMA
   4066 */
   4067static inline u64 ib_dma_map_page(struct ib_device *dev,
   4068				  struct page *page,
   4069				  unsigned long offset,
   4070				  size_t size,
    4071				  enum dma_data_direction direction)
   4072{
   4073	if (ib_uses_virt_dma(dev))
   4074		return (uintptr_t)(page_address(page) + offset);
   4075	return dma_map_page(dev->dma_device, page, offset, size, direction);
   4076}
   4077
   4078/**
   4079 * ib_dma_unmap_page - Destroy a mapping created by ib_dma_map_page()
   4080 * @dev: The device for which the DMA address was created
   4081 * @addr: The DMA address
   4082 * @size: The size of the region in bytes
   4083 * @direction: The direction of the DMA
   4084 */
   4085static inline void ib_dma_unmap_page(struct ib_device *dev,
   4086				     u64 addr, size_t size,
   4087				     enum dma_data_direction direction)
   4088{
   4089	if (!ib_uses_virt_dma(dev))
   4090		dma_unmap_page(dev->dma_device, addr, size, direction);
   4091}
   4092
   4093int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents);
   4094static inline int ib_dma_map_sg_attrs(struct ib_device *dev,
   4095				      struct scatterlist *sg, int nents,
   4096				      enum dma_data_direction direction,
   4097				      unsigned long dma_attrs)
   4098{
   4099	if (ib_uses_virt_dma(dev))
   4100		return ib_dma_virt_map_sg(dev, sg, nents);
   4101	return dma_map_sg_attrs(dev->dma_device, sg, nents, direction,
   4102				dma_attrs);
   4103}
   4104
   4105static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
   4106					 struct scatterlist *sg, int nents,
   4107					 enum dma_data_direction direction,
   4108					 unsigned long dma_attrs)
   4109{
   4110	if (!ib_uses_virt_dma(dev))
   4111		dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction,
   4112				   dma_attrs);
   4113}
   4114
   4115/**
   4116 * ib_dma_map_sgtable_attrs - Map a scatter/gather table to DMA addresses
   4117 * @dev: The device for which the DMA addresses are to be created
    4118 * @sgt: The sg_table object describing the buffer
    4119 * @direction: The direction of the DMA
    4120 * @dma_attrs: Optional DMA attributes for the map operation
   4121 */
   4122static inline int ib_dma_map_sgtable_attrs(struct ib_device *dev,
   4123					   struct sg_table *sgt,
   4124					   enum dma_data_direction direction,
   4125					   unsigned long dma_attrs)
   4126{
   4127	int nents;
   4128
   4129	if (ib_uses_virt_dma(dev)) {
   4130		nents = ib_dma_virt_map_sg(dev, sgt->sgl, sgt->orig_nents);
   4131		if (!nents)
   4132			return -EIO;
   4133		sgt->nents = nents;
   4134		return 0;
   4135	}
   4136	return dma_map_sgtable(dev->dma_device, sgt, direction, dma_attrs);
   4137}
   4138
   4139static inline void ib_dma_unmap_sgtable_attrs(struct ib_device *dev,
   4140					      struct sg_table *sgt,
   4141					      enum dma_data_direction direction,
   4142					      unsigned long dma_attrs)
   4143{
   4144	if (!ib_uses_virt_dma(dev))
   4145		dma_unmap_sgtable(dev->dma_device, sgt, direction, dma_attrs);
   4146}
   4147
   4148/**
   4149 * ib_dma_map_sg - Map a scatter/gather list to DMA addresses
   4150 * @dev: The device for which the DMA addresses are to be created
   4151 * @sg: The array of scatter/gather entries
   4152 * @nents: The number of scatter/gather entries
   4153 * @direction: The direction of the DMA
   4154 */
   4155static inline int ib_dma_map_sg(struct ib_device *dev,
   4156				struct scatterlist *sg, int nents,
   4157				enum dma_data_direction direction)
   4158{
   4159	return ib_dma_map_sg_attrs(dev, sg, nents, direction, 0);
   4160}
   4161
   4162/**
   4163 * ib_dma_unmap_sg - Unmap a scatter/gather list of DMA addresses
   4164 * @dev: The device for which the DMA addresses were created
   4165 * @sg: The array of scatter/gather entries
   4166 * @nents: The number of scatter/gather entries
   4167 * @direction: The direction of the DMA
   4168 */
   4169static inline void ib_dma_unmap_sg(struct ib_device *dev,
   4170				   struct scatterlist *sg, int nents,
   4171				   enum dma_data_direction direction)
   4172{
   4173	ib_dma_unmap_sg_attrs(dev, sg, nents, direction, 0);
   4174}
   4175
   4176/**
   4177 * ib_dma_max_seg_size - Return the size limit of a single DMA transfer
   4178 * @dev: The device to query
   4179 *
   4180 * The returned value represents a size in bytes.
   4181 */
   4182static inline unsigned int ib_dma_max_seg_size(struct ib_device *dev)
   4183{
   4184	if (ib_uses_virt_dma(dev))
   4185		return UINT_MAX;
   4186	return dma_get_max_seg_size(dev->dma_device);
   4187}
   4188
   4189/**
   4190 * ib_dma_sync_single_for_cpu - Prepare DMA region to be accessed by CPU
   4191 * @dev: The device for which the DMA address was created
   4192 * @addr: The DMA address
   4193 * @size: The size of the region in bytes
   4194 * @dir: The direction of the DMA
   4195 */
   4196static inline void ib_dma_sync_single_for_cpu(struct ib_device *dev,
   4197					      u64 addr,
   4198					      size_t size,
   4199					      enum dma_data_direction dir)
   4200{
   4201	if (!ib_uses_virt_dma(dev))
   4202		dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
   4203}
   4204
   4205/**
   4206 * ib_dma_sync_single_for_device - Prepare DMA region to be accessed by device
   4207 * @dev: The device for which the DMA address was created
   4208 * @addr: The DMA address
   4209 * @size: The size of the region in bytes
   4210 * @dir: The direction of the DMA
   4211 */
   4212static inline void ib_dma_sync_single_for_device(struct ib_device *dev,
   4213						 u64 addr,
   4214						 size_t size,
   4215						 enum dma_data_direction dir)
   4216{
   4217	if (!ib_uses_virt_dma(dev))
   4218		dma_sync_single_for_device(dev->dma_device, addr, size, dir);
   4219}
   4220
   4221/* ib_reg_user_mr - register a memory region for virtual addresses from kernel
   4222 * space. This function should be called when 'current' is the owning MM.
   4223 */
   4224struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
   4225			     u64 virt_addr, int mr_access_flags);
   4226
    4227/* ib_advise_mr - give advice about an address range in a memory region */
   4228int ib_advise_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice,
   4229		 u32 flags, struct ib_sge *sg_list, u32 num_sge);
   4230/**
   4231 * ib_dereg_mr_user - Deregisters a memory region and removes it from the
   4232 *   HCA translation table.
   4233 * @mr: The memory region to deregister.
   4234 * @udata: Valid user data or NULL for kernel object
   4235 *
   4236 * This function can fail, if the memory region has memory windows bound to it.
   4237 */
   4238int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata);
   4239
   4240/**
   4241 * ib_dereg_mr - Deregisters a kernel memory region and removes it from the
   4242 *   HCA translation table.
   4243 * @mr: The memory region to deregister.
   4244 *
   4245 * This function can fail, if the memory region has memory windows bound to it.
   4246 *
   4247 * NOTE: for user mr use ib_dereg_mr_user with valid udata!
   4248 */
   4249static inline int ib_dereg_mr(struct ib_mr *mr)
   4250{
   4251	return ib_dereg_mr_user(mr, NULL);
   4252}
   4253
   4254struct ib_mr *ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
   4255			  u32 max_num_sg);
   4256
   4257struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd,
   4258				    u32 max_num_data_sg,
   4259				    u32 max_num_meta_sg);
   4260
   4261/**
   4262 * ib_update_fast_reg_key - updates the key portion of the fast_reg MR
   4263 *   R_Key and L_Key.
    4264 * @mr: struct ib_mr pointer to be updated.
    4265 * @newkey: new key to be used.
   4266 */
   4267static inline void ib_update_fast_reg_key(struct ib_mr *mr, u8 newkey)
   4268{
   4269	mr->lkey = (mr->lkey & 0xffffff00) | newkey;
   4270	mr->rkey = (mr->rkey & 0xffffff00) | newkey;
   4271}
   4272
   4273/**
   4274 * ib_inc_rkey - increments the key portion of the given rkey. Can be used
   4275 * for calculating a new rkey for type 2 memory windows.
    4276 * @rkey: the rkey to increment.
   4277 */
   4278static inline u32 ib_inc_rkey(u32 rkey)
   4279{
   4280	const u32 mask = 0x000000ff;
   4281	return ((rkey + 1) & mask) | (rkey & ~mask);
   4282}
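
/*
 * Usage sketch: my_refresh_mr_key() is a hypothetical helper that rotates the
 * key byte of a fast-register MR before reusing it, so a stale rkey held by a
 * remote peer can no longer match the new registration.
 */
static inline u32 my_refresh_mr_key(struct ib_mr *mr)
{
	ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey) & 0xff);
	return mr->rkey;
}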
   4283
   4284/**
   4285 * ib_attach_mcast - Attaches the specified QP to a multicast group.
   4286 * @qp: QP to attach to the multicast group.  The QP must be type
   4287 *   IB_QPT_UD.
   4288 * @gid: Multicast group GID.
   4289 * @lid: Multicast group LID in host byte order.
   4290 *
   4291 * In order to send and receive multicast packets, subnet
   4292 * administration must have created the multicast group and configured
   4293 * the fabric appropriately.  The port associated with the specified
   4294 * QP must also be a member of the multicast group.
   4295 */
   4296int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
   4297
   4298/**
   4299 * ib_detach_mcast - Detaches the specified QP from a multicast group.
   4300 * @qp: QP to detach from the multicast group.
   4301 * @gid: Multicast group GID.
   4302 * @lid: Multicast group LID in host byte order.
   4303 */
   4304int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
   4305
   4306struct ib_xrcd *ib_alloc_xrcd_user(struct ib_device *device,
   4307				   struct inode *inode, struct ib_udata *udata);
   4308int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata);
   4309
   4310static inline int ib_check_mr_access(struct ib_device *ib_dev,
   4311				     unsigned int flags)
   4312{
   4313	/*
   4314	 * Local write permission is required if remote write or
   4315	 * remote atomic permission is also requested.
   4316	 */
   4317	if (flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) &&
   4318	    !(flags & IB_ACCESS_LOCAL_WRITE))
   4319		return -EINVAL;
   4320
   4321	if (flags & ~IB_ACCESS_SUPPORTED)
   4322		return -EINVAL;
   4323
   4324	if (flags & IB_ACCESS_ON_DEMAND &&
   4325	    !(ib_dev->attrs.kernel_cap_flags & IBK_ON_DEMAND_PAGING))
   4326		return -EINVAL;
   4327	return 0;
   4328}
   4329
   4330static inline bool ib_access_writable(int access_flags)
   4331{
   4332	/*
   4333	 * We have writable memory backing the MR if any of the following
   4334	 * access flags are set.  "Local write" and "remote write" obviously
   4335	 * require write access.  "Remote atomic" can do things like fetch and
   4336	 * add, which will modify memory, and "MW bind" can change permissions
   4337	 * by binding a window.
   4338	 */
   4339	return access_flags &
   4340		(IB_ACCESS_LOCAL_WRITE   | IB_ACCESS_REMOTE_WRITE |
   4341		 IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND);
   4342}
   4343
   4344/**
    4345 * ib_check_mr_status - lightweight check of MR status.
    4346 *     This routine may provide status checks on a selected
    4347 *     ib_mr.  The first use is for signature status checks.
   4348 *
   4349 * @mr: A memory region.
   4350 * @check_mask: Bitmask of which checks to perform from
   4351 *     ib_mr_status_check enumeration.
   4352 * @mr_status: The container of relevant status checks.
   4353 *     failed checks will be indicated in the status bitmask
   4354 *     and the relevant info shall be in the error item.
   4355 */
   4356int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
   4357		       struct ib_mr_status *mr_status);
   4358
   4359/**
    4360 * ib_device_try_get - Hold a registration lock
    4361 * @dev: The device to lock
   4362 *
   4363 * A device under an active registration lock cannot become unregistered. It
   4364 * is only possible to obtain a registration lock on a device that is fully
   4365 * registered, otherwise this function returns false.
   4366 *
   4367 * The registration lock is only necessary for actions which require the
   4368 * device to still be registered. Uses that only require the device pointer to
   4369 * be valid should use get_device(&ibdev->dev) to hold the memory.
   4370 *
   4371 */
   4372static inline bool ib_device_try_get(struct ib_device *dev)
   4373{
   4374	return refcount_inc_not_zero(&dev->refcount);
   4375}
   4376
   4377void ib_device_put(struct ib_device *device);
   4378struct ib_device *ib_device_get_by_netdev(struct net_device *ndev,
   4379					  enum rdma_driver_id driver_id);
   4380struct ib_device *ib_device_get_by_name(const char *name,
   4381					enum rdma_driver_id driver_id);
   4382struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u32 port,
   4383					    u16 pkey, const union ib_gid *gid,
   4384					    const struct sockaddr *addr);
   4385int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
   4386			 unsigned int port);
   4387struct net_device *ib_device_netdev(struct ib_device *dev, u32 port);
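
/*
 * Usage sketch: my_with_device() is a hypothetical wrapper that holds the
 * registration lock around work which requires the device to stay registered.
 */
static inline int my_with_device(struct ib_device *dev,
				 int (*work)(struct ib_device *dev))
{
	int ret;

	if (!ib_device_try_get(dev))
		return -ENODEV;

	ret = work(dev);
	ib_device_put(dev);
	return ret;
}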
   4388
   4389struct ib_wq *ib_create_wq(struct ib_pd *pd,
   4390			   struct ib_wq_init_attr *init_attr);
   4391int ib_destroy_wq_user(struct ib_wq *wq, struct ib_udata *udata);
   4392
   4393int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
   4394		 unsigned int *sg_offset, unsigned int page_size);
   4395int ib_map_mr_sg_pi(struct ib_mr *mr, struct scatterlist *data_sg,
   4396		    int data_sg_nents, unsigned int *data_sg_offset,
   4397		    struct scatterlist *meta_sg, int meta_sg_nents,
   4398		    unsigned int *meta_sg_offset, unsigned int page_size);
   4399
   4400static inline int
   4401ib_map_mr_sg_zbva(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
   4402		  unsigned int *sg_offset, unsigned int page_size)
   4403{
   4404	int n;
   4405
   4406	n = ib_map_mr_sg(mr, sg, sg_nents, sg_offset, page_size);
   4407	mr->iova = 0;
   4408
   4409	return n;
   4410}
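
/*
 * Usage sketch: my_map_mr() is a hypothetical helper that maps a scatterlist
 * into a fast-register MR with PAGE_SIZE pages and fails if the MR cannot
 * cover the whole list.
 */
static inline int my_map_mr(struct ib_mr *mr, struct scatterlist *sg,
			    int sg_nents)
{
	int n = ib_map_mr_sg(mr, sg, sg_nents, NULL, PAGE_SIZE);

	if (n < 0)
		return n;
	if (n < sg_nents)
		return -EINVAL;	/* MR has too few pages for this list */
	return 0;
}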
   4411
   4412int ib_sg_to_pages(struct ib_mr *mr, struct scatterlist *sgl, int sg_nents,
   4413		unsigned int *sg_offset, int (*set_page)(struct ib_mr *, u64));
   4414
   4415void ib_drain_rq(struct ib_qp *qp);
   4416void ib_drain_sq(struct ib_qp *qp);
   4417void ib_drain_qp(struct ib_qp *qp);
   4418
   4419int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u16 *speed,
   4420		     u8 *width);
   4421
   4422static inline u8 *rdma_ah_retrieve_dmac(struct rdma_ah_attr *attr)
   4423{
   4424	if (attr->type == RDMA_AH_ATTR_TYPE_ROCE)
   4425		return attr->roce.dmac;
   4426	return NULL;
   4427}
   4428
   4429static inline void rdma_ah_set_dlid(struct rdma_ah_attr *attr, u32 dlid)
   4430{
   4431	if (attr->type == RDMA_AH_ATTR_TYPE_IB)
   4432		attr->ib.dlid = (u16)dlid;
   4433	else if (attr->type == RDMA_AH_ATTR_TYPE_OPA)
   4434		attr->opa.dlid = dlid;
   4435}
   4436
   4437static inline u32 rdma_ah_get_dlid(const struct rdma_ah_attr *attr)
   4438{
   4439	if (attr->type == RDMA_AH_ATTR_TYPE_IB)
   4440		return attr->ib.dlid;
   4441	else if (attr->type == RDMA_AH_ATTR_TYPE_OPA)
   4442		return attr->opa.dlid;
   4443	return 0;
   4444}
   4445
   4446static inline void rdma_ah_set_sl(struct rdma_ah_attr *attr, u8 sl)
   4447{
   4448	attr->sl = sl;
   4449}
   4450
   4451static inline u8 rdma_ah_get_sl(const struct rdma_ah_attr *attr)
   4452{
   4453	return attr->sl;
   4454}
   4455
   4456static inline void rdma_ah_set_path_bits(struct rdma_ah_attr *attr,
   4457					 u8 src_path_bits)
   4458{
   4459	if (attr->type == RDMA_AH_ATTR_TYPE_IB)
   4460		attr->ib.src_path_bits = src_path_bits;
   4461	else if (attr->type == RDMA_AH_ATTR_TYPE_OPA)
   4462		attr->opa.src_path_bits = src_path_bits;
   4463}
   4464
   4465static inline u8 rdma_ah_get_path_bits(const struct rdma_ah_attr *attr)
   4466{
   4467	if (attr->type == RDMA_AH_ATTR_TYPE_IB)
   4468		return attr->ib.src_path_bits;
   4469	else if (attr->type == RDMA_AH_ATTR_TYPE_OPA)
   4470		return attr->opa.src_path_bits;
   4471	return 0;
   4472}
   4473
   4474static inline void rdma_ah_set_make_grd(struct rdma_ah_attr *attr,
   4475					bool make_grd)
   4476{
   4477	if (attr->type == RDMA_AH_ATTR_TYPE_OPA)
   4478		attr->opa.make_grd = make_grd;
   4479}
   4480
   4481static inline bool rdma_ah_get_make_grd(const struct rdma_ah_attr *attr)
   4482{
   4483	if (attr->type == RDMA_AH_ATTR_TYPE_OPA)
   4484		return attr->opa.make_grd;
   4485	return false;
   4486}
   4487
   4488static inline void rdma_ah_set_port_num(struct rdma_ah_attr *attr, u32 port_num)
   4489{
   4490	attr->port_num = port_num;
   4491}
   4492
   4493static inline u32 rdma_ah_get_port_num(const struct rdma_ah_attr *attr)
   4494{
   4495	return attr->port_num;
   4496}
   4497
   4498static inline void rdma_ah_set_static_rate(struct rdma_ah_attr *attr,
   4499					   u8 static_rate)
   4500{
   4501	attr->static_rate = static_rate;
   4502}
   4503
   4504static inline u8 rdma_ah_get_static_rate(const struct rdma_ah_attr *attr)
   4505{
   4506	return attr->static_rate;
   4507}
   4508
   4509static inline void rdma_ah_set_ah_flags(struct rdma_ah_attr *attr,
   4510					enum ib_ah_flags flag)
   4511{
   4512	attr->ah_flags = flag;
   4513}
   4514
   4515static inline enum ib_ah_flags
   4516		rdma_ah_get_ah_flags(const struct rdma_ah_attr *attr)
   4517{
   4518	return attr->ah_flags;
   4519}
   4520
   4521static inline const struct ib_global_route
   4522		*rdma_ah_read_grh(const struct rdma_ah_attr *attr)
   4523{
   4524	return &attr->grh;
   4525}
   4526
    4527/* To retrieve and modify the GRH */
   4528static inline struct ib_global_route
   4529		*rdma_ah_retrieve_grh(struct rdma_ah_attr *attr)
   4530{
   4531	return &attr->grh;
   4532}
   4533
   4534static inline void rdma_ah_set_dgid_raw(struct rdma_ah_attr *attr, void *dgid)
   4535{
   4536	struct ib_global_route *grh = rdma_ah_retrieve_grh(attr);
   4537
   4538	memcpy(grh->dgid.raw, dgid, sizeof(grh->dgid));
   4539}
   4540
   4541static inline void rdma_ah_set_subnet_prefix(struct rdma_ah_attr *attr,
   4542					     __be64 prefix)
   4543{
   4544	struct ib_global_route *grh = rdma_ah_retrieve_grh(attr);
   4545
   4546	grh->dgid.global.subnet_prefix = prefix;
   4547}
   4548
   4549static inline void rdma_ah_set_interface_id(struct rdma_ah_attr *attr,
   4550					    __be64 if_id)
   4551{
   4552	struct ib_global_route *grh = rdma_ah_retrieve_grh(attr);
   4553
   4554	grh->dgid.global.interface_id = if_id;
   4555}
   4556
   4557static inline void rdma_ah_set_grh(struct rdma_ah_attr *attr,
   4558				   union ib_gid *dgid, u32 flow_label,
   4559				   u8 sgid_index, u8 hop_limit,
   4560				   u8 traffic_class)
   4561{
   4562	struct ib_global_route *grh = rdma_ah_retrieve_grh(attr);
   4563
   4564	attr->ah_flags = IB_AH_GRH;
   4565	if (dgid)
   4566		grh->dgid = *dgid;
   4567	grh->flow_label = flow_label;
   4568	grh->sgid_index = sgid_index;
   4569	grh->hop_limit = hop_limit;
   4570	grh->traffic_class = traffic_class;
   4571	grh->sgid_attr = NULL;
   4572}
   4573
   4574void rdma_destroy_ah_attr(struct rdma_ah_attr *ah_attr);
   4575void rdma_move_grh_sgid_attr(struct rdma_ah_attr *attr, union ib_gid *dgid,
   4576			     u32 flow_label, u8 hop_limit, u8 traffic_class,
   4577			     const struct ib_gid_attr *sgid_attr);
   4578void rdma_copy_ah_attr(struct rdma_ah_attr *dest,
   4579		       const struct rdma_ah_attr *src);
   4580void rdma_replace_ah_attr(struct rdma_ah_attr *old,
   4581			  const struct rdma_ah_attr *new);
   4582void rdma_move_ah_attr(struct rdma_ah_attr *dest, struct rdma_ah_attr *src);
   4583
   4584/**
   4585 * rdma_ah_find_type - Return address handle type.
   4586 *
   4587 * @dev: Device to be checked
   4588 * @port_num: Port number
   4589 */
   4590static inline enum rdma_ah_attr_type rdma_ah_find_type(struct ib_device *dev,
   4591						       u32 port_num)
   4592{
   4593	if (rdma_protocol_roce(dev, port_num))
   4594		return RDMA_AH_ATTR_TYPE_ROCE;
   4595	if (rdma_protocol_ib(dev, port_num)) {
   4596		if (rdma_cap_opa_ah(dev, port_num))
   4597			return RDMA_AH_ATTR_TYPE_OPA;
   4598		return RDMA_AH_ATTR_TYPE_IB;
   4599	}
   4600
   4601	return RDMA_AH_ATTR_TYPE_UNDEFINED;
   4602}
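
/*
 * Usage sketch: my_make_ah() is a hypothetical helper that builds a
 * GRH-carrying address vector and turns it into an address handle.  The
 * dgid and sgid_index values are placeholders supplied by the caller.
 */
static inline struct ib_ah *my_make_ah(struct ib_pd *pd, u32 port_num,
				       union ib_gid *dgid, u8 sgid_index)
{
	struct rdma_ah_attr attr = {};

	attr.type = rdma_ah_find_type(pd->device, port_num);
	rdma_ah_set_port_num(&attr, port_num);
	rdma_ah_set_sl(&attr, 0);
	/* flow_label 0, hop_limit 64, traffic_class 0 */
	rdma_ah_set_grh(&attr, dgid, 0, sgid_index, 64, 0);

	return rdma_create_ah(pd, &attr, RDMA_CREATE_AH_SLEEPABLE);
}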
   4603
   4604/**
   4605 * ib_lid_cpu16 - Return lid in 16bit CPU encoding.
    4606 *     In the current implementation the only way to get
    4607 *     the 32bit lid is from other sources for OPA.
   4608 *     For IB, lids will always be 16bits so cast the
   4609 *     value accordingly.
   4610 *
   4611 * @lid: A 32bit LID
   4612 */
   4613static inline u16 ib_lid_cpu16(u32 lid)
   4614{
   4615	WARN_ON_ONCE(lid & 0xFFFF0000);
   4616	return (u16)lid;
   4617}
   4618
   4619/**
   4620 * ib_lid_be16 - Return lid in 16bit BE encoding.
   4621 *
   4622 * @lid: A 32bit LID
   4623 */
   4624static inline __be16 ib_lid_be16(u32 lid)
   4625{
   4626	WARN_ON_ONCE(lid & 0xFFFF0000);
   4627	return cpu_to_be16((u16)lid);
   4628}
   4629
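/*
 * Illustrative sketch only: storing a 32 bit LID into a 16 bit big-endian
 * wire field.  'wire_lid' is a hypothetical destination used purely for
 * the example; ib_lid_be16() warns once if the LID does not fit.
 */
static inline void example_store_wire_lid(__be16 *wire_lid, u32 lid)
{
	*wire_lid = ib_lid_be16(lid);
}
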
   4630/**
   4631 * ib_get_vector_affinity - Get the affinity mappings of a given completion
   4632 *   vector
   4633 * @device:         the rdma device
   4634 * @comp_vector:    index of completion vector
   4635 *
   4636 * Returns NULL on failure, otherwise a corresponding cpu map of the
   4637 * completion vector (NULL is also returned if the device driver does
   4638 * not implement get_vector_affinity).
   4639 */
   4640static inline const struct cpumask *
   4641ib_get_vector_affinity(struct ib_device *device, int comp_vector)
   4642{
   4643	if (comp_vector < 0 || comp_vector >= device->num_comp_vectors ||
   4644	    !device->ops.get_vector_affinity)
   4645		return NULL;
   4646
   4647	return device->ops.get_vector_affinity(device, comp_vector);
   4648
   4649}
   4650
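/*
 * Illustrative sketch only: choosing a completion vector whose IRQ
 * affinity covers a preferred CPU, falling back to vector 0.  The helper
 * name is made up; the pattern resembles what a ULP might do before
 * allocating a CQ.
 */
static inline int example_pick_comp_vector(struct ib_device *dev, int cpu)
{
	int vec;

	for (vec = 0; vec < dev->num_comp_vectors; vec++) {
		const struct cpumask *mask =
			ib_get_vector_affinity(dev, vec);

		if (mask && cpumask_test_cpu(cpu, mask))
			return vec;
	}
	return 0;
}
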
   4651/**
   4652 * rdma_roce_rescan_device - Rescan all of the network devices in the system
   4653 * and add their gids, as needed, to the relevant RoCE devices.
   4654 *
   4655 * @ibdev:          the rdma device
   4656 */
   4657void rdma_roce_rescan_device(struct ib_device *ibdev);
   4658
   4659struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile);
   4660
   4661int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs);
   4662
   4663struct net_device *rdma_alloc_netdev(struct ib_device *device, u32 port_num,
   4664				     enum rdma_netdev_t type, const char *name,
   4665				     unsigned char name_assign_type,
   4666				     void (*setup)(struct net_device *));
   4667
   4668int rdma_init_netdev(struct ib_device *device, u32 port_num,
   4669		     enum rdma_netdev_t type, const char *name,
   4670		     unsigned char name_assign_type,
   4671		     void (*setup)(struct net_device *),
   4672		     struct net_device *netdev);
   4673
   4674/**
   4675 * rdma_device_to_ibdev - Get ib_device pointer from device pointer
   4676 *
   4677 * @device:	device pointer from which the ib_device pointer is retrieved
   4678 *
   4679 * rdma_device_to_ibdev() retrieves the ib_device pointer from the
   4680 * embedding ib_core_device's struct device.
   4681 */
   4682static inline struct ib_device *rdma_device_to_ibdev(struct device *device)
   4683{
   4684	struct ib_core_device *coredev =
   4685		container_of(device, struct ib_core_device, dev);
   4686
   4687	return coredev->owner;
   4688}
   4689
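/*
 * Illustrative sketch only: a sysfs show handler recovering the ib_device
 * from the struct device that sysfs passes in.  The attribute itself is
 * hypothetical; node_guid is the __be64 member of struct ib_device as in
 * mainline.
 */
static inline ssize_t example_node_guid_show(struct device *device,
					     struct device_attribute *attr,
					     char *buf)
{
	struct ib_device *ibdev = rdma_device_to_ibdev(device);

	return sysfs_emit(buf, "0x%llx\n", be64_to_cpu(ibdev->node_guid));
}
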
   4690/**
   4691 * ibdev_to_node - return the NUMA node for a given ib_device
   4692 * @ibdev:	device to get the NUMA node for.
   4693 */
   4694static inline int ibdev_to_node(struct ib_device *ibdev)
   4695{
   4696	struct device *parent = ibdev->dev.parent;
   4697
   4698	if (!parent)
   4699		return NUMA_NO_NODE;
   4700	return dev_to_node(parent);
   4701}
   4702
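/*
 * Illustrative sketch only: allocating per-device state on the device's
 * home NUMA node.  'struct example_ctx' and the helper name are made up
 * for the example.
 */
struct example_ctx {
	int dummy;
};

static inline struct example_ctx *example_alloc_ctx(struct ib_device *ibdev)
{
	return kzalloc_node(sizeof(struct example_ctx), GFP_KERNEL,
			    ibdev_to_node(ibdev));
}
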
   4703/**
   4704 * rdma_device_to_drv_device - Helper macro to reach back to driver's
   4705 *			       ib_device holder structure from device pointer.
   4706 *
   4707 * NOTE: New drivers should not make use of this API; this API is only for
   4708 * existing drivers that have exposed sysfs entries using
   4709 * ops->device_group.
   4710 */
   4711#define rdma_device_to_drv_device(dev, drv_dev_struct, ibdev_member)           \
   4712	container_of(rdma_device_to_ibdev(dev), drv_dev_struct, ibdev_member)
   4713
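/*
 * Illustrative sketch only: how a legacy driver exposing sysfs entries via
 * ops->device_group reaches its private device structure.  'struct
 * example_drv_dev' and its 'ibdev' member are hypothetical.
 */
struct example_drv_dev {
	struct ib_device ibdev;
	u32 fw_ver;
};

static inline struct example_drv_dev *
example_drv_dev_from_sysfs(struct device *device)
{
	return rdma_device_to_drv_device(device, struct example_drv_dev,
					 ibdev);
}
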
   4714bool rdma_dev_access_netns(const struct ib_device *device,
   4715			   const struct net *net);
   4716
   4717#define IB_ROCE_UDP_ENCAP_VALID_PORT_MIN (0xC000)
   4718#define IB_ROCE_UDP_ENCAP_VALID_PORT_MAX (0xFFFF)
   4719#define IB_GRH_FLOWLABEL_MASK (0x000FFFFF)
   4720
   4721/**
   4722 * rdma_flow_label_to_udp_sport - generate a RoCE v2 UDP src port value based
   4723 *                               on the flow_label
   4724 *
   4725 * This function will convert the 20 bit flow_label input to a valid 14 bit
   4726 * RoCE v2 UDP src port value. All RoCE v2 drivers should use this same
   4727 * convention.
   4728 */
   4729static inline u16 rdma_flow_label_to_udp_sport(u32 fl)
   4730{
   4731	u32 fl_low = fl & 0x03FFF, fl_high = fl & 0xFC000;
   4732
   4733	fl_low ^= fl_high >> 14;
   4734	return (u16)(fl_low | IB_ROCE_UDP_ENCAP_VALID_PORT_MIN);
   4735}
   4736
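/*
 * Worked example for the folding above (input value chosen purely for
 * illustration): fl = 0xABCDE gives fl_low = 0x3CDE and fl_high = 0xA8000;
 * fl_high >> 14 = 0x2A, fl_low ^ 0x2A = 0x3CF4, so the returned UDP source
 * port is 0xC000 | 0x3CF4 = 0xFCF4, within the valid range 0xC000..0xFFFF.
 */
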
   4737/**
   4738 * rdma_calc_flow_label - generate a RDMA symmetric flow label value based on
   4739 *                        local and remote qpn values
   4740 *
   4741 * This function folds the multiplication result of the two 24 bit qpn
   4742 * fields and converts it to a 20 bit result.
   4743 *
   4744 * This function will create a symmetric flow_label value based on the local
   4745 * and remote qpn values. This will allow both the requester and responder
   4746 * to calculate the same flow_label for a given connection.
   4747 *
   4748 * This helper function should be used by the driver in case the upper layer
   4749 * provides a zero flow_label value. This is to improve the entropy of RDMA
   4750 * traffic in the network.
   4751 */
   4752static inline u32 rdma_calc_flow_label(u32 lqpn, u32 rqpn)
   4753{
   4754	u64 v = (u64)lqpn * rqpn;
   4755
   4756	v ^= v >> 20;
   4757	v ^= v >> 40;
   4758
   4759	return (u32)(v & IB_GRH_FLOWLABEL_MASK);
   4760}
   4761
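/*
 * Worked example for the folding above (QPN values chosen purely for
 * illustration): lqpn = 0x11 and rqpn = 0x22 give v = 0x242; the v >> 20
 * and v >> 40 terms are zero here, so the flow label is 0x00242.  For
 * larger QPNs the two shifts fold the high bits of the up-to-48-bit
 * product back into the low 20 bits, and the result is symmetric because
 * multiplication commutes.
 */
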
   4762/**
   4763 * rdma_get_udp_sport - Calculate the UDP source port based on the flow
   4764 *                      label. If flow label is not defined in GRH then
   4765 *                      calculate it based on lqpn/rqpn.
   4766 *
   4767 * @fl:                 flow label from GRH
   4768 * @lqpn:               local qp number
   4769 * @rqpn:               remote qp number
   4770 */
   4771static inline u16 rdma_get_udp_sport(u32 fl, u32 lqpn, u32 rqpn)
   4772{
   4773	if (!fl)
   4774		fl = rdma_calc_flow_label(lqpn, rqpn);
   4775
   4776	return rdma_flow_label_to_udp_sport(fl);
   4777}
   4778
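/*
 * Illustrative sketch only: deriving the RoCE v2 UDP source port for a QP
 * from its address handle attribute and QP numbers.  The helper name and
 * parameters are made up for the example.
 */
static inline u16 example_qp_udp_sport(const struct rdma_ah_attr *ah_attr,
				       u32 lqpn, u32 rqpn)
{
	const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);

	return rdma_get_udp_sport(grh->flow_label, lqpn, rqpn);
}
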
   4779const struct ib_port_immutable*
   4780ib_port_immutable_read(struct ib_device *dev, unsigned int port);
   4781#endif /* IB_VERBS_H */