cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

sa_query.c (61179B)


      1/*
      2 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
      3 * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
      4 * Copyright (c) 2006 Intel Corporation.  All rights reserved.
      5 *
      6 * This software is available to you under a choice of one of two
      7 * licenses.  You may choose to be licensed under the terms of the GNU
      8 * General Public License (GPL) Version 2, available from the file
      9 * COPYING in the main directory of this source tree, or the
     10 * OpenIB.org BSD license below:
     11 *
     12 *     Redistribution and use in source and binary forms, with or
     13 *     without modification, are permitted provided that the following
     14 *     conditions are met:
     15 *
     16 *      - Redistributions of source code must retain the above
     17 *        copyright notice, this list of conditions and the following
     18 *        disclaimer.
     19 *
     20 *      - Redistributions in binary form must reproduce the above
     21 *        copyright notice, this list of conditions and the following
     22 *        disclaimer in the documentation and/or other materials
     23 *        provided with the distribution.
     24 *
     25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
     28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
     29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
     30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
     31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     32 * SOFTWARE.
     33 */
     34
     35#include <linux/init.h>
     36#include <linux/err.h>
     37#include <linux/random.h>
     38#include <linux/spinlock.h>
     39#include <linux/slab.h>
     40#include <linux/dma-mapping.h>
     41#include <linux/kref.h>
     42#include <linux/xarray.h>
     43#include <linux/workqueue.h>
     44#include <uapi/linux/if_ether.h>
     45#include <rdma/ib_pack.h>
     46#include <rdma/ib_cache.h>
     47#include <rdma/rdma_netlink.h>
     48#include <net/netlink.h>
     49#include <uapi/rdma/ib_user_sa.h>
     50#include <rdma/ib_marshall.h>
     51#include <rdma/ib_addr.h>
     52#include <rdma/opa_addr.h>
     53#include "sa.h"
     54#include "core_priv.h"
     55
     56#define IB_SA_LOCAL_SVC_TIMEOUT_MIN		100
     57#define IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT		2000
     58#define IB_SA_LOCAL_SVC_TIMEOUT_MAX		200000
     59#define IB_SA_CPI_MAX_RETRY_CNT			3
      60#define IB_SA_CPI_RETRY_WAIT			1000 /* msecs */
     61static int sa_local_svc_timeout_ms = IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT;
     62
     63struct ib_sa_sm_ah {
     64	struct ib_ah        *ah;
     65	struct kref          ref;
     66	u16		     pkey_index;
     67	u8		     src_path_mask;
     68};
     69
     70enum rdma_class_port_info_type {
     71	RDMA_CLASS_PORT_INFO_IB,
     72	RDMA_CLASS_PORT_INFO_OPA
     73};
     74
     75struct rdma_class_port_info {
     76	enum rdma_class_port_info_type type;
     77	union {
     78		struct ib_class_port_info ib;
     79		struct opa_class_port_info opa;
     80	};
     81};
     82
     83struct ib_sa_classport_cache {
     84	bool valid;
     85	int retry_cnt;
     86	struct rdma_class_port_info data;
     87};
     88
     89struct ib_sa_port {
     90	struct ib_mad_agent *agent;
     91	struct ib_sa_sm_ah  *sm_ah;
     92	struct work_struct   update_task;
     93	struct ib_sa_classport_cache classport_info;
     94	struct delayed_work ib_cpi_work;
     95	spinlock_t                   classport_lock; /* protects class port info set */
     96	spinlock_t           ah_lock;
     97	u32		     port_num;
     98};
     99
    100struct ib_sa_device {
    101	int                     start_port, end_port;
    102	struct ib_event_handler event_handler;
    103	struct ib_sa_port port[];
    104};
    105
    106struct ib_sa_query {
    107	void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *);
    108	void (*release)(struct ib_sa_query *);
    109	struct ib_sa_client    *client;
    110	struct ib_sa_port      *port;
    111	struct ib_mad_send_buf *mad_buf;
    112	struct ib_sa_sm_ah     *sm_ah;
    113	int			id;
    114	u32			flags;
    115	struct list_head	list; /* Local svc request list */
    116	u32			seq; /* Local svc request sequence number */
    117	unsigned long		timeout; /* Local svc timeout */
    118	u8			path_use; /* How will the pathrecord be used */
    119};
    120
    121#define IB_SA_ENABLE_LOCAL_SERVICE	0x00000001
    122#define IB_SA_CANCEL			0x00000002
    123#define IB_SA_QUERY_OPA			0x00000004
    124
    125struct ib_sa_path_query {
    126	void (*callback)(int, struct sa_path_rec *, void *);
    127	void *context;
    128	struct ib_sa_query sa_query;
    129	struct sa_path_rec *conv_pr;
    130};
    131
    132struct ib_sa_guidinfo_query {
    133	void (*callback)(int, struct ib_sa_guidinfo_rec *, void *);
    134	void *context;
    135	struct ib_sa_query sa_query;
    136};
    137
    138struct ib_sa_classport_info_query {
    139	void (*callback)(void *);
    140	void *context;
    141	struct ib_sa_query sa_query;
    142};
    143
    144struct ib_sa_mcmember_query {
    145	void (*callback)(int, struct ib_sa_mcmember_rec *, void *);
    146	void *context;
    147	struct ib_sa_query sa_query;
    148};
    149
    150static LIST_HEAD(ib_nl_request_list);
    151static DEFINE_SPINLOCK(ib_nl_request_lock);
    152static atomic_t ib_nl_sa_request_seq;
    153static struct workqueue_struct *ib_nl_wq;
    154static struct delayed_work ib_nl_timed_work;
    155static const struct nla_policy ib_nl_policy[LS_NLA_TYPE_MAX] = {
    156	[LS_NLA_TYPE_PATH_RECORD]	= {.type = NLA_BINARY,
    157		.len = sizeof(struct ib_path_rec_data)},
    158	[LS_NLA_TYPE_TIMEOUT]		= {.type = NLA_U32},
    159	[LS_NLA_TYPE_SERVICE_ID]	= {.type = NLA_U64},
    160	[LS_NLA_TYPE_DGID]		= {.type = NLA_BINARY,
    161		.len = sizeof(struct rdma_nla_ls_gid)},
    162	[LS_NLA_TYPE_SGID]		= {.type = NLA_BINARY,
    163		.len = sizeof(struct rdma_nla_ls_gid)},
    164	[LS_NLA_TYPE_TCLASS]		= {.type = NLA_U8},
    165	[LS_NLA_TYPE_PKEY]		= {.type = NLA_U16},
    166	[LS_NLA_TYPE_QOS_CLASS]		= {.type = NLA_U16},
    167};
    168
    169
    170static int ib_sa_add_one(struct ib_device *device);
    171static void ib_sa_remove_one(struct ib_device *device, void *client_data);
    172
    173static struct ib_client sa_client = {
    174	.name   = "sa",
    175	.add    = ib_sa_add_one,
    176	.remove = ib_sa_remove_one
    177};
    178
    179static DEFINE_XARRAY_FLAGS(queries, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
    180
    181static DEFINE_SPINLOCK(tid_lock);
    182static u32 tid;
    183
    184#define PATH_REC_FIELD(field) \
    185	.struct_offset_bytes = offsetof(struct sa_path_rec, field),	\
    186	.struct_size_bytes   = sizeof_field(struct sa_path_rec, field),	\
    187	.field_name          = "sa_path_rec:" #field
    188
    189static const struct ib_field path_rec_table[] = {
    190	{ PATH_REC_FIELD(service_id),
    191	  .offset_words = 0,
    192	  .offset_bits  = 0,
    193	  .size_bits    = 64 },
    194	{ PATH_REC_FIELD(dgid),
    195	  .offset_words = 2,
    196	  .offset_bits  = 0,
    197	  .size_bits    = 128 },
    198	{ PATH_REC_FIELD(sgid),
    199	  .offset_words = 6,
    200	  .offset_bits  = 0,
    201	  .size_bits    = 128 },
    202	{ PATH_REC_FIELD(ib.dlid),
    203	  .offset_words = 10,
    204	  .offset_bits  = 0,
    205	  .size_bits    = 16 },
    206	{ PATH_REC_FIELD(ib.slid),
    207	  .offset_words = 10,
    208	  .offset_bits  = 16,
    209	  .size_bits    = 16 },
    210	{ PATH_REC_FIELD(ib.raw_traffic),
    211	  .offset_words = 11,
    212	  .offset_bits  = 0,
    213	  .size_bits    = 1 },
    214	{ RESERVED,
    215	  .offset_words = 11,
    216	  .offset_bits  = 1,
    217	  .size_bits    = 3 },
    218	{ PATH_REC_FIELD(flow_label),
    219	  .offset_words = 11,
    220	  .offset_bits  = 4,
    221	  .size_bits    = 20 },
    222	{ PATH_REC_FIELD(hop_limit),
    223	  .offset_words = 11,
    224	  .offset_bits  = 24,
    225	  .size_bits    = 8 },
    226	{ PATH_REC_FIELD(traffic_class),
    227	  .offset_words = 12,
    228	  .offset_bits  = 0,
    229	  .size_bits    = 8 },
    230	{ PATH_REC_FIELD(reversible),
    231	  .offset_words = 12,
    232	  .offset_bits  = 8,
    233	  .size_bits    = 1 },
    234	{ PATH_REC_FIELD(numb_path),
    235	  .offset_words = 12,
    236	  .offset_bits  = 9,
    237	  .size_bits    = 7 },
    238	{ PATH_REC_FIELD(pkey),
    239	  .offset_words = 12,
    240	  .offset_bits  = 16,
    241	  .size_bits    = 16 },
    242	{ PATH_REC_FIELD(qos_class),
    243	  .offset_words = 13,
    244	  .offset_bits  = 0,
    245	  .size_bits    = 12 },
    246	{ PATH_REC_FIELD(sl),
    247	  .offset_words = 13,
    248	  .offset_bits  = 12,
    249	  .size_bits    = 4 },
    250	{ PATH_REC_FIELD(mtu_selector),
    251	  .offset_words = 13,
    252	  .offset_bits  = 16,
    253	  .size_bits    = 2 },
    254	{ PATH_REC_FIELD(mtu),
    255	  .offset_words = 13,
    256	  .offset_bits  = 18,
    257	  .size_bits    = 6 },
    258	{ PATH_REC_FIELD(rate_selector),
    259	  .offset_words = 13,
    260	  .offset_bits  = 24,
    261	  .size_bits    = 2 },
    262	{ PATH_REC_FIELD(rate),
    263	  .offset_words = 13,
    264	  .offset_bits  = 26,
    265	  .size_bits    = 6 },
    266	{ PATH_REC_FIELD(packet_life_time_selector),
    267	  .offset_words = 14,
    268	  .offset_bits  = 0,
    269	  .size_bits    = 2 },
    270	{ PATH_REC_FIELD(packet_life_time),
    271	  .offset_words = 14,
    272	  .offset_bits  = 2,
    273	  .size_bits    = 6 },
    274	{ PATH_REC_FIELD(preference),
    275	  .offset_words = 14,
    276	  .offset_bits  = 8,
    277	  .size_bits    = 8 },
    278	{ RESERVED,
    279	  .offset_words = 14,
    280	  .offset_bits  = 16,
    281	  .size_bits    = 48 },
    282};
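        /*
         * Orientation note on the table above (and the similar OPA, MCMember,
         * ClassPortInfo and GUIDInfo tables below), summarizing the
         * ib_pack()/ib_unpack() conventions from <rdma/ib_pack.h>: each
         * ib_field entry maps one structure member into the big-endian MAD
         * attribute, with offset_words counting 32-bit words from the start
         * of the attribute, offset_bits giving the bit position within that
         * word, and size_bits the field width.  For example, the PathRecord
         * pkey entry above sits at word 12, bit 16, 16 bits wide, and dgid
         * spans words 2-5 (128 bits).
         */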
    283
    284#define OPA_PATH_REC_FIELD(field) \
    285	.struct_offset_bytes = \
    286		offsetof(struct sa_path_rec, field), \
    287	.struct_size_bytes   = \
    288		sizeof_field(struct sa_path_rec, field),	\
    289	.field_name          = "sa_path_rec:" #field
    290
    291static const struct ib_field opa_path_rec_table[] = {
    292	{ OPA_PATH_REC_FIELD(service_id),
    293	  .offset_words = 0,
    294	  .offset_bits  = 0,
    295	  .size_bits    = 64 },
    296	{ OPA_PATH_REC_FIELD(dgid),
    297	  .offset_words = 2,
    298	  .offset_bits  = 0,
    299	  .size_bits    = 128 },
    300	{ OPA_PATH_REC_FIELD(sgid),
    301	  .offset_words = 6,
    302	  .offset_bits  = 0,
    303	  .size_bits    = 128 },
    304	{ OPA_PATH_REC_FIELD(opa.dlid),
    305	  .offset_words = 10,
    306	  .offset_bits  = 0,
    307	  .size_bits    = 32 },
    308	{ OPA_PATH_REC_FIELD(opa.slid),
    309	  .offset_words = 11,
    310	  .offset_bits  = 0,
    311	  .size_bits    = 32 },
    312	{ OPA_PATH_REC_FIELD(opa.raw_traffic),
    313	  .offset_words = 12,
    314	  .offset_bits  = 0,
    315	  .size_bits    = 1 },
    316	{ RESERVED,
    317	  .offset_words = 12,
    318	  .offset_bits  = 1,
    319	  .size_bits    = 3 },
    320	{ OPA_PATH_REC_FIELD(flow_label),
    321	  .offset_words = 12,
    322	  .offset_bits  = 4,
    323	  .size_bits    = 20 },
    324	{ OPA_PATH_REC_FIELD(hop_limit),
    325	  .offset_words = 12,
    326	  .offset_bits  = 24,
    327	  .size_bits    = 8 },
    328	{ OPA_PATH_REC_FIELD(traffic_class),
    329	  .offset_words = 13,
    330	  .offset_bits  = 0,
    331	  .size_bits    = 8 },
    332	{ OPA_PATH_REC_FIELD(reversible),
    333	  .offset_words = 13,
    334	  .offset_bits  = 8,
    335	  .size_bits    = 1 },
    336	{ OPA_PATH_REC_FIELD(numb_path),
    337	  .offset_words = 13,
    338	  .offset_bits  = 9,
    339	  .size_bits    = 7 },
    340	{ OPA_PATH_REC_FIELD(pkey),
    341	  .offset_words = 13,
    342	  .offset_bits  = 16,
    343	  .size_bits    = 16 },
    344	{ OPA_PATH_REC_FIELD(opa.l2_8B),
    345	  .offset_words = 14,
    346	  .offset_bits  = 0,
    347	  .size_bits    = 1 },
    348	{ OPA_PATH_REC_FIELD(opa.l2_10B),
    349	  .offset_words = 14,
    350	  .offset_bits  = 1,
    351	  .size_bits    = 1 },
    352	{ OPA_PATH_REC_FIELD(opa.l2_9B),
    353	  .offset_words = 14,
    354	  .offset_bits  = 2,
    355	  .size_bits    = 1 },
    356	{ OPA_PATH_REC_FIELD(opa.l2_16B),
    357	  .offset_words = 14,
    358	  .offset_bits  = 3,
    359	  .size_bits    = 1 },
    360	{ RESERVED,
    361	  .offset_words = 14,
    362	  .offset_bits  = 4,
    363	  .size_bits    = 2 },
    364	{ OPA_PATH_REC_FIELD(opa.qos_type),
    365	  .offset_words = 14,
    366	  .offset_bits  = 6,
    367	  .size_bits    = 2 },
    368	{ OPA_PATH_REC_FIELD(opa.qos_priority),
    369	  .offset_words = 14,
    370	  .offset_bits  = 8,
    371	  .size_bits    = 8 },
    372	{ RESERVED,
    373	  .offset_words = 14,
    374	  .offset_bits  = 16,
    375	  .size_bits    = 3 },
    376	{ OPA_PATH_REC_FIELD(sl),
    377	  .offset_words = 14,
    378	  .offset_bits  = 19,
    379	  .size_bits    = 5 },
    380	{ RESERVED,
    381	  .offset_words = 14,
    382	  .offset_bits  = 24,
    383	  .size_bits    = 8 },
    384	{ OPA_PATH_REC_FIELD(mtu_selector),
    385	  .offset_words = 15,
    386	  .offset_bits  = 0,
    387	  .size_bits    = 2 },
    388	{ OPA_PATH_REC_FIELD(mtu),
    389	  .offset_words = 15,
    390	  .offset_bits  = 2,
    391	  .size_bits    = 6 },
    392	{ OPA_PATH_REC_FIELD(rate_selector),
    393	  .offset_words = 15,
    394	  .offset_bits  = 8,
    395	  .size_bits    = 2 },
    396	{ OPA_PATH_REC_FIELD(rate),
    397	  .offset_words = 15,
    398	  .offset_bits  = 10,
    399	  .size_bits    = 6 },
    400	{ OPA_PATH_REC_FIELD(packet_life_time_selector),
    401	  .offset_words = 15,
    402	  .offset_bits  = 16,
    403	  .size_bits    = 2 },
    404	{ OPA_PATH_REC_FIELD(packet_life_time),
    405	  .offset_words = 15,
    406	  .offset_bits  = 18,
    407	  .size_bits    = 6 },
    408	{ OPA_PATH_REC_FIELD(preference),
    409	  .offset_words = 15,
    410	  .offset_bits  = 24,
    411	  .size_bits    = 8 },
    412};
    413
    414#define MCMEMBER_REC_FIELD(field) \
    415	.struct_offset_bytes = offsetof(struct ib_sa_mcmember_rec, field),	\
    416	.struct_size_bytes   = sizeof_field(struct ib_sa_mcmember_rec, field),	\
    417	.field_name          = "sa_mcmember_rec:" #field
    418
    419static const struct ib_field mcmember_rec_table[] = {
    420	{ MCMEMBER_REC_FIELD(mgid),
    421	  .offset_words = 0,
    422	  .offset_bits  = 0,
    423	  .size_bits    = 128 },
    424	{ MCMEMBER_REC_FIELD(port_gid),
    425	  .offset_words = 4,
    426	  .offset_bits  = 0,
    427	  .size_bits    = 128 },
    428	{ MCMEMBER_REC_FIELD(qkey),
    429	  .offset_words = 8,
    430	  .offset_bits  = 0,
    431	  .size_bits    = 32 },
    432	{ MCMEMBER_REC_FIELD(mlid),
    433	  .offset_words = 9,
    434	  .offset_bits  = 0,
    435	  .size_bits    = 16 },
    436	{ MCMEMBER_REC_FIELD(mtu_selector),
    437	  .offset_words = 9,
    438	  .offset_bits  = 16,
    439	  .size_bits    = 2 },
    440	{ MCMEMBER_REC_FIELD(mtu),
    441	  .offset_words = 9,
    442	  .offset_bits  = 18,
    443	  .size_bits    = 6 },
    444	{ MCMEMBER_REC_FIELD(traffic_class),
    445	  .offset_words = 9,
    446	  .offset_bits  = 24,
    447	  .size_bits    = 8 },
    448	{ MCMEMBER_REC_FIELD(pkey),
    449	  .offset_words = 10,
    450	  .offset_bits  = 0,
    451	  .size_bits    = 16 },
    452	{ MCMEMBER_REC_FIELD(rate_selector),
    453	  .offset_words = 10,
    454	  .offset_bits  = 16,
    455	  .size_bits    = 2 },
    456	{ MCMEMBER_REC_FIELD(rate),
    457	  .offset_words = 10,
    458	  .offset_bits  = 18,
    459	  .size_bits    = 6 },
    460	{ MCMEMBER_REC_FIELD(packet_life_time_selector),
    461	  .offset_words = 10,
    462	  .offset_bits  = 24,
    463	  .size_bits    = 2 },
    464	{ MCMEMBER_REC_FIELD(packet_life_time),
    465	  .offset_words = 10,
    466	  .offset_bits  = 26,
    467	  .size_bits    = 6 },
    468	{ MCMEMBER_REC_FIELD(sl),
    469	  .offset_words = 11,
    470	  .offset_bits  = 0,
    471	  .size_bits    = 4 },
    472	{ MCMEMBER_REC_FIELD(flow_label),
    473	  .offset_words = 11,
    474	  .offset_bits  = 4,
    475	  .size_bits    = 20 },
    476	{ MCMEMBER_REC_FIELD(hop_limit),
    477	  .offset_words = 11,
    478	  .offset_bits  = 24,
    479	  .size_bits    = 8 },
    480	{ MCMEMBER_REC_FIELD(scope),
    481	  .offset_words = 12,
    482	  .offset_bits  = 0,
    483	  .size_bits    = 4 },
    484	{ MCMEMBER_REC_FIELD(join_state),
    485	  .offset_words = 12,
    486	  .offset_bits  = 4,
    487	  .size_bits    = 4 },
    488	{ MCMEMBER_REC_FIELD(proxy_join),
    489	  .offset_words = 12,
    490	  .offset_bits  = 8,
    491	  .size_bits    = 1 },
    492	{ RESERVED,
    493	  .offset_words = 12,
    494	  .offset_bits  = 9,
    495	  .size_bits    = 23 },
    496};
    497
    498#define CLASSPORTINFO_REC_FIELD(field) \
    499	.struct_offset_bytes = offsetof(struct ib_class_port_info, field),	\
    500	.struct_size_bytes   = sizeof_field(struct ib_class_port_info, field),	\
    501	.field_name          = "ib_class_port_info:" #field
    502
    503static const struct ib_field ib_classport_info_rec_table[] = {
    504	{ CLASSPORTINFO_REC_FIELD(base_version),
    505	  .offset_words = 0,
    506	  .offset_bits  = 0,
    507	  .size_bits    = 8 },
    508	{ CLASSPORTINFO_REC_FIELD(class_version),
    509	  .offset_words = 0,
    510	  .offset_bits  = 8,
    511	  .size_bits    = 8 },
    512	{ CLASSPORTINFO_REC_FIELD(capability_mask),
    513	  .offset_words = 0,
    514	  .offset_bits  = 16,
    515	  .size_bits    = 16 },
    516	{ CLASSPORTINFO_REC_FIELD(cap_mask2_resp_time),
    517	  .offset_words = 1,
    518	  .offset_bits  = 0,
    519	  .size_bits    = 32 },
    520	{ CLASSPORTINFO_REC_FIELD(redirect_gid),
    521	  .offset_words = 2,
    522	  .offset_bits  = 0,
    523	  .size_bits    = 128 },
    524	{ CLASSPORTINFO_REC_FIELD(redirect_tcslfl),
    525	  .offset_words = 6,
    526	  .offset_bits  = 0,
    527	  .size_bits    = 32 },
    528	{ CLASSPORTINFO_REC_FIELD(redirect_lid),
    529	  .offset_words = 7,
    530	  .offset_bits  = 0,
    531	  .size_bits    = 16 },
    532	{ CLASSPORTINFO_REC_FIELD(redirect_pkey),
    533	  .offset_words = 7,
    534	  .offset_bits  = 16,
    535	  .size_bits    = 16 },
    536
    537	{ CLASSPORTINFO_REC_FIELD(redirect_qp),
    538	  .offset_words = 8,
    539	  .offset_bits  = 0,
    540	  .size_bits    = 32 },
    541	{ CLASSPORTINFO_REC_FIELD(redirect_qkey),
    542	  .offset_words = 9,
    543	  .offset_bits  = 0,
    544	  .size_bits    = 32 },
    545
    546	{ CLASSPORTINFO_REC_FIELD(trap_gid),
    547	  .offset_words = 10,
    548	  .offset_bits  = 0,
    549	  .size_bits    = 128 },
    550	{ CLASSPORTINFO_REC_FIELD(trap_tcslfl),
    551	  .offset_words = 14,
    552	  .offset_bits  = 0,
    553	  .size_bits    = 32 },
    554
    555	{ CLASSPORTINFO_REC_FIELD(trap_lid),
    556	  .offset_words = 15,
    557	  .offset_bits  = 0,
    558	  .size_bits    = 16 },
    559	{ CLASSPORTINFO_REC_FIELD(trap_pkey),
    560	  .offset_words = 15,
    561	  .offset_bits  = 16,
    562	  .size_bits    = 16 },
    563
    564	{ CLASSPORTINFO_REC_FIELD(trap_hlqp),
    565	  .offset_words = 16,
    566	  .offset_bits  = 0,
    567	  .size_bits    = 32 },
    568	{ CLASSPORTINFO_REC_FIELD(trap_qkey),
    569	  .offset_words = 17,
    570	  .offset_bits  = 0,
    571	  .size_bits    = 32 },
    572};
    573
    574#define OPA_CLASSPORTINFO_REC_FIELD(field) \
    575	.struct_offset_bytes =\
    576		offsetof(struct opa_class_port_info, field),	\
    577	.struct_size_bytes   = \
    578		sizeof_field(struct opa_class_port_info, field),	\
    579	.field_name          = "opa_class_port_info:" #field
    580
    581static const struct ib_field opa_classport_info_rec_table[] = {
    582	{ OPA_CLASSPORTINFO_REC_FIELD(base_version),
    583	  .offset_words = 0,
    584	  .offset_bits  = 0,
    585	  .size_bits    = 8 },
    586	{ OPA_CLASSPORTINFO_REC_FIELD(class_version),
    587	  .offset_words = 0,
    588	  .offset_bits  = 8,
    589	  .size_bits    = 8 },
    590	{ OPA_CLASSPORTINFO_REC_FIELD(cap_mask),
    591	  .offset_words = 0,
    592	  .offset_bits  = 16,
    593	  .size_bits    = 16 },
    594	{ OPA_CLASSPORTINFO_REC_FIELD(cap_mask2_resp_time),
    595	  .offset_words = 1,
    596	  .offset_bits  = 0,
    597	  .size_bits    = 32 },
    598	{ OPA_CLASSPORTINFO_REC_FIELD(redirect_gid),
    599	  .offset_words = 2,
    600	  .offset_bits  = 0,
    601	  .size_bits    = 128 },
    602	{ OPA_CLASSPORTINFO_REC_FIELD(redirect_tc_fl),
    603	  .offset_words = 6,
    604	  .offset_bits  = 0,
    605	  .size_bits    = 32 },
    606	{ OPA_CLASSPORTINFO_REC_FIELD(redirect_lid),
    607	  .offset_words = 7,
    608	  .offset_bits  = 0,
    609	  .size_bits    = 32 },
    610	{ OPA_CLASSPORTINFO_REC_FIELD(redirect_sl_qp),
    611	  .offset_words = 8,
    612	  .offset_bits  = 0,
    613	  .size_bits    = 32 },
    614	{ OPA_CLASSPORTINFO_REC_FIELD(redirect_qkey),
    615	  .offset_words = 9,
    616	  .offset_bits  = 0,
    617	  .size_bits    = 32 },
    618	{ OPA_CLASSPORTINFO_REC_FIELD(trap_gid),
    619	  .offset_words = 10,
    620	  .offset_bits  = 0,
    621	  .size_bits    = 128 },
    622	{ OPA_CLASSPORTINFO_REC_FIELD(trap_tc_fl),
    623	  .offset_words = 14,
    624	  .offset_bits  = 0,
    625	  .size_bits    = 32 },
    626	{ OPA_CLASSPORTINFO_REC_FIELD(trap_lid),
    627	  .offset_words = 15,
    628	  .offset_bits  = 0,
    629	  .size_bits    = 32 },
    630	{ OPA_CLASSPORTINFO_REC_FIELD(trap_hl_qp),
    631	  .offset_words = 16,
    632	  .offset_bits  = 0,
    633	  .size_bits    = 32 },
    634	{ OPA_CLASSPORTINFO_REC_FIELD(trap_qkey),
    635	  .offset_words = 17,
    636	  .offset_bits  = 0,
    637	  .size_bits    = 32 },
    638	{ OPA_CLASSPORTINFO_REC_FIELD(trap_pkey),
    639	  .offset_words = 18,
    640	  .offset_bits  = 0,
    641	  .size_bits    = 16 },
    642	{ OPA_CLASSPORTINFO_REC_FIELD(redirect_pkey),
    643	  .offset_words = 18,
    644	  .offset_bits  = 16,
    645	  .size_bits    = 16 },
    646	{ OPA_CLASSPORTINFO_REC_FIELD(trap_sl_rsvd),
    647	  .offset_words = 19,
    648	  .offset_bits  = 0,
    649	  .size_bits    = 8 },
    650	{ RESERVED,
    651	  .offset_words = 19,
    652	  .offset_bits  = 8,
    653	  .size_bits    = 24 },
    654};
    655
    656#define GUIDINFO_REC_FIELD(field) \
    657	.struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field),	\
    658	.struct_size_bytes   = sizeof_field(struct ib_sa_guidinfo_rec, field),	\
    659	.field_name          = "sa_guidinfo_rec:" #field
    660
    661static const struct ib_field guidinfo_rec_table[] = {
    662	{ GUIDINFO_REC_FIELD(lid),
    663	  .offset_words = 0,
    664	  .offset_bits  = 0,
    665	  .size_bits    = 16 },
    666	{ GUIDINFO_REC_FIELD(block_num),
    667	  .offset_words = 0,
    668	  .offset_bits  = 16,
    669	  .size_bits    = 8 },
    670	{ GUIDINFO_REC_FIELD(res1),
    671	  .offset_words = 0,
    672	  .offset_bits  = 24,
    673	  .size_bits    = 8 },
    674	{ GUIDINFO_REC_FIELD(res2),
    675	  .offset_words = 1,
    676	  .offset_bits  = 0,
    677	  .size_bits    = 32 },
    678	{ GUIDINFO_REC_FIELD(guid_info_list),
    679	  .offset_words = 2,
    680	  .offset_bits  = 0,
    681	  .size_bits    = 512 },
    682};
    683
    684static inline void ib_sa_disable_local_svc(struct ib_sa_query *query)
    685{
    686	query->flags &= ~IB_SA_ENABLE_LOCAL_SERVICE;
    687}
    688
    689static inline int ib_sa_query_cancelled(struct ib_sa_query *query)
    690{
    691	return (query->flags & IB_SA_CANCEL);
    692}
    693
    694static void ib_nl_set_path_rec_attrs(struct sk_buff *skb,
    695				     struct ib_sa_query *query)
    696{
    697	struct sa_path_rec *sa_rec = query->mad_buf->context[1];
    698	struct ib_sa_mad *mad = query->mad_buf->mad;
    699	ib_sa_comp_mask comp_mask = mad->sa_hdr.comp_mask;
    700	u16 val16;
    701	u64 val64;
    702	struct rdma_ls_resolve_header *header;
    703
    704	query->mad_buf->context[1] = NULL;
    705
    706	/* Construct the family header first */
    707	header = skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
    708	strscpy_pad(header->device_name,
    709		    dev_name(&query->port->agent->device->dev),
    710		    LS_DEVICE_NAME_MAX);
    711	header->port_num = query->port->port_num;
    712
    713	if ((comp_mask & IB_SA_PATH_REC_REVERSIBLE) &&
    714	    sa_rec->reversible != 0)
    715		query->path_use = LS_RESOLVE_PATH_USE_GMP;
    716	else
    717		query->path_use = LS_RESOLVE_PATH_USE_UNIDIRECTIONAL;
    718	header->path_use = query->path_use;
    719
    720	/* Now build the attributes */
    721	if (comp_mask & IB_SA_PATH_REC_SERVICE_ID) {
    722		val64 = be64_to_cpu(sa_rec->service_id);
    723		nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_SERVICE_ID,
    724			sizeof(val64), &val64);
    725	}
    726	if (comp_mask & IB_SA_PATH_REC_DGID)
    727		nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_DGID,
    728			sizeof(sa_rec->dgid), &sa_rec->dgid);
    729	if (comp_mask & IB_SA_PATH_REC_SGID)
    730		nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_SGID,
    731			sizeof(sa_rec->sgid), &sa_rec->sgid);
    732	if (comp_mask & IB_SA_PATH_REC_TRAFFIC_CLASS)
    733		nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_TCLASS,
    734			sizeof(sa_rec->traffic_class), &sa_rec->traffic_class);
    735
    736	if (comp_mask & IB_SA_PATH_REC_PKEY) {
    737		val16 = be16_to_cpu(sa_rec->pkey);
    738		nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_PKEY,
    739			sizeof(val16), &val16);
    740	}
    741	if (comp_mask & IB_SA_PATH_REC_QOS_CLASS) {
    742		val16 = be16_to_cpu(sa_rec->qos_class);
    743		nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_QOS_CLASS,
    744			sizeof(val16), &val16);
    745	}
    746}
    747
    748static int ib_nl_get_path_rec_attrs_len(ib_sa_comp_mask comp_mask)
    749{
    750	int len = 0;
    751
    752	if (comp_mask & IB_SA_PATH_REC_SERVICE_ID)
    753		len += nla_total_size(sizeof(u64));
    754	if (comp_mask & IB_SA_PATH_REC_DGID)
    755		len += nla_total_size(sizeof(struct rdma_nla_ls_gid));
    756	if (comp_mask & IB_SA_PATH_REC_SGID)
    757		len += nla_total_size(sizeof(struct rdma_nla_ls_gid));
    758	if (comp_mask & IB_SA_PATH_REC_TRAFFIC_CLASS)
    759		len += nla_total_size(sizeof(u8));
    760	if (comp_mask & IB_SA_PATH_REC_PKEY)
    761		len += nla_total_size(sizeof(u16));
    762	if (comp_mask & IB_SA_PATH_REC_QOS_CLASS)
    763		len += nla_total_size(sizeof(u16));
    764
    765	/*
    766	 * Make sure that at least some of the required comp_mask bits are
    767	 * set.
    768	 */
    769	if (WARN_ON(len == 0))
    770		return len;
    771
    772	/* Add the family header */
    773	len += NLMSG_ALIGN(sizeof(struct rdma_ls_resolve_header));
    774
    775	return len;
    776}
    777
    778static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask)
    779{
    780	struct sk_buff *skb = NULL;
    781	struct nlmsghdr *nlh;
    782	void *data;
    783	struct ib_sa_mad *mad;
    784	int len;
    785	unsigned long flags;
    786	unsigned long delay;
    787	gfp_t gfp_flag;
    788	int ret;
    789
    790	INIT_LIST_HEAD(&query->list);
    791	query->seq = (u32)atomic_inc_return(&ib_nl_sa_request_seq);
    792
    793	mad = query->mad_buf->mad;
    794	len = ib_nl_get_path_rec_attrs_len(mad->sa_hdr.comp_mask);
    795	if (len <= 0)
    796		return -EMSGSIZE;
    797
    798	skb = nlmsg_new(len, gfp_mask);
    799	if (!skb)
    800		return -ENOMEM;
    801
    802	/* Put nlmsg header only for now */
    803	data = ibnl_put_msg(skb, &nlh, query->seq, 0, RDMA_NL_LS,
    804			    RDMA_NL_LS_OP_RESOLVE, NLM_F_REQUEST);
    805	if (!data) {
    806		nlmsg_free(skb);
    807		return -EMSGSIZE;
    808	}
    809
    810	/* Add attributes */
    811	ib_nl_set_path_rec_attrs(skb, query);
    812
    813	/* Repair the nlmsg header length */
    814	nlmsg_end(skb, nlh);
    815
    816	gfp_flag = ((gfp_mask & GFP_ATOMIC) == GFP_ATOMIC) ? GFP_ATOMIC :
    817		GFP_NOWAIT;
    818
    819	spin_lock_irqsave(&ib_nl_request_lock, flags);
    820	ret = rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_LS, gfp_flag);
    821
    822	if (ret)
    823		goto out;
    824
     825	/* Put the request on the list. */
    826	delay = msecs_to_jiffies(sa_local_svc_timeout_ms);
    827	query->timeout = delay + jiffies;
    828	list_add_tail(&query->list, &ib_nl_request_list);
    829	/* Start the timeout if this is the only request */
    830	if (ib_nl_request_list.next == &query->list)
    831		queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay);
    832
    833out:
    834	spin_unlock_irqrestore(&ib_nl_request_lock, flags);
    835
    836	return ret;
    837}
    838
    839static int ib_nl_cancel_request(struct ib_sa_query *query)
    840{
    841	unsigned long flags;
    842	struct ib_sa_query *wait_query;
    843	int found = 0;
    844
    845	spin_lock_irqsave(&ib_nl_request_lock, flags);
    846	list_for_each_entry(wait_query, &ib_nl_request_list, list) {
     847		/* Let the timeout routine take care of the callback */
    848		if (query == wait_query) {
    849			query->flags |= IB_SA_CANCEL;
    850			query->timeout = jiffies;
    851			list_move(&query->list, &ib_nl_request_list);
    852			found = 1;
    853			mod_delayed_work(ib_nl_wq, &ib_nl_timed_work, 1);
    854			break;
    855		}
    856	}
    857	spin_unlock_irqrestore(&ib_nl_request_lock, flags);
    858
    859	return found;
    860}
    861
    862static void send_handler(struct ib_mad_agent *agent,
    863			 struct ib_mad_send_wc *mad_send_wc);
    864
    865static void ib_nl_process_good_resolve_rsp(struct ib_sa_query *query,
    866					   const struct nlmsghdr *nlh)
    867{
    868	struct ib_mad_send_wc mad_send_wc;
    869	struct ib_sa_mad *mad = NULL;
    870	const struct nlattr *head, *curr;
    871	struct ib_path_rec_data  *rec;
    872	int len, rem;
    873	u32 mask = 0;
    874	int status = -EIO;
    875
    876	if (query->callback) {
    877		head = (const struct nlattr *) nlmsg_data(nlh);
    878		len = nlmsg_len(nlh);
    879		switch (query->path_use) {
    880		case LS_RESOLVE_PATH_USE_UNIDIRECTIONAL:
    881			mask = IB_PATH_PRIMARY | IB_PATH_OUTBOUND;
    882			break;
    883
    884		case LS_RESOLVE_PATH_USE_ALL:
    885		case LS_RESOLVE_PATH_USE_GMP:
    886		default:
    887			mask = IB_PATH_PRIMARY | IB_PATH_GMP |
    888				IB_PATH_BIDIRECTIONAL;
    889			break;
    890		}
    891		nla_for_each_attr(curr, head, len, rem) {
    892			if (curr->nla_type == LS_NLA_TYPE_PATH_RECORD) {
    893				rec = nla_data(curr);
    894				/*
    895				 * Get the first one. In the future, we may
    896				 * need to get up to 6 pathrecords.
    897				 */
    898				if ((rec->flags & mask) == mask) {
    899					mad = query->mad_buf->mad;
    900					mad->mad_hdr.method |=
    901						IB_MGMT_METHOD_RESP;
    902					memcpy(mad->data, rec->path_rec,
    903					       sizeof(rec->path_rec));
    904					status = 0;
    905					break;
    906				}
    907			}
    908		}
    909		query->callback(query, status, mad);
    910	}
    911
    912	mad_send_wc.send_buf = query->mad_buf;
    913	mad_send_wc.status = IB_WC_SUCCESS;
    914	send_handler(query->mad_buf->mad_agent, &mad_send_wc);
    915}
    916
    917static void ib_nl_request_timeout(struct work_struct *work)
    918{
    919	unsigned long flags;
    920	struct ib_sa_query *query;
    921	unsigned long delay;
    922	struct ib_mad_send_wc mad_send_wc;
    923	int ret;
    924
    925	spin_lock_irqsave(&ib_nl_request_lock, flags);
    926	while (!list_empty(&ib_nl_request_list)) {
    927		query = list_entry(ib_nl_request_list.next,
    928				   struct ib_sa_query, list);
    929
    930		if (time_after(query->timeout, jiffies)) {
    931			delay = query->timeout - jiffies;
    932			if ((long)delay <= 0)
    933				delay = 1;
    934			queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay);
    935			break;
    936		}
    937
    938		list_del(&query->list);
    939		ib_sa_disable_local_svc(query);
    940		/* Hold the lock to protect against query cancellation */
    941		if (ib_sa_query_cancelled(query))
    942			ret = -1;
    943		else
    944			ret = ib_post_send_mad(query->mad_buf, NULL);
    945		if (ret) {
    946			mad_send_wc.send_buf = query->mad_buf;
    947			mad_send_wc.status = IB_WC_WR_FLUSH_ERR;
    948			spin_unlock_irqrestore(&ib_nl_request_lock, flags);
    949			send_handler(query->port->agent, &mad_send_wc);
    950			spin_lock_irqsave(&ib_nl_request_lock, flags);
    951		}
    952	}
    953	spin_unlock_irqrestore(&ib_nl_request_lock, flags);
    954}
    955
    956int ib_nl_handle_set_timeout(struct sk_buff *skb,
    957			     struct nlmsghdr *nlh,
    958			     struct netlink_ext_ack *extack)
    959{
    960	int timeout, delta, abs_delta;
    961	const struct nlattr *attr;
    962	unsigned long flags;
    963	struct ib_sa_query *query;
    964	long delay = 0;
    965	struct nlattr *tb[LS_NLA_TYPE_MAX];
    966	int ret;
    967
    968	if (!(nlh->nlmsg_flags & NLM_F_REQUEST) ||
    969	    !(NETLINK_CB(skb).sk))
    970		return -EPERM;
    971
    972	ret = nla_parse_deprecated(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
    973				   nlmsg_len(nlh), ib_nl_policy, NULL);
    974	attr = (const struct nlattr *)tb[LS_NLA_TYPE_TIMEOUT];
    975	if (ret || !attr)
    976		goto settimeout_out;
    977
    978	timeout = *(int *) nla_data(attr);
    979	if (timeout < IB_SA_LOCAL_SVC_TIMEOUT_MIN)
    980		timeout = IB_SA_LOCAL_SVC_TIMEOUT_MIN;
    981	if (timeout > IB_SA_LOCAL_SVC_TIMEOUT_MAX)
    982		timeout = IB_SA_LOCAL_SVC_TIMEOUT_MAX;
    983
    984	delta = timeout - sa_local_svc_timeout_ms;
    985	if (delta < 0)
    986		abs_delta = -delta;
    987	else
    988		abs_delta = delta;
    989
    990	if (delta != 0) {
    991		spin_lock_irqsave(&ib_nl_request_lock, flags);
    992		sa_local_svc_timeout_ms = timeout;
    993		list_for_each_entry(query, &ib_nl_request_list, list) {
    994			if (delta < 0 && abs_delta > query->timeout)
    995				query->timeout = 0;
    996			else
    997				query->timeout += delta;
    998
    999			/* Get the new delay from the first entry */
   1000			if (!delay) {
   1001				delay = query->timeout - jiffies;
   1002				if (delay <= 0)
   1003					delay = 1;
   1004			}
   1005		}
   1006		if (delay)
   1007			mod_delayed_work(ib_nl_wq, &ib_nl_timed_work,
   1008					 (unsigned long)delay);
   1009		spin_unlock_irqrestore(&ib_nl_request_lock, flags);
   1010	}
   1011
   1012settimeout_out:
   1013	return 0;
   1014}
   1015
   1016static inline int ib_nl_is_good_resolve_resp(const struct nlmsghdr *nlh)
   1017{
   1018	struct nlattr *tb[LS_NLA_TYPE_MAX];
   1019	int ret;
   1020
   1021	if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR)
   1022		return 0;
   1023
   1024	ret = nla_parse_deprecated(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
   1025				   nlmsg_len(nlh), ib_nl_policy, NULL);
   1026	if (ret)
   1027		return 0;
   1028
   1029	return 1;
   1030}
   1031
   1032int ib_nl_handle_resolve_resp(struct sk_buff *skb,
   1033			      struct nlmsghdr *nlh,
   1034			      struct netlink_ext_ack *extack)
   1035{
   1036	unsigned long flags;
   1037	struct ib_sa_query *query = NULL, *iter;
   1038	struct ib_mad_send_buf *send_buf;
   1039	struct ib_mad_send_wc mad_send_wc;
   1040	int ret;
   1041
   1042	if ((nlh->nlmsg_flags & NLM_F_REQUEST) ||
   1043	    !(NETLINK_CB(skb).sk))
   1044		return -EPERM;
   1045
   1046	spin_lock_irqsave(&ib_nl_request_lock, flags);
   1047	list_for_each_entry(iter, &ib_nl_request_list, list) {
   1048		/*
   1049		 * If the query is cancelled, let the timeout routine
   1050		 * take care of it.
   1051		 */
   1052		if (nlh->nlmsg_seq == iter->seq) {
   1053			if (!ib_sa_query_cancelled(iter)) {
   1054				list_del(&iter->list);
   1055				query = iter;
   1056			}
   1057			break;
   1058		}
   1059	}
   1060
   1061	if (!query) {
   1062		spin_unlock_irqrestore(&ib_nl_request_lock, flags);
   1063		goto resp_out;
   1064	}
   1065
   1066	send_buf = query->mad_buf;
   1067
   1068	if (!ib_nl_is_good_resolve_resp(nlh)) {
   1069		/* if the result is a failure, send out the packet via IB */
   1070		ib_sa_disable_local_svc(query);
   1071		ret = ib_post_send_mad(query->mad_buf, NULL);
   1072		spin_unlock_irqrestore(&ib_nl_request_lock, flags);
   1073		if (ret) {
   1074			mad_send_wc.send_buf = send_buf;
   1075			mad_send_wc.status = IB_WC_GENERAL_ERR;
   1076			send_handler(query->port->agent, &mad_send_wc);
   1077		}
   1078	} else {
   1079		spin_unlock_irqrestore(&ib_nl_request_lock, flags);
   1080		ib_nl_process_good_resolve_rsp(query, nlh);
   1081	}
   1082
   1083resp_out:
   1084	return 0;
   1085}
   1086
   1087static void free_sm_ah(struct kref *kref)
   1088{
   1089	struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);
   1090
   1091	rdma_destroy_ah(sm_ah->ah, 0);
   1092	kfree(sm_ah);
   1093}
   1094
   1095void ib_sa_register_client(struct ib_sa_client *client)
   1096{
   1097	atomic_set(&client->users, 1);
   1098	init_completion(&client->comp);
   1099}
   1100EXPORT_SYMBOL(ib_sa_register_client);
   1101
   1102void ib_sa_unregister_client(struct ib_sa_client *client)
   1103{
   1104	ib_sa_client_put(client);
   1105	wait_for_completion(&client->comp);
   1106}
   1107EXPORT_SYMBOL(ib_sa_unregister_client);
   1108
   1109/**
   1110 * ib_sa_cancel_query - try to cancel an SA query
   1111 * @id:ID of query to cancel
   1112 * @query:query pointer to cancel
   1113 *
   1114 * Try to cancel an SA query.  If the id and query don't match up or
   1115 * the query has already completed, nothing is done.  Otherwise the
   1116 * query is canceled and will complete with a status of -EINTR.
   1117 */
   1118void ib_sa_cancel_query(int id, struct ib_sa_query *query)
   1119{
   1120	unsigned long flags;
   1121	struct ib_mad_send_buf *mad_buf;
   1122
   1123	xa_lock_irqsave(&queries, flags);
   1124	if (xa_load(&queries, id) != query) {
   1125		xa_unlock_irqrestore(&queries, flags);
   1126		return;
   1127	}
   1128	mad_buf = query->mad_buf;
   1129	xa_unlock_irqrestore(&queries, flags);
   1130
   1131	/*
   1132	 * If the query is still on the netlink request list, schedule
   1133	 * it to be cancelled by the timeout routine. Otherwise, it has been
   1134	 * sent to the MAD layer and has to be cancelled from there.
   1135	 */
   1136	if (!ib_nl_cancel_request(query))
   1137		ib_cancel_mad(mad_buf);
   1138}
   1139EXPORT_SYMBOL(ib_sa_cancel_query);
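        /*
         * Minimal cancellation sketch (caller-side; "id" and "query" are
         * hypothetical names for the values a query function such as
         * ib_sa_path_rec_get(), defined later in this file, hands back):
         *
         *	if (id >= 0)
         *		ib_sa_cancel_query(id, query);
         *
         * If the request was still outstanding, its callback runs with a
         * status of -EINTR, as described above.
         */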
   1140
   1141static u8 get_src_path_mask(struct ib_device *device, u32 port_num)
   1142{
   1143	struct ib_sa_device *sa_dev;
   1144	struct ib_sa_port   *port;
   1145	unsigned long flags;
   1146	u8 src_path_mask;
   1147
   1148	sa_dev = ib_get_client_data(device, &sa_client);
   1149	if (!sa_dev)
   1150		return 0x7f;
   1151
   1152	port  = &sa_dev->port[port_num - sa_dev->start_port];
   1153	spin_lock_irqsave(&port->ah_lock, flags);
   1154	src_path_mask = port->sm_ah ? port->sm_ah->src_path_mask : 0x7f;
   1155	spin_unlock_irqrestore(&port->ah_lock, flags);
   1156
   1157	return src_path_mask;
   1158}
   1159
   1160static int init_ah_attr_grh_fields(struct ib_device *device, u32 port_num,
   1161				   struct sa_path_rec *rec,
   1162				   struct rdma_ah_attr *ah_attr,
   1163				   const struct ib_gid_attr *gid_attr)
   1164{
   1165	enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec);
   1166
   1167	if (!gid_attr) {
   1168		gid_attr = rdma_find_gid_by_port(device, &rec->sgid, type,
   1169						 port_num, NULL);
   1170		if (IS_ERR(gid_attr))
   1171			return PTR_ERR(gid_attr);
   1172	} else
   1173		rdma_hold_gid_attr(gid_attr);
   1174
   1175	rdma_move_grh_sgid_attr(ah_attr, &rec->dgid,
   1176				be32_to_cpu(rec->flow_label),
   1177				rec->hop_limit,	rec->traffic_class,
   1178				gid_attr);
   1179	return 0;
   1180}
   1181
   1182/**
   1183 * ib_init_ah_attr_from_path - Initialize address handle attributes based on
   1184 *   an SA path record.
    1185 * @device: Device associated with the ah attributes initialization.
   1186 * @port_num: Port on the specified device.
   1187 * @rec: path record entry to use for ah attributes initialization.
    1188 * @ah_attr: address handle attributes to initialize from the path record.
   1189 * @gid_attr: SGID attribute to consider during initialization.
   1190 *
   1191 * When ib_init_ah_attr_from_path() returns success,
    1192 * (a) for the IB link layer it optionally contains a reference to the SGID
    1193 * attribute when a GRH is present.
    1194 * (b) for the RoCE link layer it contains a reference to the SGID attribute.
   1195 * User must invoke rdma_destroy_ah_attr() to release reference to SGID
   1196 * attributes which are initialized using ib_init_ah_attr_from_path().
   1197 */
   1198int ib_init_ah_attr_from_path(struct ib_device *device, u32 port_num,
   1199			      struct sa_path_rec *rec,
   1200			      struct rdma_ah_attr *ah_attr,
   1201			      const struct ib_gid_attr *gid_attr)
   1202{
   1203	int ret = 0;
   1204
   1205	memset(ah_attr, 0, sizeof(*ah_attr));
   1206	ah_attr->type = rdma_ah_find_type(device, port_num);
   1207	rdma_ah_set_sl(ah_attr, rec->sl);
   1208	rdma_ah_set_port_num(ah_attr, port_num);
   1209	rdma_ah_set_static_rate(ah_attr, rec->rate);
   1210
   1211	if (sa_path_is_roce(rec)) {
   1212		ret = roce_resolve_route_from_path(rec, gid_attr);
   1213		if (ret)
   1214			return ret;
   1215
   1216		memcpy(ah_attr->roce.dmac, sa_path_get_dmac(rec), ETH_ALEN);
   1217	} else {
   1218		rdma_ah_set_dlid(ah_attr, be32_to_cpu(sa_path_get_dlid(rec)));
   1219		if (sa_path_is_opa(rec) &&
   1220		    rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE))
   1221			rdma_ah_set_make_grd(ah_attr, true);
   1222
   1223		rdma_ah_set_path_bits(ah_attr,
   1224				      be32_to_cpu(sa_path_get_slid(rec)) &
   1225				      get_src_path_mask(device, port_num));
   1226	}
   1227
   1228	if (rec->hop_limit > 0 || sa_path_is_roce(rec))
   1229		ret = init_ah_attr_grh_fields(device, port_num,
   1230					      rec, ah_attr, gid_attr);
   1231	return ret;
   1232}
   1233EXPORT_SYMBOL(ib_init_ah_attr_from_path);
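        /*
         * Usage sketch for ib_init_ah_attr_from_path() (hypothetical caller
         * variables; the reference-release rule is the one stated in the
         * kernel-doc above):
         *
         *	struct rdma_ah_attr ah_attr;
         *	int ret;
         *
         *	ret = ib_init_ah_attr_from_path(device, port_num, &rec,
         *					&ah_attr, NULL);
         *	if (!ret) {
         *		... create an AH or modify a QP with ah_attr ...
         *		rdma_destroy_ah_attr(&ah_attr);
         *	}
         *
         * rdma_destroy_ah_attr() releases the SGID attribute reference that
         * a successful call may have taken.
         */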
   1234
   1235static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask)
   1236{
   1237	struct rdma_ah_attr ah_attr;
   1238	unsigned long flags;
   1239
   1240	spin_lock_irqsave(&query->port->ah_lock, flags);
   1241	if (!query->port->sm_ah) {
   1242		spin_unlock_irqrestore(&query->port->ah_lock, flags);
   1243		return -EAGAIN;
   1244	}
   1245	kref_get(&query->port->sm_ah->ref);
   1246	query->sm_ah = query->port->sm_ah;
   1247	spin_unlock_irqrestore(&query->port->ah_lock, flags);
   1248
   1249	/*
   1250	 * Always check if sm_ah has valid dlid assigned,
   1251	 * before querying for class port info
   1252	 */
   1253	if ((rdma_query_ah(query->sm_ah->ah, &ah_attr) < 0) ||
   1254	    !rdma_is_valid_unicast_lid(&ah_attr)) {
   1255		kref_put(&query->sm_ah->ref, free_sm_ah);
   1256		return -EAGAIN;
   1257	}
   1258	query->mad_buf = ib_create_send_mad(query->port->agent, 1,
   1259					    query->sm_ah->pkey_index,
   1260					    0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA,
   1261					    gfp_mask,
   1262					    ((query->flags & IB_SA_QUERY_OPA) ?
   1263					     OPA_MGMT_BASE_VERSION :
   1264					     IB_MGMT_BASE_VERSION));
   1265	if (IS_ERR(query->mad_buf)) {
   1266		kref_put(&query->sm_ah->ref, free_sm_ah);
   1267		return -ENOMEM;
   1268	}
   1269
   1270	query->mad_buf->ah = query->sm_ah->ah;
   1271
   1272	return 0;
   1273}
   1274
   1275static void free_mad(struct ib_sa_query *query)
   1276{
   1277	ib_free_send_mad(query->mad_buf);
   1278	kref_put(&query->sm_ah->ref, free_sm_ah);
   1279}
   1280
   1281static void init_mad(struct ib_sa_query *query, struct ib_mad_agent *agent)
   1282{
   1283	struct ib_sa_mad *mad = query->mad_buf->mad;
   1284	unsigned long flags;
   1285
   1286	memset(mad, 0, sizeof *mad);
   1287
   1288	if (query->flags & IB_SA_QUERY_OPA) {
   1289		mad->mad_hdr.base_version  = OPA_MGMT_BASE_VERSION;
   1290		mad->mad_hdr.class_version = OPA_SA_CLASS_VERSION;
   1291	} else {
   1292		mad->mad_hdr.base_version  = IB_MGMT_BASE_VERSION;
   1293		mad->mad_hdr.class_version = IB_SA_CLASS_VERSION;
   1294	}
   1295	mad->mad_hdr.mgmt_class    = IB_MGMT_CLASS_SUBN_ADM;
   1296	spin_lock_irqsave(&tid_lock, flags);
   1297	mad->mad_hdr.tid           =
   1298		cpu_to_be64(((u64) agent->hi_tid) << 32 | tid++);
   1299	spin_unlock_irqrestore(&tid_lock, flags);
   1300}
   1301
   1302static int send_mad(struct ib_sa_query *query, unsigned long timeout_ms,
   1303		    gfp_t gfp_mask)
   1304{
   1305	unsigned long flags;
   1306	int ret, id;
   1307	const int nmbr_sa_query_retries = 10;
   1308
   1309	xa_lock_irqsave(&queries, flags);
   1310	ret = __xa_alloc(&queries, &id, query, xa_limit_32b, gfp_mask);
   1311	xa_unlock_irqrestore(&queries, flags);
   1312	if (ret < 0)
   1313		return ret;
   1314
   1315	query->mad_buf->timeout_ms  = timeout_ms / nmbr_sa_query_retries;
   1316	query->mad_buf->retries = nmbr_sa_query_retries;
   1317	if (!query->mad_buf->timeout_ms) {
   1318		/* Special case, very small timeout_ms */
   1319		query->mad_buf->timeout_ms = 1;
   1320		query->mad_buf->retries = timeout_ms;
   1321	}
   1322	query->mad_buf->context[0] = query;
   1323	query->id = id;
   1324
   1325	if ((query->flags & IB_SA_ENABLE_LOCAL_SERVICE) &&
   1326	    (!(query->flags & IB_SA_QUERY_OPA))) {
   1327		if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) {
   1328			if (!ib_nl_make_request(query, gfp_mask))
   1329				return id;
   1330		}
   1331		ib_sa_disable_local_svc(query);
   1332	}
   1333
   1334	ret = ib_post_send_mad(query->mad_buf, NULL);
   1335	if (ret) {
   1336		xa_lock_irqsave(&queries, flags);
   1337		__xa_erase(&queries, id);
   1338		xa_unlock_irqrestore(&queries, flags);
   1339	}
   1340
   1341	/*
   1342	 * It's not safe to dereference query any more, because the
   1343	 * send may already have completed and freed the query in
   1344	 * another context.
   1345	 */
   1346	return ret ? ret : id;
   1347}
   1348
   1349void ib_sa_unpack_path(void *attribute, struct sa_path_rec *rec)
   1350{
   1351	ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), attribute, rec);
   1352}
   1353EXPORT_SYMBOL(ib_sa_unpack_path);
   1354
   1355void ib_sa_pack_path(struct sa_path_rec *rec, void *attribute)
   1356{
   1357	ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, attribute);
   1358}
   1359EXPORT_SYMBOL(ib_sa_pack_path);
   1360
   1361static bool ib_sa_opa_pathrecord_support(struct ib_sa_client *client,
   1362					 struct ib_sa_device *sa_dev,
   1363					 u32 port_num)
   1364{
   1365	struct ib_sa_port *port;
   1366	unsigned long flags;
   1367	bool ret = false;
   1368
   1369	port = &sa_dev->port[port_num - sa_dev->start_port];
   1370	spin_lock_irqsave(&port->classport_lock, flags);
   1371	if (!port->classport_info.valid)
   1372		goto ret;
   1373
   1374	if (port->classport_info.data.type == RDMA_CLASS_PORT_INFO_OPA)
   1375		ret = opa_get_cpi_capmask2(&port->classport_info.data.opa) &
   1376			OPA_CLASS_PORT_INFO_PR_SUPPORT;
   1377ret:
   1378	spin_unlock_irqrestore(&port->classport_lock, flags);
   1379	return ret;
   1380}
   1381
   1382enum opa_pr_supported {
   1383	PR_NOT_SUPPORTED,
   1384	PR_OPA_SUPPORTED,
   1385	PR_IB_SUPPORTED
   1386};
   1387
   1388/*
   1389 * opa_pr_query_possible - Check if current PR query can be an OPA query.
   1390 *
    1391 * Returns PR_NOT_SUPPORTED if a path record query is not
   1392 * possible, PR_OPA_SUPPORTED if an OPA path record query
   1393 * is possible and PR_IB_SUPPORTED if an IB path record
   1394 * query is possible.
   1395 */
   1396static int opa_pr_query_possible(struct ib_sa_client *client,
   1397				 struct ib_sa_device *sa_dev,
   1398				 struct ib_device *device, u32 port_num)
   1399{
   1400	struct ib_port_attr port_attr;
   1401
   1402	if (ib_query_port(device, port_num, &port_attr))
   1403		return PR_NOT_SUPPORTED;
   1404
   1405	if (ib_sa_opa_pathrecord_support(client, sa_dev, port_num))
   1406		return PR_OPA_SUPPORTED;
   1407
   1408	if (port_attr.lid >= be16_to_cpu(IB_MULTICAST_LID_BASE))
   1409		return PR_NOT_SUPPORTED;
   1410	else
   1411		return PR_IB_SUPPORTED;
   1412}
   1413
   1414static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
   1415				    int status,
   1416				    struct ib_sa_mad *mad)
   1417{
   1418	struct ib_sa_path_query *query =
   1419		container_of(sa_query, struct ib_sa_path_query, sa_query);
   1420
   1421	if (mad) {
   1422		struct sa_path_rec rec;
   1423
   1424		if (sa_query->flags & IB_SA_QUERY_OPA) {
   1425			ib_unpack(opa_path_rec_table,
   1426				  ARRAY_SIZE(opa_path_rec_table),
   1427				  mad->data, &rec);
   1428			rec.rec_type = SA_PATH_REC_TYPE_OPA;
   1429			query->callback(status, &rec, query->context);
   1430		} else {
   1431			ib_unpack(path_rec_table,
   1432				  ARRAY_SIZE(path_rec_table),
   1433				  mad->data, &rec);
   1434			rec.rec_type = SA_PATH_REC_TYPE_IB;
   1435			sa_path_set_dmac_zero(&rec);
   1436
   1437			if (query->conv_pr) {
   1438				struct sa_path_rec opa;
   1439
   1440				memset(&opa, 0, sizeof(struct sa_path_rec));
   1441				sa_convert_path_ib_to_opa(&opa, &rec);
   1442				query->callback(status, &opa, query->context);
   1443			} else {
   1444				query->callback(status, &rec, query->context);
   1445			}
   1446		}
   1447	} else
   1448		query->callback(status, NULL, query->context);
   1449}
   1450
   1451static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
   1452{
   1453	struct ib_sa_path_query *query =
   1454		container_of(sa_query, struct ib_sa_path_query, sa_query);
   1455
   1456	kfree(query->conv_pr);
   1457	kfree(query);
   1458}
   1459
   1460/**
   1461 * ib_sa_path_rec_get - Start a Path get query
   1462 * @client:SA client
   1463 * @device:device to send query on
   1464 * @port_num: port number to send query on
   1465 * @rec:Path Record to send in query
   1466 * @comp_mask:component mask to send in query
   1467 * @timeout_ms:time to wait for response
   1468 * @gfp_mask:GFP mask to use for internal allocations
   1469 * @callback:function called when query completes, times out or is
   1470 * canceled
   1471 * @context:opaque user context passed to callback
   1472 * @sa_query:query context, used to cancel query
   1473 *
   1474 * Send a Path Record Get query to the SA to look up a path.  The
   1475 * callback function will be called when the query completes (or
   1476 * fails); status is 0 for a successful response, -EINTR if the query
    1477 * is canceled, -ETIMEDOUT if the query timed out, or -EIO if an error
   1478 * occurred sending the query.  The resp parameter of the callback is
   1479 * only valid if status is 0.
   1480 *
   1481 * If the return value of ib_sa_path_rec_get() is negative, it is an
   1482 * error code.  Otherwise it is a query ID that can be used to cancel
   1483 * the query.
   1484 */
   1485int ib_sa_path_rec_get(struct ib_sa_client *client,
   1486		       struct ib_device *device, u32 port_num,
   1487		       struct sa_path_rec *rec,
   1488		       ib_sa_comp_mask comp_mask,
   1489		       unsigned long timeout_ms, gfp_t gfp_mask,
   1490		       void (*callback)(int status,
   1491					struct sa_path_rec *resp,
   1492					void *context),
   1493		       void *context,
   1494		       struct ib_sa_query **sa_query)
   1495{
   1496	struct ib_sa_path_query *query;
   1497	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
   1498	struct ib_sa_port   *port;
   1499	struct ib_mad_agent *agent;
   1500	struct ib_sa_mad *mad;
   1501	enum opa_pr_supported status;
   1502	int ret;
   1503
   1504	if (!sa_dev)
   1505		return -ENODEV;
   1506
   1507	if ((rec->rec_type != SA_PATH_REC_TYPE_IB) &&
   1508	    (rec->rec_type != SA_PATH_REC_TYPE_OPA))
   1509		return -EINVAL;
   1510
   1511	port  = &sa_dev->port[port_num - sa_dev->start_port];
   1512	agent = port->agent;
   1513
   1514	query = kzalloc(sizeof(*query), gfp_mask);
   1515	if (!query)
   1516		return -ENOMEM;
   1517
   1518	query->sa_query.port     = port;
   1519	if (rec->rec_type == SA_PATH_REC_TYPE_OPA) {
   1520		status = opa_pr_query_possible(client, sa_dev, device, port_num);
   1521		if (status == PR_NOT_SUPPORTED) {
   1522			ret = -EINVAL;
   1523			goto err1;
   1524		} else if (status == PR_OPA_SUPPORTED) {
   1525			query->sa_query.flags |= IB_SA_QUERY_OPA;
   1526		} else {
   1527			query->conv_pr =
   1528				kmalloc(sizeof(*query->conv_pr), gfp_mask);
   1529			if (!query->conv_pr) {
   1530				ret = -ENOMEM;
   1531				goto err1;
   1532			}
   1533		}
   1534	}
   1535
   1536	ret = alloc_mad(&query->sa_query, gfp_mask);
   1537	if (ret)
   1538		goto err2;
   1539
   1540	ib_sa_client_get(client);
   1541	query->sa_query.client = client;
   1542	query->callback        = callback;
   1543	query->context         = context;
   1544
   1545	mad = query->sa_query.mad_buf->mad;
   1546	init_mad(&query->sa_query, agent);
   1547
   1548	query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL;
   1549	query->sa_query.release  = ib_sa_path_rec_release;
   1550	mad->mad_hdr.method	 = IB_MGMT_METHOD_GET;
   1551	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_PATH_REC);
   1552	mad->sa_hdr.comp_mask	 = comp_mask;
   1553
   1554	if (query->sa_query.flags & IB_SA_QUERY_OPA) {
   1555		ib_pack(opa_path_rec_table, ARRAY_SIZE(opa_path_rec_table),
   1556			rec, mad->data);
   1557	} else if (query->conv_pr) {
   1558		sa_convert_path_opa_to_ib(query->conv_pr, rec);
   1559		ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table),
   1560			query->conv_pr, mad->data);
   1561	} else {
   1562		ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table),
   1563			rec, mad->data);
   1564	}
   1565
   1566	*sa_query = &query->sa_query;
   1567
   1568	query->sa_query.flags |= IB_SA_ENABLE_LOCAL_SERVICE;
   1569	query->sa_query.mad_buf->context[1] = (query->conv_pr) ?
   1570						query->conv_pr : rec;
   1571
   1572	ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
   1573	if (ret < 0)
   1574		goto err3;
   1575
   1576	return ret;
   1577
   1578err3:
   1579	*sa_query = NULL;
   1580	ib_sa_client_put(query->sa_query.client);
   1581	free_mad(&query->sa_query);
   1582err2:
   1583	kfree(query->conv_pr);
   1584err1:
   1585	kfree(query);
   1586	return ret;
   1587}
   1588EXPORT_SYMBOL(ib_sa_path_rec_get);
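        /*
         * Asynchronous usage sketch for ib_sa_path_rec_get() (hypothetical
         * caller names; the callback contract and status values are the ones
         * documented in the kernel-doc above):
         *
         *	static void my_path_handler(int status, struct sa_path_rec *resp,
         *				    void *context)
         *	{
         *		if (!status)
         *			;	// resp is valid only when status == 0
         *	}
         *
         *	id = ib_sa_path_rec_get(client, device, port_num, &rec,
         *				IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID,
         *				1000, GFP_KERNEL, my_path_handler,
         *				my_context, &sa_query);
         *	if (id < 0)
         *		return id;	// negative return is an error code
         *
         * A non-negative return value is the query ID that
         * ib_sa_cancel_query() accepts while the request is still in flight.
         */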
   1589
   1590static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query,
   1591					int status,
   1592					struct ib_sa_mad *mad)
   1593{
   1594	struct ib_sa_mcmember_query *query =
   1595		container_of(sa_query, struct ib_sa_mcmember_query, sa_query);
   1596
   1597	if (mad) {
   1598		struct ib_sa_mcmember_rec rec;
   1599
   1600		ib_unpack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
   1601			  mad->data, &rec);
   1602		query->callback(status, &rec, query->context);
   1603	} else
   1604		query->callback(status, NULL, query->context);
   1605}
   1606
   1607static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query)
   1608{
   1609	kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query));
   1610}
   1611
   1612int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
   1613			     struct ib_device *device, u32 port_num,
   1614			     u8 method,
   1615			     struct ib_sa_mcmember_rec *rec,
   1616			     ib_sa_comp_mask comp_mask,
   1617			     unsigned long timeout_ms, gfp_t gfp_mask,
   1618			     void (*callback)(int status,
   1619					      struct ib_sa_mcmember_rec *resp,
   1620					      void *context),
   1621			     void *context,
   1622			     struct ib_sa_query **sa_query)
   1623{
   1624	struct ib_sa_mcmember_query *query;
   1625	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
   1626	struct ib_sa_port   *port;
   1627	struct ib_mad_agent *agent;
   1628	struct ib_sa_mad *mad;
   1629	int ret;
   1630
   1631	if (!sa_dev)
   1632		return -ENODEV;
   1633
   1634	port  = &sa_dev->port[port_num - sa_dev->start_port];
   1635	agent = port->agent;
   1636
   1637	query = kzalloc(sizeof(*query), gfp_mask);
   1638	if (!query)
   1639		return -ENOMEM;
   1640
   1641	query->sa_query.port     = port;
   1642	ret = alloc_mad(&query->sa_query, gfp_mask);
   1643	if (ret)
   1644		goto err1;
   1645
   1646	ib_sa_client_get(client);
   1647	query->sa_query.client = client;
   1648	query->callback        = callback;
   1649	query->context         = context;
   1650
   1651	mad = query->sa_query.mad_buf->mad;
   1652	init_mad(&query->sa_query, agent);
   1653
   1654	query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL;
   1655	query->sa_query.release  = ib_sa_mcmember_rec_release;
   1656	mad->mad_hdr.method	 = method;
   1657	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
   1658	mad->sa_hdr.comp_mask	 = comp_mask;
   1659
   1660	ib_pack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
   1661		rec, mad->data);
   1662
   1663	*sa_query = &query->sa_query;
   1664
   1665	ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
   1666	if (ret < 0)
   1667		goto err2;
   1668
   1669	return ret;
   1670
   1671err2:
   1672	*sa_query = NULL;
   1673	ib_sa_client_put(query->sa_query.client);
   1674	free_mad(&query->sa_query);
   1675
   1676err1:
   1677	kfree(query);
   1678	return ret;
   1679}
   1680
   1681/* Support GuidInfoRecord */
   1682static void ib_sa_guidinfo_rec_callback(struct ib_sa_query *sa_query,
   1683					int status,
   1684					struct ib_sa_mad *mad)
   1685{
   1686	struct ib_sa_guidinfo_query *query =
   1687		container_of(sa_query, struct ib_sa_guidinfo_query, sa_query);
   1688
   1689	if (mad) {
   1690		struct ib_sa_guidinfo_rec rec;
   1691
   1692		ib_unpack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table),
   1693			  mad->data, &rec);
   1694		query->callback(status, &rec, query->context);
   1695	} else
   1696		query->callback(status, NULL, query->context);
   1697}
   1698
   1699static void ib_sa_guidinfo_rec_release(struct ib_sa_query *sa_query)
   1700{
   1701	kfree(container_of(sa_query, struct ib_sa_guidinfo_query, sa_query));
   1702}
   1703
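/*
 * ib_sa_guid_info_rec_query - issue an SA GuidInfoRecord query
 *
 * Only IB_MGMT_METHOD_GET, IB_MGMT_METHOD_SET and IB_SA_METHOD_DELETE are
 * accepted.  As with the other record queries, the non-negative query id
 * is returned on success and the callback receives the unpacked record
 * (or NULL on error).
 */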
   1704int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
   1705			      struct ib_device *device, u32 port_num,
   1706			      struct ib_sa_guidinfo_rec *rec,
   1707			      ib_sa_comp_mask comp_mask, u8 method,
   1708			      unsigned long timeout_ms, gfp_t gfp_mask,
   1709			      void (*callback)(int status,
   1710					       struct ib_sa_guidinfo_rec *resp,
   1711					       void *context),
   1712			      void *context,
   1713			      struct ib_sa_query **sa_query)
   1714{
   1715	struct ib_sa_guidinfo_query *query;
   1716	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
   1717	struct ib_sa_port *port;
   1718	struct ib_mad_agent *agent;
   1719	struct ib_sa_mad *mad;
   1720	int ret;
   1721
   1722	if (!sa_dev)
   1723		return -ENODEV;
   1724
   1725	if (method != IB_MGMT_METHOD_GET &&
   1726	    method != IB_MGMT_METHOD_SET &&
   1727	    method != IB_SA_METHOD_DELETE) {
   1728		return -EINVAL;
   1729	}
   1730
   1731	port  = &sa_dev->port[port_num - sa_dev->start_port];
   1732	agent = port->agent;
   1733
   1734	query = kzalloc(sizeof(*query), gfp_mask);
   1735	if (!query)
   1736		return -ENOMEM;
   1737
   1738	query->sa_query.port = port;
   1739	ret = alloc_mad(&query->sa_query, gfp_mask);
   1740	if (ret)
   1741		goto err1;
   1742
   1743	ib_sa_client_get(client);
   1744	query->sa_query.client = client;
   1745	query->callback        = callback;
   1746	query->context         = context;
   1747
   1748	mad = query->sa_query.mad_buf->mad;
   1749	init_mad(&query->sa_query, agent);
   1750
   1751	query->sa_query.callback = callback ? ib_sa_guidinfo_rec_callback : NULL;
   1752	query->sa_query.release  = ib_sa_guidinfo_rec_release;
   1753
   1754	mad->mad_hdr.method	 = method;
   1755	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_GUID_INFO_REC);
   1756	mad->sa_hdr.comp_mask	 = comp_mask;
   1757
   1758	ib_pack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table), rec,
   1759		mad->data);
   1760
   1761	*sa_query = &query->sa_query;
   1762
   1763	ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
   1764	if (ret < 0)
   1765		goto err2;
   1766
   1767	return ret;
   1768
   1769err2:
   1770	*sa_query = NULL;
   1771	ib_sa_client_put(query->sa_query.client);
   1772	free_mad(&query->sa_query);
   1773
   1774err1:
   1775	kfree(query);
   1776	return ret;
   1777}
   1778EXPORT_SYMBOL(ib_sa_guid_info_rec_query);
   1779
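/*
 * ClassPortInfo caching: the query issued by ib_sa_classport_info_rec_query()
 * completes into ib_sa_classport_info_rec_callback(), which stores the first
 * successful response in port->classport_info (IB or OPA layout, depending
 * on IB_SA_QUERY_OPA) under classport_lock and marks it valid.  The context
 * below lets update_ib_cpi() wait for that completion.
 */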
   1780struct ib_classport_info_context {
   1781	struct completion	done;
   1782	struct ib_sa_query	*sa_query;
   1783};
   1784
   1785static void ib_classportinfo_cb(void *context)
   1786{
   1787	struct ib_classport_info_context *cb_ctx = context;
   1788
   1789	complete(&cb_ctx->done);
   1790}
   1791
   1792static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query,
   1793					      int status,
   1794					      struct ib_sa_mad *mad)
   1795{
   1796	unsigned long flags;
   1797	struct ib_sa_classport_info_query *query =
   1798		container_of(sa_query, struct ib_sa_classport_info_query, sa_query);
   1799	struct ib_sa_classport_cache *info = &sa_query->port->classport_info;
   1800
   1801	if (mad) {
   1802		if (sa_query->flags & IB_SA_QUERY_OPA) {
   1803			struct opa_class_port_info rec;
   1804
   1805			ib_unpack(opa_classport_info_rec_table,
   1806				  ARRAY_SIZE(opa_classport_info_rec_table),
   1807				  mad->data, &rec);
   1808
   1809			spin_lock_irqsave(&sa_query->port->classport_lock,
   1810					  flags);
   1811			if (!status && !info->valid) {
   1812				memcpy(&info->data.opa, &rec,
   1813				       sizeof(info->data.opa));
   1814
   1815				info->valid = true;
   1816				info->data.type = RDMA_CLASS_PORT_INFO_OPA;
   1817			}
   1818			spin_unlock_irqrestore(&sa_query->port->classport_lock,
   1819					       flags);
   1820
   1821		} else {
   1822			struct ib_class_port_info rec;
   1823
   1824			ib_unpack(ib_classport_info_rec_table,
   1825				  ARRAY_SIZE(ib_classport_info_rec_table),
   1826				  mad->data, &rec);
   1827
   1828			spin_lock_irqsave(&sa_query->port->classport_lock,
   1829					  flags);
   1830			if (!status && !info->valid) {
   1831				memcpy(&info->data.ib, &rec,
   1832				       sizeof(info->data.ib));
   1833
   1834				info->valid = true;
   1835				info->data.type = RDMA_CLASS_PORT_INFO_IB;
   1836			}
   1837			spin_unlock_irqrestore(&sa_query->port->classport_lock,
   1838					       flags);
   1839		}
   1840	}
   1841	query->callback(query->context);
   1842}
   1843
   1844static void ib_sa_classport_info_rec_release(struct ib_sa_query *sa_query)
   1845{
   1846	kfree(container_of(sa_query, struct ib_sa_classport_info_query,
   1847			   sa_query));
   1848}
   1849
   1850static int ib_sa_classport_info_rec_query(struct ib_sa_port *port,
   1851					  unsigned long timeout_ms,
   1852					  void (*callback)(void *context),
   1853					  void *context,
   1854					  struct ib_sa_query **sa_query)
   1855{
   1856	struct ib_mad_agent *agent;
   1857	struct ib_sa_classport_info_query *query;
   1858	struct ib_sa_mad *mad;
   1859	gfp_t gfp_mask = GFP_KERNEL;
   1860	int ret;
   1861
   1862	agent = port->agent;
   1863
   1864	query = kzalloc(sizeof(*query), gfp_mask);
   1865	if (!query)
   1866		return -ENOMEM;
   1867
   1868	query->sa_query.port = port;
   1869	query->sa_query.flags |= rdma_cap_opa_ah(port->agent->device,
   1870						 port->port_num) ?
   1871				 IB_SA_QUERY_OPA : 0;
   1872	ret = alloc_mad(&query->sa_query, gfp_mask);
   1873	if (ret)
   1874		goto err_free;
   1875
   1876	query->callback = callback;
   1877	query->context = context;
   1878
   1879	mad = query->sa_query.mad_buf->mad;
   1880	init_mad(&query->sa_query, agent);
   1881
   1882	query->sa_query.callback = ib_sa_classport_info_rec_callback;
   1883	query->sa_query.release  = ib_sa_classport_info_rec_release;
   1884	mad->mad_hdr.method	 = IB_MGMT_METHOD_GET;
   1885	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_CLASS_PORTINFO);
   1886	mad->sa_hdr.comp_mask	 = 0;
   1887	*sa_query = &query->sa_query;
   1888
   1889	ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
   1890	if (ret < 0)
   1891		goto err_free_mad;
   1892
   1893	return ret;
   1894
   1895err_free_mad:
   1896	*sa_query = NULL;
   1897	free_mad(&query->sa_query);
   1898
   1899err_free:
   1900	kfree(query);
   1901	return ret;
   1902}
   1903
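/*
 * Delayed work handler that (re)fetches the cached ClassPortInfo for a port.
 * It returns early if the cache is already valid, otherwise issues the query
 * with a 3 second timeout and waits for it to complete.  If the cache is
 * still invalid afterwards, the work requeues itself after
 * IB_SA_CPI_RETRY_WAIT msecs, up to IB_SA_CPI_MAX_RETRY_CNT retries.
 */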
   1904static void update_ib_cpi(struct work_struct *work)
   1905{
   1906	struct ib_sa_port *port =
   1907		container_of(work, struct ib_sa_port, ib_cpi_work.work);
   1908	struct ib_classport_info_context *cb_context;
   1909	unsigned long flags;
   1910	int ret;
   1911
   1912	/* If the classport info is already valid, there is
   1913	 * nothing to do here.
   1914	 */
   1915	spin_lock_irqsave(&port->classport_lock, flags);
   1916	if (port->classport_info.valid) {
   1917		spin_unlock_irqrestore(&port->classport_lock, flags);
   1918		return;
   1919	}
   1920	spin_unlock_irqrestore(&port->classport_lock, flags);
   1921
   1922	cb_context = kmalloc(sizeof(*cb_context), GFP_KERNEL);
   1923	if (!cb_context)
   1924		goto err_nomem;
   1925
   1926	init_completion(&cb_context->done);
   1927
   1928	ret = ib_sa_classport_info_rec_query(port, 3000,
   1929					     ib_classportinfo_cb, cb_context,
   1930					     &cb_context->sa_query);
   1931	if (ret < 0)
   1932		goto free_cb_err;
   1933	wait_for_completion(&cb_context->done);
   1934free_cb_err:
   1935	kfree(cb_context);
   1936	spin_lock_irqsave(&port->classport_lock, flags);
   1937
   1938	/* If the classport info is still not valid, the query must have
   1939	 * failed; retry it (up to IB_SA_CPI_MAX_RETRY_CNT times).
   1940	 */
   1941	if (!port->classport_info.valid) {
   1942		port->classport_info.retry_cnt++;
   1943		if (port->classport_info.retry_cnt <=
   1944		    IB_SA_CPI_MAX_RETRY_CNT) {
   1945			unsigned long delay =
   1946				msecs_to_jiffies(IB_SA_CPI_RETRY_WAIT);
   1947
   1948			queue_delayed_work(ib_wq, &port->ib_cpi_work, delay);
   1949		}
   1950	}
   1951	spin_unlock_irqrestore(&port->classport_lock, flags);
   1952
   1953err_nomem:
   1954	return;
   1955}
   1956
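/*
 * MAD send completion handler.  A successful send needs no callback here
 * (the response is delivered through recv_handler()); timeouts, flushes and
 * other errors are reported to the query callback as -ETIMEDOUT, -EINTR and
 * -EIO respectively.  In all cases the query is removed from the xarray and
 * its resources are released.
 */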
   1957static void send_handler(struct ib_mad_agent *agent,
   1958			 struct ib_mad_send_wc *mad_send_wc)
   1959{
   1960	struct ib_sa_query *query = mad_send_wc->send_buf->context[0];
   1961	unsigned long flags;
   1962
   1963	if (query->callback)
   1964		switch (mad_send_wc->status) {
   1965		case IB_WC_SUCCESS:
   1966			/* No callback -- response already delivered to recv_handler() */
   1967			break;
   1968		case IB_WC_RESP_TIMEOUT_ERR:
   1969			query->callback(query, -ETIMEDOUT, NULL);
   1970			break;
   1971		case IB_WC_WR_FLUSH_ERR:
   1972			query->callback(query, -EINTR, NULL);
   1973			break;
   1974		default:
   1975			query->callback(query, -EIO, NULL);
   1976			break;
   1977		}
   1978
   1979	xa_lock_irqsave(&queries, flags);
   1980	__xa_erase(&queries, query->id);
   1981	xa_unlock_irqrestore(&queries, flags);
   1982
   1983	free_mad(query);
   1984	if (query->client)
   1985		ib_sa_client_put(query->client);
   1986	query->release(query);
   1987}
   1988
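/*
 * MAD receive handler for SA responses.  The originating query is recovered
 * from send_buf->context[0]; a non-zero status in the MAD header is reported
 * as -EINVAL and a failed work completion as -EIO.  The receive MAD is
 * always freed here.
 */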
   1989static void recv_handler(struct ib_mad_agent *mad_agent,
   1990			 struct ib_mad_send_buf *send_buf,
   1991			 struct ib_mad_recv_wc *mad_recv_wc)
   1992{
   1993	struct ib_sa_query *query;
   1994
   1995	if (!send_buf)
   1996		return;
   1997
   1998	query = send_buf->context[0];
   1999	if (query->callback) {
   2000		if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
   2001			query->callback(query,
   2002					mad_recv_wc->recv_buf.mad->mad_hdr.status ?
   2003					-EINVAL : 0,
   2004					(struct ib_sa_mad *) mad_recv_wc->recv_buf.mad);
   2005		else
   2006			query->callback(query, -EIO, NULL);
   2007	}
   2008
   2009	ib_free_recv_mad(mad_recv_wc);
   2010}
   2011
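/*
 * Work handler that rebuilds the cached address handle towards the SM.
 * It re-reads the port attributes, looks up the full default P_Key index,
 * builds an AH for the current SM LID/SL (with GRH/GRD handling for the
 * OPA and grh_required cases) and swaps it into port->sm_ah under ah_lock,
 * dropping the reference on the previous AH.
 */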
   2012static void update_sm_ah(struct work_struct *work)
   2013{
   2014	struct ib_sa_port *port =
   2015		container_of(work, struct ib_sa_port, update_task);
   2016	struct ib_sa_sm_ah *new_ah;
   2017	struct ib_port_attr port_attr;
   2018	struct rdma_ah_attr   ah_attr;
   2019	bool grh_required;
   2020
   2021	if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
   2022		pr_warn("Couldn't query port\n");
   2023		return;
   2024	}
   2025
   2026	new_ah = kmalloc(sizeof(*new_ah), GFP_KERNEL);
   2027	if (!new_ah)
   2028		return;
   2029
   2030	kref_init(&new_ah->ref);
   2031	new_ah->src_path_mask = (1 << port_attr.lmc) - 1;
   2032
   2033	new_ah->pkey_index = 0;
   2034	if (ib_find_pkey(port->agent->device, port->port_num,
   2035			 IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index))
   2036		pr_err("Couldn't find index for default PKey\n");
   2037
   2038	memset(&ah_attr, 0, sizeof(ah_attr));
   2039	ah_attr.type = rdma_ah_find_type(port->agent->device,
   2040					 port->port_num);
   2041	rdma_ah_set_dlid(&ah_attr, port_attr.sm_lid);
   2042	rdma_ah_set_sl(&ah_attr, port_attr.sm_sl);
   2043	rdma_ah_set_port_num(&ah_attr, port->port_num);
   2044
   2045	grh_required = rdma_is_grh_required(port->agent->device,
   2046					    port->port_num);
   2047
   2048	/*
   2049	 * An OPA sm_lid of 0xFFFF needs special handling so that it can be
   2050	 * differentiated from a permissive LID of 0xFFFF.  We set the make_grd
   2051	 * flag here so the DGID in the address handle gets programmed
   2052	 * appropriately.
   2053	 */
   2054	if (ah_attr.type == RDMA_AH_ATTR_TYPE_OPA &&
   2055	    (grh_required ||
   2056	     port_attr.sm_lid == be16_to_cpu(IB_LID_PERMISSIVE)))
   2057		rdma_ah_set_make_grd(&ah_attr, true);
   2058
   2059	if (ah_attr.type == RDMA_AH_ATTR_TYPE_IB && grh_required) {
   2060		rdma_ah_set_ah_flags(&ah_attr, IB_AH_GRH);
   2061		rdma_ah_set_subnet_prefix(&ah_attr,
   2062					  cpu_to_be64(port_attr.subnet_prefix));
   2063		rdma_ah_set_interface_id(&ah_attr,
   2064					 cpu_to_be64(IB_SA_WELL_KNOWN_GUID));
   2065	}
   2066
   2067	new_ah->ah = rdma_create_ah(port->agent->qp->pd, &ah_attr,
   2068				    RDMA_CREATE_AH_SLEEPABLE);
   2069	if (IS_ERR(new_ah->ah)) {
   2070		pr_warn("Couldn't create new SM AH\n");
   2071		kfree(new_ah);
   2072		return;
   2073	}
   2074
   2075	spin_lock_irq(&port->ah_lock);
   2076	if (port->sm_ah)
   2077		kref_put(&port->sm_ah->ref, free_sm_ah);
   2078	port->sm_ah = new_ah;
   2079	spin_unlock_irq(&port->ah_lock);
   2080}
   2081
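/*
 * Device event handler.  Any event that can change the path to the SM drops
 * the cached SM address handle and queues update_sm_ah(); SM change, client
 * reregister, LID change and port-active events additionally invalidate the
 * cached ClassPortInfo and schedule update_ib_cpi() after a short delay.
 */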
   2082static void ib_sa_event(struct ib_event_handler *handler,
   2083			struct ib_event *event)
   2084{
   2085	if (event->event == IB_EVENT_PORT_ERR    ||
   2086	    event->event == IB_EVENT_PORT_ACTIVE ||
   2087	    event->event == IB_EVENT_LID_CHANGE  ||
   2088	    event->event == IB_EVENT_PKEY_CHANGE ||
   2089	    event->event == IB_EVENT_SM_CHANGE   ||
   2090	    event->event == IB_EVENT_CLIENT_REREGISTER) {
   2091		unsigned long flags;
   2092		struct ib_sa_device *sa_dev =
   2093			container_of(handler, typeof(*sa_dev), event_handler);
   2094		u32 port_num = event->element.port_num - sa_dev->start_port;
   2095		struct ib_sa_port *port = &sa_dev->port[port_num];
   2096
   2097		if (!rdma_cap_ib_sa(handler->device, port->port_num))
   2098			return;
   2099
   2100		spin_lock_irqsave(&port->ah_lock, flags);
   2101		if (port->sm_ah)
   2102			kref_put(&port->sm_ah->ref, free_sm_ah);
   2103		port->sm_ah = NULL;
   2104		spin_unlock_irqrestore(&port->ah_lock, flags);
   2105
   2106		if (event->event == IB_EVENT_SM_CHANGE ||
   2107		    event->event == IB_EVENT_CLIENT_REREGISTER ||
   2108		    event->event == IB_EVENT_LID_CHANGE ||
   2109		    event->event == IB_EVENT_PORT_ACTIVE) {
   2110			unsigned long delay =
   2111				msecs_to_jiffies(IB_SA_CPI_RETRY_WAIT);
   2112
   2113			spin_lock_irqsave(&port->classport_lock, flags);
   2114			port->classport_info.valid = false;
   2115			port->classport_info.retry_cnt = 0;
   2116			spin_unlock_irqrestore(&port->classport_lock, flags);
   2117			queue_delayed_work(ib_wq,
   2118					   &port->ib_cpi_work, delay);
   2119		}
   2120		queue_work(ib_wq, &sa_dev->port[port_num].update_task);
   2121	}
   2122}
   2123
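/*
 * Client "add" callback: allocate per-device state, register a GSI MAD agent
 * (with send_handler/recv_handler) for every port that supports the IB SA,
 * hook up the event handler and then prime each port's SM address handle.
 * Fails with -EOPNOTSUPP if no port on the device supports the SA.
 */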
   2124static int ib_sa_add_one(struct ib_device *device)
   2125{
   2126	struct ib_sa_device *sa_dev;
   2127	int s, e, i;
   2128	int count = 0;
   2129	int ret;
   2130
   2131	s = rdma_start_port(device);
   2132	e = rdma_end_port(device);
   2133
   2134	sa_dev = kzalloc(struct_size(sa_dev, port, e - s + 1), GFP_KERNEL);
   2135	if (!sa_dev)
   2136		return -ENOMEM;
   2137
   2138	sa_dev->start_port = s;
   2139	sa_dev->end_port   = e;
   2140
   2141	for (i = 0; i <= e - s; ++i) {
   2142		spin_lock_init(&sa_dev->port[i].ah_lock);
   2143		if (!rdma_cap_ib_sa(device, i + 1))
   2144			continue;
   2145
   2146		sa_dev->port[i].sm_ah    = NULL;
   2147		sa_dev->port[i].port_num = i + s;
   2148
   2149		spin_lock_init(&sa_dev->port[i].classport_lock);
   2150		sa_dev->port[i].classport_info.valid = false;
   2151
   2152		sa_dev->port[i].agent =
   2153			ib_register_mad_agent(device, i + s, IB_QPT_GSI,
   2154					      NULL, 0, send_handler,
   2155					      recv_handler, sa_dev, 0);
   2156		if (IS_ERR(sa_dev->port[i].agent)) {
   2157			ret = PTR_ERR(sa_dev->port[i].agent);
   2158			goto err;
   2159		}
   2160
   2161		INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah);
   2162		INIT_DELAYED_WORK(&sa_dev->port[i].ib_cpi_work,
   2163				  update_ib_cpi);
   2164
   2165		count++;
   2166	}
   2167
   2168	if (!count) {
   2169		ret = -EOPNOTSUPP;
   2170		goto free;
   2171	}
   2172
   2173	ib_set_client_data(device, &sa_client, sa_dev);
   2174
   2175	/*
   2176	 * We register our event handler after everything is set up,
   2177	 * and then update our cached info after the event handler is
   2178	 * registered to avoid any problems if a port changes state
   2179	 * during our initialization.
   2180	 */
   2181
   2182	INIT_IB_EVENT_HANDLER(&sa_dev->event_handler, device, ib_sa_event);
   2183	ib_register_event_handler(&sa_dev->event_handler);
   2184
   2185	for (i = 0; i <= e - s; ++i) {
   2186		if (rdma_cap_ib_sa(device, i + 1))
   2187			update_sm_ah(&sa_dev->port[i].update_task);
   2188	}
   2189
   2190	return 0;
   2191
   2192err:
   2193	while (--i >= 0) {
   2194		if (rdma_cap_ib_sa(device, i + 1))
   2195			ib_unregister_mad_agent(sa_dev->port[i].agent);
   2196	}
   2197free:
   2198	kfree(sa_dev);
   2199	return ret;
   2200}
   2201
   2202static void ib_sa_remove_one(struct ib_device *device, void *client_data)
   2203{
   2204	struct ib_sa_device *sa_dev = client_data;
   2205	int i;
   2206
   2207	ib_unregister_event_handler(&sa_dev->event_handler);
   2208	flush_workqueue(ib_wq);
   2209
   2210	for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) {
   2211		if (rdma_cap_ib_sa(device, i + 1)) {
   2212			cancel_delayed_work_sync(&sa_dev->port[i].ib_cpi_work);
   2213			ib_unregister_mad_agent(sa_dev->port[i].agent);
   2214			if (sa_dev->port[i].sm_ah)
   2215				kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah);
   2216		}
   2217
   2218	}
   2219
   2220	kfree(sa_dev);
   2221}
   2222
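/*
 * Module init/cleanup for the SA query machinery: seed the MAD transaction
 * id, register the ib_sa client, initialize multicast handling and create
 * the ordered workqueue used for SA netlink requests.  ib_sa_cleanup()
 * reverses this and warns if any queries are still outstanding in the
 * xarray.
 */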
   2223int ib_sa_init(void)
   2224{
   2225	int ret;
   2226
   2227	get_random_bytes(&tid, sizeof tid);
   2228
   2229	atomic_set(&ib_nl_sa_request_seq, 0);
   2230
   2231	ret = ib_register_client(&sa_client);
   2232	if (ret) {
   2233		pr_err("Couldn't register ib_sa client\n");
   2234		goto err1;
   2235	}
   2236
   2237	ret = mcast_init();
   2238	if (ret) {
   2239		pr_err("Couldn't initialize multicast handling\n");
   2240		goto err2;
   2241	}
   2242
   2243	ib_nl_wq = alloc_ordered_workqueue("ib_nl_sa_wq", WQ_MEM_RECLAIM);
   2244	if (!ib_nl_wq) {
   2245		ret = -ENOMEM;
   2246		goto err3;
   2247	}
   2248
   2249	INIT_DELAYED_WORK(&ib_nl_timed_work, ib_nl_request_timeout);
   2250
   2251	return 0;
   2252
   2253err3:
   2254	mcast_cleanup();
   2255err2:
   2256	ib_unregister_client(&sa_client);
   2257err1:
   2258	return ret;
   2259}
   2260
   2261void ib_sa_cleanup(void)
   2262{
   2263	cancel_delayed_work(&ib_nl_timed_work);
   2264	destroy_workqueue(ib_nl_wq);
   2265	mcast_cleanup();
   2266	ib_unregister_client(&sa_client);
   2267	WARN_ON(!xa_empty(&queries));
   2268}