cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

cache.c (43511B)


      1/*
      2 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
      3 * Copyright (c) 2005 Intel Corporation. All rights reserved.
      4 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
      5 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
      6 *
      7 * This software is available to you under a choice of one of two
      8 * licenses.  You may choose to be licensed under the terms of the GNU
      9 * General Public License (GPL) Version 2, available from the file
     10 * COPYING in the main directory of this source tree, or the
     11 * OpenIB.org BSD license below:
     12 *
     13 *     Redistribution and use in source and binary forms, with or
     14 *     without modification, are permitted provided that the following
     15 *     conditions are met:
     16 *
     17 *      - Redistributions of source code must retain the above
     18 *        copyright notice, this list of conditions and the following
     19 *        disclaimer.
     20 *
     21 *      - Redistributions in binary form must reproduce the above
     22 *        copyright notice, this list of conditions and the following
     23 *        disclaimer in the documentation and/or other materials
     24 *        provided with the distribution.
     25 *
     26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
     29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
     30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
     31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
     32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     33 * SOFTWARE.
     34 */
     35
     36#include <linux/if_vlan.h>
     37#include <linux/errno.h>
     38#include <linux/slab.h>
     39#include <linux/workqueue.h>
     40#include <linux/netdevice.h>
     41#include <net/addrconf.h>
     42
     43#include <rdma/ib_cache.h>
     44
     45#include "core_priv.h"
     46
     47struct ib_pkey_cache {
     48	int             table_len;
     49	u16             table[];
     50};
     51
     52struct ib_update_work {
     53	struct work_struct work;
     54	struct ib_event event;
     55	bool enforce_security;
     56};
     57
     58union ib_gid zgid;
     59EXPORT_SYMBOL(zgid);
     60
     61enum gid_attr_find_mask {
     62	GID_ATTR_FIND_MASK_GID          = 1UL << 0,
     63	GID_ATTR_FIND_MASK_NETDEV	= 1UL << 1,
     64	GID_ATTR_FIND_MASK_DEFAULT	= 1UL << 2,
     65	GID_ATTR_FIND_MASK_GID_TYPE	= 1UL << 3,
     66};
     67
     68enum gid_table_entry_state {
     69	GID_TABLE_ENTRY_INVALID		= 1,
     70	GID_TABLE_ENTRY_VALID		= 2,
     71	/*
      72	 * Indicates that the entry is pending removal; there may
      73	 * still be active users of this GID entry.
      74	 * When the last user of the GID entry releases its reference,
      75	 * the GID entry is detached from the table.
     76	 */
     77	GID_TABLE_ENTRY_PENDING_DEL	= 3,
     78};
     79
     80struct roce_gid_ndev_storage {
     81	struct rcu_head rcu_head;
     82	struct net_device *ndev;
     83};
     84
     85struct ib_gid_table_entry {
     86	struct kref			kref;
     87	struct work_struct		del_work;
     88	struct ib_gid_attr		attr;
     89	void				*context;
      90	/* Store the ndev pointer to release the reference later on in
      91	 * call_rcu context, because by that time the gid_table_entry
      92	 * and attr might already be freed. So keep a copy of it.
      93	 * ndev_storage is freed by the rcu callback.
      94	 */
     95	struct roce_gid_ndev_storage	*ndev_storage;
     96	enum gid_table_entry_state	state;
     97};
     98
     99struct ib_gid_table {
    100	int				sz;
    101	/* In RoCE, adding a GID to the table requires:
     102	 * (a) Find if this GID already exists.
     103	 * (b) Find a free space.
     104	 * (c) Write the new GID.
     105	 *
     106	 * Delete requires a different set of operations:
     107	 * (a) Find the GID
     108	 * (b) Delete it.
     109	 *
     110	 */
    111	/* Any writer to data_vec must hold this lock and the write side of
    112	 * rwlock. Readers must hold only rwlock. All writers must be in a
    113	 * sleepable context.
    114	 */
    115	struct mutex			lock;
    116	/* rwlock protects data_vec[ix]->state and entry pointer.
    117	 */
    118	rwlock_t			rwlock;
    119	struct ib_gid_table_entry	**data_vec;
    120	/* bit field, each bit indicates the index of default GID */
    121	u32				default_gid_indices;
    122};
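
/*
 * A minimal sketch of the locking discipline described above (illustrative
 * only; "table" stands for any struct ib_gid_table obtained via
 * rdma_gid_table(), "ix" and "new_entry" are placeholders):
 *
 *	// writer: must be in a sleepable context
 *	mutex_lock(&table->lock);
 *	write_lock_irq(&table->rwlock);
 *	table->data_vec[ix] = new_entry;	// update entry pointer/state
 *	write_unlock_irq(&table->rwlock);
 *	mutex_unlock(&table->lock);
 *
 *	// reader: only the rwlock is needed
 *	read_lock_irqsave(&table->rwlock, flags);
 *	entry = table->data_vec[ix];
 *	read_unlock_irqrestore(&table->rwlock, flags);
 */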
    123
    124static void dispatch_gid_change_event(struct ib_device *ib_dev, u32 port)
    125{
    126	struct ib_event event;
    127
    128	event.device		= ib_dev;
    129	event.element.port_num	= port;
    130	event.event		= IB_EVENT_GID_CHANGE;
    131
    132	ib_dispatch_event_clients(&event);
    133}
    134
    135static const char * const gid_type_str[] = {
    136	/* IB/RoCE v1 value is set for IB_GID_TYPE_IB and IB_GID_TYPE_ROCE for
    137	 * user space compatibility reasons.
    138	 */
    139	[IB_GID_TYPE_IB]	= "IB/RoCE v1",
    140	[IB_GID_TYPE_ROCE]	= "IB/RoCE v1",
    141	[IB_GID_TYPE_ROCE_UDP_ENCAP]	= "RoCE v2",
    142};
    143
    144const char *ib_cache_gid_type_str(enum ib_gid_type gid_type)
    145{
    146	if (gid_type < ARRAY_SIZE(gid_type_str) && gid_type_str[gid_type])
    147		return gid_type_str[gid_type];
    148
    149	return "Invalid GID type";
    150}
    151EXPORT_SYMBOL(ib_cache_gid_type_str);
    152
    153/** rdma_is_zero_gid - Check if given GID is zero or not.
    154 * @gid:	GID to check
    155 * Returns true if given GID is zero, returns false otherwise.
    156 */
    157bool rdma_is_zero_gid(const union ib_gid *gid)
    158{
    159	return !memcmp(gid, &zgid, sizeof(*gid));
    160}
    161EXPORT_SYMBOL(rdma_is_zero_gid);
    162
    163/** is_gid_index_default - Check if a given index belongs to
    164 * reserved default GIDs or not.
    165 * @table:	GID table pointer
    166 * @index:	Index to check in GID table
     167 * Returns true if index is one of the reserved default GID indices,
     168 * otherwise returns false.
    169 */
    170static bool is_gid_index_default(const struct ib_gid_table *table,
    171				 unsigned int index)
    172{
    173	return index < 32 && (BIT(index) & table->default_gid_indices);
    174}
    175
    176int ib_cache_gid_parse_type_str(const char *buf)
    177{
    178	unsigned int i;
    179	size_t len;
    180	int err = -EINVAL;
    181
    182	len = strlen(buf);
    183	if (len == 0)
    184		return -EINVAL;
    185
    186	if (buf[len - 1] == '\n')
    187		len--;
    188
    189	for (i = 0; i < ARRAY_SIZE(gid_type_str); ++i)
    190		if (gid_type_str[i] && !strncmp(buf, gid_type_str[i], len) &&
    191		    len == strlen(gid_type_str[i])) {
    192			err = i;
    193			break;
    194		}
    195
    196	return err;
    197}
    198EXPORT_SYMBOL(ib_cache_gid_parse_type_str);
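
/*
 * Example (illustrative sketch): parsing a sysfs-style buffer, possibly
 * ending in a newline, back into a GID type:
 *
 *	int type = ib_cache_gid_parse_type_str("RoCE v2\n");
 *	// type == IB_GID_TYPE_ROCE_UDP_ENCAP on success, -EINVAL otherwise
 */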
    199
    200static struct ib_gid_table *rdma_gid_table(struct ib_device *device, u32 port)
    201{
    202	return device->port_data[port].cache.gid;
    203}
    204
    205static bool is_gid_entry_free(const struct ib_gid_table_entry *entry)
    206{
    207	return !entry;
    208}
    209
    210static bool is_gid_entry_valid(const struct ib_gid_table_entry *entry)
    211{
    212	return entry && entry->state == GID_TABLE_ENTRY_VALID;
    213}
    214
    215static void schedule_free_gid(struct kref *kref)
    216{
    217	struct ib_gid_table_entry *entry =
    218			container_of(kref, struct ib_gid_table_entry, kref);
    219
    220	queue_work(ib_wq, &entry->del_work);
    221}
    222
    223static void put_gid_ndev(struct rcu_head *head)
    224{
    225	struct roce_gid_ndev_storage *storage =
    226		container_of(head, struct roce_gid_ndev_storage, rcu_head);
    227
    228	WARN_ON(!storage->ndev);
     229	/* At this point it's safe to release the netdev reference,
    230	 * as all callers working on gid_attr->ndev are done
    231	 * using this netdev.
    232	 */
    233	dev_put(storage->ndev);
    234	kfree(storage);
    235}
    236
    237static void free_gid_entry_locked(struct ib_gid_table_entry *entry)
    238{
    239	struct ib_device *device = entry->attr.device;
    240	u32 port_num = entry->attr.port_num;
    241	struct ib_gid_table *table = rdma_gid_table(device, port_num);
    242
    243	dev_dbg(&device->dev, "%s port=%u index=%u gid %pI6\n", __func__,
    244		port_num, entry->attr.index, entry->attr.gid.raw);
    245
    246	write_lock_irq(&table->rwlock);
    247
    248	/*
     249	 * The only way to avoid overwriting NULL in the table is
     250	 * to check whether this is still the same entry in the table.
     251	 * If a new entry was added at this index by the time we free here,
    252	 * don't overwrite the table entry.
    253	 */
    254	if (entry == table->data_vec[entry->attr.index])
    255		table->data_vec[entry->attr.index] = NULL;
    256	/* Now this index is ready to be allocated */
    257	write_unlock_irq(&table->rwlock);
    258
    259	if (entry->ndev_storage)
    260		call_rcu(&entry->ndev_storage->rcu_head, put_gid_ndev);
    261	kfree(entry);
    262}
    263
    264static void free_gid_entry(struct kref *kref)
    265{
    266	struct ib_gid_table_entry *entry =
    267			container_of(kref, struct ib_gid_table_entry, kref);
    268
    269	free_gid_entry_locked(entry);
    270}
    271
    272/**
    273 * free_gid_work - Release reference to the GID entry
    274 * @work: Work structure to refer to GID entry which needs to be
    275 * deleted.
    276 *
    277 * free_gid_work() frees the entry from the HCA's hardware table
     278 * if the provider supports it. It releases the reference to the netdevice.
    279 */
    280static void free_gid_work(struct work_struct *work)
    281{
    282	struct ib_gid_table_entry *entry =
    283		container_of(work, struct ib_gid_table_entry, del_work);
    284	struct ib_device *device = entry->attr.device;
    285	u32 port_num = entry->attr.port_num;
    286	struct ib_gid_table *table = rdma_gid_table(device, port_num);
    287
    288	mutex_lock(&table->lock);
    289	free_gid_entry_locked(entry);
    290	mutex_unlock(&table->lock);
    291}
    292
    293static struct ib_gid_table_entry *
    294alloc_gid_entry(const struct ib_gid_attr *attr)
    295{
    296	struct ib_gid_table_entry *entry;
    297	struct net_device *ndev;
    298
    299	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
    300	if (!entry)
    301		return NULL;
    302
    303	ndev = rcu_dereference_protected(attr->ndev, 1);
    304	if (ndev) {
    305		entry->ndev_storage = kzalloc(sizeof(*entry->ndev_storage),
    306					      GFP_KERNEL);
    307		if (!entry->ndev_storage) {
    308			kfree(entry);
    309			return NULL;
    310		}
    311		dev_hold(ndev);
    312		entry->ndev_storage->ndev = ndev;
    313	}
    314	kref_init(&entry->kref);
    315	memcpy(&entry->attr, attr, sizeof(*attr));
    316	INIT_WORK(&entry->del_work, free_gid_work);
    317	entry->state = GID_TABLE_ENTRY_INVALID;
    318	return entry;
    319}
    320
    321static void store_gid_entry(struct ib_gid_table *table,
    322			    struct ib_gid_table_entry *entry)
    323{
    324	entry->state = GID_TABLE_ENTRY_VALID;
    325
    326	dev_dbg(&entry->attr.device->dev, "%s port=%u index=%u gid %pI6\n",
    327		__func__, entry->attr.port_num, entry->attr.index,
    328		entry->attr.gid.raw);
    329
    330	lockdep_assert_held(&table->lock);
    331	write_lock_irq(&table->rwlock);
    332	table->data_vec[entry->attr.index] = entry;
    333	write_unlock_irq(&table->rwlock);
    334}
    335
    336static void get_gid_entry(struct ib_gid_table_entry *entry)
    337{
    338	kref_get(&entry->kref);
    339}
    340
    341static void put_gid_entry(struct ib_gid_table_entry *entry)
    342{
    343	kref_put(&entry->kref, schedule_free_gid);
    344}
    345
    346static void put_gid_entry_locked(struct ib_gid_table_entry *entry)
    347{
    348	kref_put(&entry->kref, free_gid_entry);
    349}
    350
    351static int add_roce_gid(struct ib_gid_table_entry *entry)
    352{
    353	const struct ib_gid_attr *attr = &entry->attr;
    354	int ret;
    355
    356	if (!attr->ndev) {
    357		dev_err(&attr->device->dev, "%s NULL netdev port=%u index=%u\n",
    358			__func__, attr->port_num, attr->index);
    359		return -EINVAL;
    360	}
    361	if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) {
    362		ret = attr->device->ops.add_gid(attr, &entry->context);
    363		if (ret) {
    364			dev_err(&attr->device->dev,
    365				"%s GID add failed port=%u index=%u\n",
    366				__func__, attr->port_num, attr->index);
    367			return ret;
    368		}
    369	}
    370	return 0;
    371}
    372
    373/**
    374 * del_gid - Delete GID table entry
    375 *
    376 * @ib_dev:	IB device whose GID entry to be deleted
    377 * @port:	Port number of the IB device
    378 * @table:	GID table of the IB device for a port
    379 * @ix:		GID entry index to delete
    380 *
    381 */
    382static void del_gid(struct ib_device *ib_dev, u32 port,
    383		    struct ib_gid_table *table, int ix)
    384{
    385	struct roce_gid_ndev_storage *ndev_storage;
    386	struct ib_gid_table_entry *entry;
    387
    388	lockdep_assert_held(&table->lock);
    389
    390	dev_dbg(&ib_dev->dev, "%s port=%u index=%d gid %pI6\n", __func__, port,
    391		ix, table->data_vec[ix]->attr.gid.raw);
    392
    393	write_lock_irq(&table->rwlock);
    394	entry = table->data_vec[ix];
    395	entry->state = GID_TABLE_ENTRY_PENDING_DEL;
    396	/*
    397	 * For non RoCE protocol, GID entry slot is ready to use.
    398	 */
    399	if (!rdma_protocol_roce(ib_dev, port))
    400		table->data_vec[ix] = NULL;
    401	write_unlock_irq(&table->rwlock);
    402
    403	ndev_storage = entry->ndev_storage;
    404	if (ndev_storage) {
    405		entry->ndev_storage = NULL;
    406		rcu_assign_pointer(entry->attr.ndev, NULL);
    407		call_rcu(&ndev_storage->rcu_head, put_gid_ndev);
    408	}
    409
    410	if (rdma_cap_roce_gid_table(ib_dev, port))
    411		ib_dev->ops.del_gid(&entry->attr, &entry->context);
    412
    413	put_gid_entry_locked(entry);
    414}
    415
    416/**
    417 * add_modify_gid - Add or modify GID table entry
    418 *
    419 * @table:	GID table in which GID to be added or modified
    420 * @attr:	Attributes of the GID
    421 *
     422 * Returns 0 on success or an appropriate error code. It accepts zero
     423 * GID addition for non-RoCE ports on HCAs that report them as valid
     424 * GIDs. However, such zero GIDs are not added to the cache.
    425 */
    426static int add_modify_gid(struct ib_gid_table *table,
    427			  const struct ib_gid_attr *attr)
    428{
    429	struct ib_gid_table_entry *entry;
    430	int ret = 0;
    431
    432	/*
    433	 * Invalidate any old entry in the table to make it safe to write to
    434	 * this index.
    435	 */
    436	if (is_gid_entry_valid(table->data_vec[attr->index]))
    437		del_gid(attr->device, attr->port_num, table, attr->index);
    438
    439	/*
     440	 * Some HCAs report multiple GID entries with only one valid GID, and
     441	 * leave the other unused entries as the zero GID. Convert zero GIDs to
    442	 * empty table entries instead of storing them.
    443	 */
    444	if (rdma_is_zero_gid(&attr->gid))
    445		return 0;
    446
    447	entry = alloc_gid_entry(attr);
    448	if (!entry)
    449		return -ENOMEM;
    450
    451	if (rdma_protocol_roce(attr->device, attr->port_num)) {
    452		ret = add_roce_gid(entry);
    453		if (ret)
    454			goto done;
    455	}
    456
    457	store_gid_entry(table, entry);
    458	return 0;
    459
    460done:
    461	put_gid_entry(entry);
    462	return ret;
    463}
    464
    465/* rwlock should be read locked, or lock should be held */
    466static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
    467		    const struct ib_gid_attr *val, bool default_gid,
    468		    unsigned long mask, int *pempty)
    469{
    470	int i = 0;
    471	int found = -1;
    472	int empty = pempty ? -1 : 0;
    473
    474	while (i < table->sz && (found < 0 || empty < 0)) {
    475		struct ib_gid_table_entry *data = table->data_vec[i];
    476		struct ib_gid_attr *attr;
    477		int curr_index = i;
    478
    479		i++;
    480
     481		/* find_gid() is used during GID addition, where it is expected
     482		 * to return a free entry slot which is not a duplicate.
     483		 * A free entry slot is requested and returned only if pempty is
     484		 * set, so look up a free slot only when requested.
    485		 */
    486		if (pempty && empty < 0) {
    487			if (is_gid_entry_free(data) &&
    488			    default_gid ==
    489				is_gid_index_default(table, curr_index)) {
    490				/*
    491				 * Found an invalid (free) entry; allocate it.
    492				 * If default GID is requested, then our
    493				 * found slot must be one of the DEFAULT
    494				 * reserved slots or we fail.
    495				 * This ensures that only DEFAULT reserved
    496				 * slots are used for default property GIDs.
    497				 */
    498				empty = curr_index;
    499			}
    500		}
    501
    502		/*
     503		 * Additionally, find_gid() is used to find a valid entry during a
     504		 * lookup operation; so ignore the entries which are marked as
    505		 * pending for removal and the entries which are marked as
    506		 * invalid.
    507		 */
    508		if (!is_gid_entry_valid(data))
    509			continue;
    510
    511		if (found >= 0)
    512			continue;
    513
    514		attr = &data->attr;
    515		if (mask & GID_ATTR_FIND_MASK_GID_TYPE &&
    516		    attr->gid_type != val->gid_type)
    517			continue;
    518
    519		if (mask & GID_ATTR_FIND_MASK_GID &&
    520		    memcmp(gid, &data->attr.gid, sizeof(*gid)))
    521			continue;
    522
    523		if (mask & GID_ATTR_FIND_MASK_NETDEV &&
    524		    attr->ndev != val->ndev)
    525			continue;
    526
    527		if (mask & GID_ATTR_FIND_MASK_DEFAULT &&
    528		    is_gid_index_default(table, curr_index) != default_gid)
    529			continue;
    530
    531		found = curr_index;
    532	}
    533
    534	if (pempty)
    535		*pempty = empty;
    536
    537	return found;
    538}
    539
    540static void make_default_gid(struct  net_device *dev, union ib_gid *gid)
    541{
    542	gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
    543	addrconf_ifid_eui48(&gid->raw[8], dev);
    544}
    545
    546static int __ib_cache_gid_add(struct ib_device *ib_dev, u32 port,
    547			      union ib_gid *gid, struct ib_gid_attr *attr,
    548			      unsigned long mask, bool default_gid)
    549{
    550	struct ib_gid_table *table;
    551	int ret = 0;
    552	int empty;
    553	int ix;
    554
    555	/* Do not allow adding zero GID in support of
    556	 * IB spec version 1.3 section 4.1.1 point (6) and
    557	 * section 12.7.10 and section 12.7.20
    558	 */
    559	if (rdma_is_zero_gid(gid))
    560		return -EINVAL;
    561
    562	table = rdma_gid_table(ib_dev, port);
    563
    564	mutex_lock(&table->lock);
    565
    566	ix = find_gid(table, gid, attr, default_gid, mask, &empty);
    567	if (ix >= 0)
    568		goto out_unlock;
    569
    570	if (empty < 0) {
    571		ret = -ENOSPC;
    572		goto out_unlock;
    573	}
    574	attr->device = ib_dev;
    575	attr->index = empty;
    576	attr->port_num = port;
    577	attr->gid = *gid;
    578	ret = add_modify_gid(table, attr);
    579	if (!ret)
    580		dispatch_gid_change_event(ib_dev, port);
    581
    582out_unlock:
    583	mutex_unlock(&table->lock);
    584	if (ret)
    585		pr_warn("%s: unable to add gid %pI6 error=%d\n",
    586			__func__, gid->raw, ret);
    587	return ret;
    588}
    589
    590int ib_cache_gid_add(struct ib_device *ib_dev, u32 port,
    591		     union ib_gid *gid, struct ib_gid_attr *attr)
    592{
    593	unsigned long mask = GID_ATTR_FIND_MASK_GID |
    594			     GID_ATTR_FIND_MASK_GID_TYPE |
    595			     GID_ATTR_FIND_MASK_NETDEV;
    596
    597	return __ib_cache_gid_add(ib_dev, port, gid, attr, mask, false);
    598}
    599
    600static int
    601_ib_cache_gid_del(struct ib_device *ib_dev, u32 port,
    602		  union ib_gid *gid, struct ib_gid_attr *attr,
    603		  unsigned long mask, bool default_gid)
    604{
    605	struct ib_gid_table *table;
    606	int ret = 0;
    607	int ix;
    608
    609	table = rdma_gid_table(ib_dev, port);
    610
    611	mutex_lock(&table->lock);
    612
    613	ix = find_gid(table, gid, attr, default_gid, mask, NULL);
    614	if (ix < 0) {
    615		ret = -EINVAL;
    616		goto out_unlock;
    617	}
    618
    619	del_gid(ib_dev, port, table, ix);
    620	dispatch_gid_change_event(ib_dev, port);
    621
    622out_unlock:
    623	mutex_unlock(&table->lock);
    624	if (ret)
    625		pr_debug("%s: can't delete gid %pI6 error=%d\n",
    626			 __func__, gid->raw, ret);
    627	return ret;
    628}
    629
    630int ib_cache_gid_del(struct ib_device *ib_dev, u32 port,
    631		     union ib_gid *gid, struct ib_gid_attr *attr)
    632{
    633	unsigned long mask = GID_ATTR_FIND_MASK_GID	  |
    634			     GID_ATTR_FIND_MASK_GID_TYPE |
    635			     GID_ATTR_FIND_MASK_DEFAULT  |
    636			     GID_ATTR_FIND_MASK_NETDEV;
    637
    638	return _ib_cache_gid_del(ib_dev, port, gid, attr, mask, false);
    639}
    640
    641int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u32 port,
    642				     struct net_device *ndev)
    643{
    644	struct ib_gid_table *table;
    645	int ix;
    646	bool deleted = false;
    647
    648	table = rdma_gid_table(ib_dev, port);
    649
    650	mutex_lock(&table->lock);
    651
    652	for (ix = 0; ix < table->sz; ix++) {
    653		if (is_gid_entry_valid(table->data_vec[ix]) &&
    654		    table->data_vec[ix]->attr.ndev == ndev) {
    655			del_gid(ib_dev, port, table, ix);
    656			deleted = true;
    657		}
    658	}
    659
    660	mutex_unlock(&table->lock);
    661
    662	if (deleted)
    663		dispatch_gid_change_event(ib_dev, port);
    664
    665	return 0;
    666}
    667
    668/**
    669 * rdma_find_gid_by_port - Returns the GID entry attributes when it finds
    670 * a valid GID entry for given search parameters. It searches for the specified
    671 * GID value in the local software cache.
    672 * @ib_dev: The device to query.
    673 * @gid: The GID value to search for.
    674 * @gid_type: The GID type to search for.
    675 * @port: The port number of the device where the GID value should be searched.
    676 * @ndev: In RoCE, the net device of the device. NULL means ignore.
    677 *
     678 * Returns the SGID attributes with a held reference if the GID is found,
     679 * or returns ERR_PTR for the error.
    680 * The caller must invoke rdma_put_gid_attr() to release the reference.
    681 */
    682const struct ib_gid_attr *
    683rdma_find_gid_by_port(struct ib_device *ib_dev,
    684		      const union ib_gid *gid,
    685		      enum ib_gid_type gid_type,
    686		      u32 port, struct net_device *ndev)
    687{
    688	int local_index;
    689	struct ib_gid_table *table;
    690	unsigned long mask = GID_ATTR_FIND_MASK_GID |
    691			     GID_ATTR_FIND_MASK_GID_TYPE;
    692	struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type};
    693	const struct ib_gid_attr *attr;
    694	unsigned long flags;
    695
    696	if (!rdma_is_port_valid(ib_dev, port))
    697		return ERR_PTR(-ENOENT);
    698
    699	table = rdma_gid_table(ib_dev, port);
    700
    701	if (ndev)
    702		mask |= GID_ATTR_FIND_MASK_NETDEV;
    703
    704	read_lock_irqsave(&table->rwlock, flags);
    705	local_index = find_gid(table, gid, &val, false, mask, NULL);
    706	if (local_index >= 0) {
    707		get_gid_entry(table->data_vec[local_index]);
    708		attr = &table->data_vec[local_index]->attr;
    709		read_unlock_irqrestore(&table->rwlock, flags);
    710		return attr;
    711	}
    712
    713	read_unlock_irqrestore(&table->rwlock, flags);
    714	return ERR_PTR(-ENOENT);
    715}
    716EXPORT_SYMBOL(rdma_find_gid_by_port);
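
/*
 * Usage sketch (illustrative; "ib_dev", "gid" and "ndev" are assumed to be
 * supplied by the caller):
 *
 *	const struct ib_gid_attr *attr;
 *
 *	attr = rdma_find_gid_by_port(ib_dev, &gid, IB_GID_TYPE_ROCE_UDP_ENCAP,
 *				     1, ndev);
 *	if (IS_ERR(attr))
 *		return PTR_ERR(attr);
 *	// ... use attr->index, attr->gid_type, etc. ...
 *	rdma_put_gid_attr(attr);	// drop the reference taken above
 */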
    717
    718/**
    719 * rdma_find_gid_by_filter - Returns the GID table attribute where a
    720 * specified GID value occurs
    721 * @ib_dev: The device to query.
    722 * @gid: The GID value to search for.
    723 * @port: The port number of the device where the GID value could be
    724 *   searched.
    725 * @filter: The filter function is executed on any matching GID in the table.
    726 *   If the filter function returns true, the corresponding index is returned,
    727 *   otherwise, we continue searching the GID table. It's guaranteed that
    728 *   while filter is executed, ndev field is valid and the structure won't
    729 *   change. filter is executed in an atomic context. filter must not be NULL.
    730 * @context: Private data to pass into the call-back.
    731 *
     732 * rdma_find_gid_by_filter() searches the port's GID table for the specified
     733 * GID value for which the filter function returns true.
    734 *
    735 */
    736const struct ib_gid_attr *rdma_find_gid_by_filter(
    737	struct ib_device *ib_dev, const union ib_gid *gid, u32 port,
    738	bool (*filter)(const union ib_gid *gid, const struct ib_gid_attr *,
    739		       void *),
    740	void *context)
    741{
    742	const struct ib_gid_attr *res = ERR_PTR(-ENOENT);
    743	struct ib_gid_table *table;
    744	unsigned long flags;
    745	unsigned int i;
    746
    747	if (!rdma_is_port_valid(ib_dev, port))
    748		return ERR_PTR(-EINVAL);
    749
    750	table = rdma_gid_table(ib_dev, port);
    751
    752	read_lock_irqsave(&table->rwlock, flags);
    753	for (i = 0; i < table->sz; i++) {
    754		struct ib_gid_table_entry *entry = table->data_vec[i];
    755
    756		if (!is_gid_entry_valid(entry))
    757			continue;
    758
    759		if (memcmp(gid, &entry->attr.gid, sizeof(*gid)))
    760			continue;
    761
    762		if (filter(gid, &entry->attr, context)) {
    763			get_gid_entry(entry);
    764			res = &entry->attr;
    765			break;
    766		}
    767	}
    768	read_unlock_irqrestore(&table->rwlock, flags);
    769	return res;
    770}
    771
    772static struct ib_gid_table *alloc_gid_table(int sz)
    773{
    774	struct ib_gid_table *table = kzalloc(sizeof(*table), GFP_KERNEL);
    775
    776	if (!table)
    777		return NULL;
    778
    779	table->data_vec = kcalloc(sz, sizeof(*table->data_vec), GFP_KERNEL);
    780	if (!table->data_vec)
    781		goto err_free_table;
    782
    783	mutex_init(&table->lock);
    784
    785	table->sz = sz;
    786	rwlock_init(&table->rwlock);
    787	return table;
    788
    789err_free_table:
    790	kfree(table);
    791	return NULL;
    792}
    793
    794static void release_gid_table(struct ib_device *device,
    795			      struct ib_gid_table *table)
    796{
    797	bool leak = false;
    798	int i;
    799
    800	if (!table)
    801		return;
    802
    803	for (i = 0; i < table->sz; i++) {
    804		if (is_gid_entry_free(table->data_vec[i]))
    805			continue;
    806		if (kref_read(&table->data_vec[i]->kref) > 1) {
    807			dev_err(&device->dev,
    808				"GID entry ref leak for index %d ref=%u\n", i,
    809				kref_read(&table->data_vec[i]->kref));
    810			leak = true;
    811		}
    812	}
    813	if (leak)
    814		return;
    815
    816	mutex_destroy(&table->lock);
    817	kfree(table->data_vec);
    818	kfree(table);
    819}
    820
    821static void cleanup_gid_table_port(struct ib_device *ib_dev, u32 port,
    822				   struct ib_gid_table *table)
    823{
    824	int i;
    825
    826	if (!table)
    827		return;
    828
    829	mutex_lock(&table->lock);
    830	for (i = 0; i < table->sz; ++i) {
    831		if (is_gid_entry_valid(table->data_vec[i]))
    832			del_gid(ib_dev, port, table, i);
    833	}
    834	mutex_unlock(&table->lock);
    835}
    836
    837void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u32 port,
    838				  struct net_device *ndev,
    839				  unsigned long gid_type_mask,
    840				  enum ib_cache_gid_default_mode mode)
    841{
    842	union ib_gid gid = { };
    843	struct ib_gid_attr gid_attr;
    844	unsigned int gid_type;
    845	unsigned long mask;
    846
    847	mask = GID_ATTR_FIND_MASK_GID_TYPE |
    848	       GID_ATTR_FIND_MASK_DEFAULT |
    849	       GID_ATTR_FIND_MASK_NETDEV;
    850	memset(&gid_attr, 0, sizeof(gid_attr));
    851	gid_attr.ndev = ndev;
    852
    853	for (gid_type = 0; gid_type < IB_GID_TYPE_SIZE; ++gid_type) {
    854		if (1UL << gid_type & ~gid_type_mask)
    855			continue;
    856
    857		gid_attr.gid_type = gid_type;
    858
    859		if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) {
    860			make_default_gid(ndev, &gid);
    861			__ib_cache_gid_add(ib_dev, port, &gid,
    862					   &gid_attr, mask, true);
    863		} else if (mode == IB_CACHE_GID_DEFAULT_MODE_DELETE) {
    864			_ib_cache_gid_del(ib_dev, port, &gid,
    865					  &gid_attr, mask, true);
    866		}
    867	}
    868}
    869
    870static void gid_table_reserve_default(struct ib_device *ib_dev, u32 port,
    871				      struct ib_gid_table *table)
    872{
    873	unsigned int i;
    874	unsigned long roce_gid_type_mask;
    875	unsigned int num_default_gids;
    876
    877	roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
    878	num_default_gids = hweight_long(roce_gid_type_mask);
    879	/* Reserve starting indices for default GIDs */
    880	for (i = 0; i < num_default_gids && i < table->sz; i++)
    881		table->default_gid_indices |= BIT(i);
    882}
    883
    884
    885static void gid_table_release_one(struct ib_device *ib_dev)
    886{
    887	u32 p;
    888
    889	rdma_for_each_port (ib_dev, p) {
    890		release_gid_table(ib_dev, ib_dev->port_data[p].cache.gid);
    891		ib_dev->port_data[p].cache.gid = NULL;
    892	}
    893}
    894
    895static int _gid_table_setup_one(struct ib_device *ib_dev)
    896{
    897	struct ib_gid_table *table;
    898	u32 rdma_port;
    899
    900	rdma_for_each_port (ib_dev, rdma_port) {
    901		table = alloc_gid_table(
    902			ib_dev->port_data[rdma_port].immutable.gid_tbl_len);
    903		if (!table)
    904			goto rollback_table_setup;
    905
    906		gid_table_reserve_default(ib_dev, rdma_port, table);
    907		ib_dev->port_data[rdma_port].cache.gid = table;
    908	}
    909	return 0;
    910
    911rollback_table_setup:
    912	gid_table_release_one(ib_dev);
    913	return -ENOMEM;
    914}
    915
    916static void gid_table_cleanup_one(struct ib_device *ib_dev)
    917{
    918	u32 p;
    919
    920	rdma_for_each_port (ib_dev, p)
    921		cleanup_gid_table_port(ib_dev, p,
    922				       ib_dev->port_data[p].cache.gid);
    923}
    924
    925static int gid_table_setup_one(struct ib_device *ib_dev)
    926{
    927	int err;
    928
    929	err = _gid_table_setup_one(ib_dev);
    930
    931	if (err)
    932		return err;
    933
    934	rdma_roce_rescan_device(ib_dev);
    935
    936	return err;
    937}
    938
    939/**
    940 * rdma_query_gid - Read the GID content from the GID software cache
    941 * @device:		Device to query the GID
    942 * @port_num:		Port number of the device
    943 * @index:		Index of the GID table entry to read
    944 * @gid:		Pointer to GID where to store the entry's GID
    945 *
     946 * rdma_query_gid() only reads the GID entry content for the requested device,
     947 * port and index. It works for IB, RoCE and iWARP link layers. It doesn't
    948 * hold any reference to the GID table entry in the HCA or software cache.
    949 *
    950 * Returns 0 on success or appropriate error code.
    951 *
    952 */
    953int rdma_query_gid(struct ib_device *device, u32 port_num,
    954		   int index, union ib_gid *gid)
    955{
    956	struct ib_gid_table *table;
    957	unsigned long flags;
    958	int res;
    959
    960	if (!rdma_is_port_valid(device, port_num))
    961		return -EINVAL;
    962
    963	table = rdma_gid_table(device, port_num);
    964	read_lock_irqsave(&table->rwlock, flags);
    965
    966	if (index < 0 || index >= table->sz) {
    967		res = -EINVAL;
    968		goto done;
    969	}
    970
    971	if (!is_gid_entry_valid(table->data_vec[index])) {
    972		res = -ENOENT;
    973		goto done;
    974	}
    975
    976	memcpy(gid, &table->data_vec[index]->attr.gid, sizeof(*gid));
    977	res = 0;
    978
    979done:
    980	read_unlock_irqrestore(&table->rwlock, flags);
    981	return res;
    982}
    983EXPORT_SYMBOL(rdma_query_gid);
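
/*
 * Usage sketch (illustrative; "device" is an already registered ib_device):
 *
 *	union ib_gid gid;
 *	int err = rdma_query_gid(device, 1, 0, &gid);
 *
 *	if (err)
 *		return err;
 *	// gid now holds a copy of the entry; no reference is held on the
 *	// GID table entry itself.
 */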
    984
    985/**
    986 * rdma_read_gid_hw_context - Read the HW GID context from GID attribute
     987 * @attr:		Pointer to the GID attribute
     988 *
     989 * rdma_read_gid_hw_context() reads the driver's GID HW context corresponding
    990 * to the SGID attr. Callers are required to already be holding the reference
    991 * to an existing GID entry.
    992 *
    993 * Returns the HW GID context
    994 *
    995 */
    996void *rdma_read_gid_hw_context(const struct ib_gid_attr *attr)
    997{
    998	return container_of(attr, struct ib_gid_table_entry, attr)->context;
    999}
   1000EXPORT_SYMBOL(rdma_read_gid_hw_context);
   1001
   1002/**
   1003 * rdma_find_gid - Returns SGID attributes if the matching GID is found.
   1004 * @device: The device to query.
   1005 * @gid: The GID value to search for.
   1006 * @gid_type: The GID type to search for.
   1007 * @ndev: In RoCE, the net device of the device. NULL means ignore.
   1008 *
   1009 * rdma_find_gid() searches for the specified GID value in the software cache.
   1010 *
   1011 * Returns GID attributes if a valid GID is found or returns ERR_PTR for the
   1012 * error. The caller must invoke rdma_put_gid_attr() to release the reference.
   1013 *
   1014 */
   1015const struct ib_gid_attr *rdma_find_gid(struct ib_device *device,
   1016					const union ib_gid *gid,
   1017					enum ib_gid_type gid_type,
   1018					struct net_device *ndev)
   1019{
   1020	unsigned long mask = GID_ATTR_FIND_MASK_GID |
   1021			     GID_ATTR_FIND_MASK_GID_TYPE;
   1022	struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};
   1023	u32 p;
   1024
   1025	if (ndev)
   1026		mask |= GID_ATTR_FIND_MASK_NETDEV;
   1027
   1028	rdma_for_each_port(device, p) {
   1029		struct ib_gid_table *table;
   1030		unsigned long flags;
   1031		int index;
   1032
   1033		table = device->port_data[p].cache.gid;
   1034		read_lock_irqsave(&table->rwlock, flags);
   1035		index = find_gid(table, gid, &gid_attr_val, false, mask, NULL);
   1036		if (index >= 0) {
   1037			const struct ib_gid_attr *attr;
   1038
   1039			get_gid_entry(table->data_vec[index]);
   1040			attr = &table->data_vec[index]->attr;
   1041			read_unlock_irqrestore(&table->rwlock, flags);
   1042			return attr;
   1043		}
   1044		read_unlock_irqrestore(&table->rwlock, flags);
   1045	}
   1046
   1047	return ERR_PTR(-ENOENT);
   1048}
   1049EXPORT_SYMBOL(rdma_find_gid);
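
/*
 * Usage sketch (illustrative): searching all ports for a RoCE v2 GID and
 * releasing the reference afterwards. "device" and "gid" are assumed to be
 * supplied by the caller.
 *
 *	const struct ib_gid_attr *attr;
 *
 *	attr = rdma_find_gid(device, &gid, IB_GID_TYPE_ROCE_UDP_ENCAP, NULL);
 *	if (IS_ERR(attr))
 *		return PTR_ERR(attr);
 *	// attr->port_num and attr->index identify where the GID was found
 *	rdma_put_gid_attr(attr);
 */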
   1050
   1051int ib_get_cached_pkey(struct ib_device *device,
   1052		       u32               port_num,
   1053		       int               index,
   1054		       u16              *pkey)
   1055{
   1056	struct ib_pkey_cache *cache;
   1057	unsigned long flags;
   1058	int ret = 0;
   1059
   1060	if (!rdma_is_port_valid(device, port_num))
   1061		return -EINVAL;
   1062
   1063	read_lock_irqsave(&device->cache_lock, flags);
   1064
   1065	cache = device->port_data[port_num].cache.pkey;
   1066
   1067	if (!cache || index < 0 || index >= cache->table_len)
   1068		ret = -EINVAL;
   1069	else
   1070		*pkey = cache->table[index];
   1071
   1072	read_unlock_irqrestore(&device->cache_lock, flags);
   1073
   1074	return ret;
   1075}
   1076EXPORT_SYMBOL(ib_get_cached_pkey);
   1077
   1078void ib_get_cached_subnet_prefix(struct ib_device *device, u32 port_num,
   1079				u64 *sn_pfx)
   1080{
   1081	unsigned long flags;
   1082
   1083	read_lock_irqsave(&device->cache_lock, flags);
   1084	*sn_pfx = device->port_data[port_num].cache.subnet_prefix;
   1085	read_unlock_irqrestore(&device->cache_lock, flags);
   1086}
   1087EXPORT_SYMBOL(ib_get_cached_subnet_prefix);
   1088
   1089int ib_find_cached_pkey(struct ib_device *device, u32 port_num,
   1090			u16 pkey, u16 *index)
   1091{
   1092	struct ib_pkey_cache *cache;
   1093	unsigned long flags;
   1094	int i;
   1095	int ret = -ENOENT;
   1096	int partial_ix = -1;
   1097
   1098	if (!rdma_is_port_valid(device, port_num))
   1099		return -EINVAL;
   1100
   1101	read_lock_irqsave(&device->cache_lock, flags);
   1102
   1103	cache = device->port_data[port_num].cache.pkey;
   1104	if (!cache) {
   1105		ret = -EINVAL;
   1106		goto err;
   1107	}
   1108
   1109	*index = -1;
   1110
   1111	for (i = 0; i < cache->table_len; ++i)
   1112		if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) {
   1113			if (cache->table[i] & 0x8000) {
   1114				*index = i;
   1115				ret = 0;
   1116				break;
   1117			} else {
   1118				partial_ix = i;
   1119			}
   1120		}
   1121
   1122	if (ret && partial_ix >= 0) {
   1123		*index = partial_ix;
   1124		ret = 0;
   1125	}
   1126
   1127err:
   1128	read_unlock_irqrestore(&device->cache_lock, flags);
   1129
   1130	return ret;
   1131}
   1132EXPORT_SYMBOL(ib_find_cached_pkey);
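
/*
 * Usage sketch (illustrative): looking up the index of the default P_Key,
 * preferring the full-membership entry (0xffff) over the limited one
 * (0x7fff) as implemented above. "device" is assumed to be supplied.
 *
 *	u16 index;
 *	int err = ib_find_cached_pkey(device, 1, 0xffff, &index);
 *
 *	if (err)
 *		return err;	// -ENOENT if no matching P_Key is cached
 */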
   1133
   1134int ib_find_exact_cached_pkey(struct ib_device *device, u32 port_num,
   1135			      u16 pkey, u16 *index)
   1136{
   1137	struct ib_pkey_cache *cache;
   1138	unsigned long flags;
   1139	int i;
   1140	int ret = -ENOENT;
   1141
   1142	if (!rdma_is_port_valid(device, port_num))
   1143		return -EINVAL;
   1144
   1145	read_lock_irqsave(&device->cache_lock, flags);
   1146
   1147	cache = device->port_data[port_num].cache.pkey;
   1148	if (!cache) {
   1149		ret = -EINVAL;
   1150		goto err;
   1151	}
   1152
   1153	*index = -1;
   1154
   1155	for (i = 0; i < cache->table_len; ++i)
   1156		if (cache->table[i] == pkey) {
   1157			*index = i;
   1158			ret = 0;
   1159			break;
   1160		}
   1161
   1162err:
   1163	read_unlock_irqrestore(&device->cache_lock, flags);
   1164
   1165	return ret;
   1166}
   1167EXPORT_SYMBOL(ib_find_exact_cached_pkey);
   1168
   1169int ib_get_cached_lmc(struct ib_device *device, u32 port_num, u8 *lmc)
   1170{
   1171	unsigned long flags;
   1172	int ret = 0;
   1173
   1174	if (!rdma_is_port_valid(device, port_num))
   1175		return -EINVAL;
   1176
   1177	read_lock_irqsave(&device->cache_lock, flags);
   1178	*lmc = device->port_data[port_num].cache.lmc;
   1179	read_unlock_irqrestore(&device->cache_lock, flags);
   1180
   1181	return ret;
   1182}
   1183EXPORT_SYMBOL(ib_get_cached_lmc);
   1184
   1185int ib_get_cached_port_state(struct ib_device *device, u32 port_num,
   1186			     enum ib_port_state *port_state)
   1187{
   1188	unsigned long flags;
   1189	int ret = 0;
   1190
   1191	if (!rdma_is_port_valid(device, port_num))
   1192		return -EINVAL;
   1193
   1194	read_lock_irqsave(&device->cache_lock, flags);
   1195	*port_state = device->port_data[port_num].cache.port_state;
   1196	read_unlock_irqrestore(&device->cache_lock, flags);
   1197
   1198	return ret;
   1199}
   1200EXPORT_SYMBOL(ib_get_cached_port_state);
   1201
   1202/**
   1203 * rdma_get_gid_attr - Returns GID attributes for a port of a device
   1204 * at a requested gid_index, if a valid GID entry exists.
   1205 * @device:		The device to query.
   1206 * @port_num:		The port number on the device where the GID value
   1207 *			is to be queried.
   1208 * @index:		Index of the GID table entry whose attributes are to
   1209 *                      be queried.
   1210 *
    1211 * rdma_get_gid_attr() acquires a reference to the gid attribute from the
    1212 * cached GID table. The caller must invoke rdma_put_gid_attr() to release
    1213 * the reference to the gid attribute, regardless of link layer.
   1214 *
   1215 * Returns pointer to valid gid attribute or ERR_PTR for the appropriate error
   1216 * code.
   1217 */
   1218const struct ib_gid_attr *
   1219rdma_get_gid_attr(struct ib_device *device, u32 port_num, int index)
   1220{
   1221	const struct ib_gid_attr *attr = ERR_PTR(-ENODATA);
   1222	struct ib_gid_table *table;
   1223	unsigned long flags;
   1224
   1225	if (!rdma_is_port_valid(device, port_num))
   1226		return ERR_PTR(-EINVAL);
   1227
   1228	table = rdma_gid_table(device, port_num);
   1229	if (index < 0 || index >= table->sz)
   1230		return ERR_PTR(-EINVAL);
   1231
   1232	read_lock_irqsave(&table->rwlock, flags);
   1233	if (!is_gid_entry_valid(table->data_vec[index]))
   1234		goto done;
   1235
   1236	get_gid_entry(table->data_vec[index]);
   1237	attr = &table->data_vec[index]->attr;
   1238done:
   1239	read_unlock_irqrestore(&table->rwlock, flags);
   1240	return attr;
   1241}
   1242EXPORT_SYMBOL(rdma_get_gid_attr);
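
/*
 * Usage sketch (illustrative): holding a GID entry across a sleepable
 * section. "device" is assumed to be supplied by the caller.
 *
 *	const struct ib_gid_attr *attr = rdma_get_gid_attr(device, 1, 0);
 *
 *	if (IS_ERR(attr))
 *		return PTR_ERR(attr);
 *	// the entry cannot be freed until the reference is dropped
 *	rdma_put_gid_attr(attr);
 */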
   1243
   1244/**
   1245 * rdma_query_gid_table - Reads GID table entries of all the ports of a device up to max_entries.
   1246 * @device: The device to query.
   1247 * @entries: Entries where GID entries are returned.
   1248 * @max_entries: Maximum number of entries that can be returned.
   1249 * Entries array must be allocated to hold max_entries number of entries.
   1250 *
   1251 * Returns number of entries on success or appropriate error code.
   1252 */
   1253ssize_t rdma_query_gid_table(struct ib_device *device,
   1254			     struct ib_uverbs_gid_entry *entries,
   1255			     size_t max_entries)
   1256{
   1257	const struct ib_gid_attr *gid_attr;
   1258	ssize_t num_entries = 0, ret;
   1259	struct ib_gid_table *table;
   1260	u32 port_num, i;
   1261	struct net_device *ndev;
   1262	unsigned long flags;
   1263
   1264	rdma_for_each_port(device, port_num) {
   1265		table = rdma_gid_table(device, port_num);
   1266		read_lock_irqsave(&table->rwlock, flags);
   1267		for (i = 0; i < table->sz; i++) {
   1268			if (!is_gid_entry_valid(table->data_vec[i]))
   1269				continue;
   1270			if (num_entries >= max_entries) {
   1271				ret = -EINVAL;
   1272				goto err;
   1273			}
   1274
   1275			gid_attr = &table->data_vec[i]->attr;
   1276
   1277			memcpy(&entries->gid, &gid_attr->gid,
   1278			       sizeof(gid_attr->gid));
   1279			entries->gid_index = gid_attr->index;
   1280			entries->port_num = gid_attr->port_num;
   1281			entries->gid_type = gid_attr->gid_type;
   1282			ndev = rcu_dereference_protected(
   1283				gid_attr->ndev,
   1284				lockdep_is_held(&table->rwlock));
   1285			if (ndev)
   1286				entries->netdev_ifindex = ndev->ifindex;
   1287
   1288			num_entries++;
   1289			entries++;
   1290		}
   1291		read_unlock_irqrestore(&table->rwlock, flags);
   1292	}
   1293
   1294	return num_entries;
   1295err:
   1296	read_unlock_irqrestore(&table->rwlock, flags);
   1297	return ret;
   1298}
   1299EXPORT_SYMBOL(rdma_query_gid_table);
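
/*
 * Usage sketch (illustrative): snapshotting all valid GID entries of a
 * device. "device" and the bound "n" are placeholders chosen by the caller.
 *
 *	struct ib_uverbs_gid_entry *entries;
 *	ssize_t cnt;
 *
 *	entries = kcalloc(n, sizeof(*entries), GFP_KERNEL);
 *	if (!entries)
 *		return -ENOMEM;
 *	cnt = rdma_query_gid_table(device, entries, n);
 *	// cnt is the number of entries filled in, or -EINVAL if n was too small
 *	kfree(entries);
 */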
   1300
   1301/**
   1302 * rdma_put_gid_attr - Release reference to the GID attribute
   1303 * @attr:		Pointer to the GID attribute whose reference
   1304 *			needs to be released.
   1305 *
    1306 * rdma_put_gid_attr() must be used to release a reference that was
    1307 * acquired using rdma_get_gid_attr() or any other API which
    1308 * returns a pointer to the ib_gid_attr, regardless of whether the
    1309 * link layer is IB or RoCE.
   1310 *
   1311 */
   1312void rdma_put_gid_attr(const struct ib_gid_attr *attr)
   1313{
   1314	struct ib_gid_table_entry *entry =
   1315		container_of(attr, struct ib_gid_table_entry, attr);
   1316
   1317	put_gid_entry(entry);
   1318}
   1319EXPORT_SYMBOL(rdma_put_gid_attr);
   1320
   1321/**
   1322 * rdma_hold_gid_attr - Get reference to existing GID attribute
   1323 *
   1324 * @attr:		Pointer to the GID attribute whose reference
   1325 *			needs to be taken.
   1326 *
   1327 * Increase the reference count to a GID attribute to keep it from being
   1328 * freed. Callers are required to already be holding a reference to attribute.
   1329 *
   1330 */
   1331void rdma_hold_gid_attr(const struct ib_gid_attr *attr)
   1332{
   1333	struct ib_gid_table_entry *entry =
   1334		container_of(attr, struct ib_gid_table_entry, attr);
   1335
   1336	get_gid_entry(entry);
   1337}
   1338EXPORT_SYMBOL(rdma_hold_gid_attr);
   1339
   1340/**
   1341 * rdma_read_gid_attr_ndev_rcu - Read GID attribute netdevice
   1342 * which must be in UP state.
   1343 *
   1344 * @attr:Pointer to the GID attribute
   1345 *
    1346 * Returns a pointer to the netdevice if a netdevice was attached to the GID
    1347 * and the netdevice is in UP state. The caller must hold the RCU lock, as this
    1348 * API reads the netdev flags, which can change while the netdevice migrates to
    1349 * a different net namespace. Returns ERR_PTR with an error code otherwise.
   1350 *
   1351 */
   1352struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr)
   1353{
   1354	struct ib_gid_table_entry *entry =
   1355			container_of(attr, struct ib_gid_table_entry, attr);
   1356	struct ib_device *device = entry->attr.device;
   1357	struct net_device *ndev = ERR_PTR(-EINVAL);
   1358	u32 port_num = entry->attr.port_num;
   1359	struct ib_gid_table *table;
   1360	unsigned long flags;
   1361	bool valid;
   1362
   1363	table = rdma_gid_table(device, port_num);
   1364
   1365	read_lock_irqsave(&table->rwlock, flags);
   1366	valid = is_gid_entry_valid(table->data_vec[attr->index]);
   1367	if (valid) {
   1368		ndev = rcu_dereference(attr->ndev);
   1369		if (!ndev)
   1370			ndev = ERR_PTR(-ENODEV);
   1371	}
   1372	read_unlock_irqrestore(&table->rwlock, flags);
   1373	return ndev;
   1374}
   1375EXPORT_SYMBOL(rdma_read_gid_attr_ndev_rcu);
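
/*
 * Usage sketch (illustrative): the RCU read lock must be held around both
 * the call and any use of the returned netdevice. "attr" is assumed to be
 * a GID attribute the caller already holds a reference on.
 *
 *	struct net_device *ndev;
 *
 *	rcu_read_lock();
 *	ndev = rdma_read_gid_attr_ndev_rcu(attr);
 *	if (!IS_ERR(ndev))
 *		netdev_dbg(ndev, "gid index %u\n", attr->index);
 *	rcu_read_unlock();
 */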
   1376
   1377static int get_lower_dev_vlan(struct net_device *lower_dev,
   1378			      struct netdev_nested_priv *priv)
   1379{
   1380	u16 *vlan_id = (u16 *)priv->data;
   1381
   1382	if (is_vlan_dev(lower_dev))
   1383		*vlan_id = vlan_dev_vlan_id(lower_dev);
   1384
    1385	/* We are interested only in the first-level vlan device, so
    1386	 * always return 1 to stop iterating over lower-level devices.
   1387	 */
   1388	return 1;
   1389}
   1390
   1391/**
   1392 * rdma_read_gid_l2_fields - Read the vlan ID and source MAC address
   1393 *			     of a GID entry.
   1394 *
   1395 * @attr:	GID attribute pointer whose L2 fields to be read
   1396 * @vlan_id:	Pointer to vlan id to fill up if the GID entry has
   1397 *		vlan id. It is optional.
   1398 * @smac:	Pointer to smac to fill up for a GID entry. It is optional.
   1399 *
    1400 * rdma_read_gid_l2_fields() returns 0 on success, filling in the vlan id
    1401 * (if the gid entry has a vlan) and the source MAC address, or returns an error.
   1402 */
   1403int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr,
   1404			    u16 *vlan_id, u8 *smac)
   1405{
   1406	struct netdev_nested_priv priv = {
   1407		.data = (void *)vlan_id,
   1408	};
   1409	struct net_device *ndev;
   1410
   1411	rcu_read_lock();
   1412	ndev = rcu_dereference(attr->ndev);
   1413	if (!ndev) {
   1414		rcu_read_unlock();
   1415		return -ENODEV;
   1416	}
   1417	if (smac)
   1418		ether_addr_copy(smac, ndev->dev_addr);
   1419	if (vlan_id) {
   1420		*vlan_id = 0xffff;
   1421		if (is_vlan_dev(ndev)) {
   1422			*vlan_id = vlan_dev_vlan_id(ndev);
   1423		} else {
    1424			/* If the netdev is an upper device and its lower
    1425			 * device is a vlan device, consider the vlan id of
    1426			 * the lower vlan device for this gid entry.
    1427			 */
   1428			netdev_walk_all_lower_dev_rcu(attr->ndev,
   1429					get_lower_dev_vlan, &priv);
   1430		}
   1431	}
   1432	rcu_read_unlock();
   1433	return 0;
   1434}
   1435EXPORT_SYMBOL(rdma_read_gid_l2_fields);
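
/*
 * Usage sketch (illustrative; "attr" is a GID attribute the caller holds a
 * reference on):
 *
 *	u16 vlan_id;
 *	u8 smac[ETH_ALEN];
 *	int err = rdma_read_gid_l2_fields(attr, &vlan_id, smac);
 *
 *	if (err)
 *		return err;		// -ENODEV if no netdev is attached
 *	// vlan_id is 0xffff when the netdev carries no vlan tag
 */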
   1436
   1437static int config_non_roce_gid_cache(struct ib_device *device,
   1438				     u32 port, struct ib_port_attr *tprops)
   1439{
   1440	struct ib_gid_attr gid_attr = {};
   1441	struct ib_gid_table *table;
   1442	int ret = 0;
   1443	int i;
   1444
   1445	gid_attr.device = device;
   1446	gid_attr.port_num = port;
   1447	table = rdma_gid_table(device, port);
   1448
   1449	mutex_lock(&table->lock);
   1450	for (i = 0; i < tprops->gid_tbl_len; ++i) {
   1451		if (!device->ops.query_gid)
   1452			continue;
   1453		ret = device->ops.query_gid(device, port, i, &gid_attr.gid);
   1454		if (ret) {
   1455			dev_warn(&device->dev,
   1456				 "query_gid failed (%d) for index %d\n", ret,
   1457				 i);
   1458			goto err;
   1459		}
   1460		gid_attr.index = i;
   1461		tprops->subnet_prefix =
   1462			be64_to_cpu(gid_attr.gid.global.subnet_prefix);
   1463		add_modify_gid(table, &gid_attr);
   1464	}
   1465err:
   1466	mutex_unlock(&table->lock);
   1467	return ret;
   1468}
   1469
   1470static int
   1471ib_cache_update(struct ib_device *device, u32 port, bool update_gids,
   1472		bool update_pkeys, bool enforce_security)
   1473{
   1474	struct ib_port_attr       *tprops = NULL;
   1475	struct ib_pkey_cache      *pkey_cache = NULL;
   1476	struct ib_pkey_cache      *old_pkey_cache = NULL;
   1477	int                        i;
   1478	int                        ret;
   1479
   1480	if (!rdma_is_port_valid(device, port))
   1481		return -EINVAL;
   1482
   1483	tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
   1484	if (!tprops)
   1485		return -ENOMEM;
   1486
   1487	ret = ib_query_port(device, port, tprops);
   1488	if (ret) {
   1489		dev_warn(&device->dev, "ib_query_port failed (%d)\n", ret);
   1490		goto err;
   1491	}
   1492
   1493	if (!rdma_protocol_roce(device, port) && update_gids) {
   1494		ret = config_non_roce_gid_cache(device, port,
   1495						tprops);
   1496		if (ret)
   1497			goto err;
   1498	}
   1499
   1500	update_pkeys &= !!tprops->pkey_tbl_len;
   1501
   1502	if (update_pkeys) {
   1503		pkey_cache = kmalloc(struct_size(pkey_cache, table,
   1504						 tprops->pkey_tbl_len),
   1505				     GFP_KERNEL);
   1506		if (!pkey_cache) {
   1507			ret = -ENOMEM;
   1508			goto err;
   1509		}
   1510
   1511		pkey_cache->table_len = tprops->pkey_tbl_len;
   1512
   1513		for (i = 0; i < pkey_cache->table_len; ++i) {
   1514			ret = ib_query_pkey(device, port, i,
   1515					    pkey_cache->table + i);
   1516			if (ret) {
   1517				dev_warn(&device->dev,
   1518					 "ib_query_pkey failed (%d) for index %d\n",
   1519					 ret, i);
   1520				goto err;
   1521			}
   1522		}
   1523	}
   1524
   1525	write_lock_irq(&device->cache_lock);
   1526
   1527	if (update_pkeys) {
   1528		old_pkey_cache = device->port_data[port].cache.pkey;
   1529		device->port_data[port].cache.pkey = pkey_cache;
   1530	}
   1531	device->port_data[port].cache.lmc = tprops->lmc;
   1532	device->port_data[port].cache.port_state = tprops->state;
   1533
   1534	device->port_data[port].cache.subnet_prefix = tprops->subnet_prefix;
   1535	write_unlock_irq(&device->cache_lock);
   1536
   1537	if (enforce_security)
   1538		ib_security_cache_change(device,
   1539					 port,
   1540					 tprops->subnet_prefix);
   1541
   1542	kfree(old_pkey_cache);
   1543	kfree(tprops);
   1544	return 0;
   1545
   1546err:
   1547	kfree(pkey_cache);
   1548	kfree(tprops);
   1549	return ret;
   1550}
   1551
   1552static void ib_cache_event_task(struct work_struct *_work)
   1553{
   1554	struct ib_update_work *work =
   1555		container_of(_work, struct ib_update_work, work);
   1556	int ret;
   1557
   1558	/* Before distributing the cache update event, first sync
   1559	 * the cache.
   1560	 */
   1561	ret = ib_cache_update(work->event.device, work->event.element.port_num,
   1562			      work->event.event == IB_EVENT_GID_CHANGE,
   1563			      work->event.event == IB_EVENT_PKEY_CHANGE,
   1564			      work->enforce_security);
   1565
    1566	/* The GID event is already notified for individual GID entries by
    1567	 * dispatch_gid_change_event(). Hence, notify only for the rest of the
    1568	 * events.
   1569	 */
   1570	if (!ret && work->event.event != IB_EVENT_GID_CHANGE)
   1571		ib_dispatch_event_clients(&work->event);
   1572
   1573	kfree(work);
   1574}
   1575
   1576static void ib_generic_event_task(struct work_struct *_work)
   1577{
   1578	struct ib_update_work *work =
   1579		container_of(_work, struct ib_update_work, work);
   1580
   1581	ib_dispatch_event_clients(&work->event);
   1582	kfree(work);
   1583}
   1584
   1585static bool is_cache_update_event(const struct ib_event *event)
   1586{
   1587	return (event->event == IB_EVENT_PORT_ERR    ||
   1588		event->event == IB_EVENT_PORT_ACTIVE ||
   1589		event->event == IB_EVENT_LID_CHANGE  ||
   1590		event->event == IB_EVENT_PKEY_CHANGE ||
   1591		event->event == IB_EVENT_CLIENT_REREGISTER ||
   1592		event->event == IB_EVENT_GID_CHANGE);
   1593}
   1594
   1595/**
   1596 * ib_dispatch_event - Dispatch an asynchronous event
   1597 * @event:Event to dispatch
   1598 *
   1599 * Low-level drivers must call ib_dispatch_event() to dispatch the
   1600 * event to all registered event handlers when an asynchronous event
   1601 * occurs.
   1602 */
   1603void ib_dispatch_event(const struct ib_event *event)
   1604{
   1605	struct ib_update_work *work;
   1606
   1607	work = kzalloc(sizeof(*work), GFP_ATOMIC);
   1608	if (!work)
   1609		return;
   1610
   1611	if (is_cache_update_event(event))
   1612		INIT_WORK(&work->work, ib_cache_event_task);
   1613	else
   1614		INIT_WORK(&work->work, ib_generic_event_task);
   1615
   1616	work->event = *event;
   1617	if (event->event == IB_EVENT_PKEY_CHANGE ||
   1618	    event->event == IB_EVENT_GID_CHANGE)
   1619		work->enforce_security = true;
   1620
   1621	queue_work(ib_wq, &work->work);
   1622}
   1623EXPORT_SYMBOL(ib_dispatch_event);
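
/*
 * Usage sketch (illustrative): how a low-level driver might report a port
 * state change. "ibdev" and "port" are placeholders supplied by the driver.
 *
 *	struct ib_event event = {};
 *
 *	event.device = ibdev;
 *	event.element.port_num = port;
 *	event.event = IB_EVENT_PORT_ACTIVE;
 *	ib_dispatch_event(&event);
 *	// the cache for this port is refreshed before clients are notified
 */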
   1624
   1625int ib_cache_setup_one(struct ib_device *device)
   1626{
   1627	u32 p;
   1628	int err;
   1629
   1630	err = gid_table_setup_one(device);
   1631	if (err)
   1632		return err;
   1633
   1634	rdma_for_each_port (device, p) {
   1635		err = ib_cache_update(device, p, true, true, true);
   1636		if (err)
   1637			return err;
   1638	}
   1639
   1640	return 0;
   1641}
   1642
   1643void ib_cache_release_one(struct ib_device *device)
   1644{
   1645	u32 p;
   1646
   1647	/*
   1648	 * The release function frees all the cache elements.
   1649	 * This function should be called as part of freeing
    1650	 * all the device's resources when the cache can no
   1651	 * longer be accessed.
   1652	 */
   1653	rdma_for_each_port (device, p)
   1654		kfree(device->port_data[p].cache.pkey);
   1655
   1656	gid_table_release_one(device);
   1657}
   1658
   1659void ib_cache_cleanup_one(struct ib_device *device)
   1660{
   1661	/* The cleanup function waits for all in-progress workqueue
   1662	 * elements and cleans up the GID cache. This function should be
   1663	 * called after the device was removed from the devices list and
   1664	 * all clients were removed, so the cache exists but is
   1665	 * non-functional and shouldn't be updated anymore.
   1666	 */
   1667	flush_workqueue(ib_wq);
   1668	gid_table_cleanup_one(device);
   1669
   1670	/*
   1671	 * Flush the wq second time for any pending GID delete work.
   1672	 */
   1673	flush_workqueue(ib_wq);
   1674}