cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

fs_counters.c (21141B)


/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/mlx5/driver.h>
#include <linux/mlx5/fs.h>
#include <linux/rbtree.h>
#include "mlx5_core.h"
#include "fs_core.h"
#include "fs_cmd.h"

#define MLX5_FC_STATS_PERIOD msecs_to_jiffies(1000)
#define MLX5_FC_BULK_QUERY_ALLOC_PERIOD msecs_to_jiffies(180 * 1000)
/* Max number of counters to query in bulk read is 32K */
#define MLX5_SW_MAX_COUNTERS_BULK BIT(15)
#define MLX5_INIT_COUNTERS_BULK 8
#define MLX5_FC_POOL_MAX_THRESHOLD BIT(18)
#define MLX5_FC_POOL_USED_BUFF_RATIO 10
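
/* In concrete terms: counter caches are refreshed once a second, a failed
 * attempt to grow the bulk query buffer is retried at most every three
 * minutes, a single bulk query covers at most BIT(15) = 32768 counters,
 * and the pool keeps at most min(BIT(18), used_fcs / 10) counters of free
 * headroom (see mlx5_fc_pool_update_threshold() below).
 */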

struct mlx5_fc_cache {
	u64 packets;
	u64 bytes;
	u64 lastuse;
};

struct mlx5_fc {
	struct list_head list;
	struct llist_node addlist;
	struct llist_node dellist;

	/* last{packets,bytes} members are used when calculating the delta since
	 * last reading
	 */
	u64 lastpackets;
	u64 lastbytes;

	struct mlx5_fc_bulk *bulk;
	u32 id;
	bool aging;

	struct mlx5_fc_cache cache ____cacheline_aligned_in_smp;
};

static void mlx5_fc_pool_init(struct mlx5_fc_pool *fc_pool, struct mlx5_core_dev *dev);
static void mlx5_fc_pool_cleanup(struct mlx5_fc_pool *fc_pool);
static struct mlx5_fc *mlx5_fc_pool_acquire_counter(struct mlx5_fc_pool *fc_pool);
static void mlx5_fc_pool_release_counter(struct mlx5_fc_pool *fc_pool, struct mlx5_fc *fc);

/* locking scheme:
 *
 * It is the responsibility of the user to prevent concurrent calls or bad
 * ordering to mlx5_fc_create(), mlx5_fc_destroy() and accessing a reference
 * to struct mlx5_fc.
 * e.g. en_tc.c is protected by the RTNL lock of its caller, and will never
 * call a dump (access to struct mlx5_fc) after a counter is destroyed.
 *
 * access to counter list:
 * - create (user context)
 *   - mlx5_fc_create() only adds to an addlist to be used by
 *     mlx5_fc_stats_work(). addlist is a lockless singly linked list
 *     that doesn't require any additional synchronization when adding a
 *     single node.
 *   - spawn thread to do the actual insert
 *
 * - destroy (user context)
 *   - add a counter to lockless dellist
 *   - spawn thread to do the actual del
 *
 * - dump (user context)
 *   user should not call dump after destroy
 *
 * - query (single thread workqueue context)
 *   destroy/dump - no conflict (see destroy)
 *   query/dump - packets and bytes might be inconsistent (since update is not
 *                atomic)
 *   query/create - no conflict (see create)
 *   since every create/destroy spawns the work, the work item will only
 *   query the hardware after the necessary time has elapsed.
 */
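
/* A minimal sketch of the contract above (illustrative only, not part of
 * this file), assuming the caller serializes create/dump/destroy itself
 * the way en_tc.c relies on the RTNL lock:
 *
 *	struct mlx5_fc *fc = mlx5_fc_create(dev, true);
 *	u64 bytes, packets, lastuse;
 *
 *	if (IS_ERR(fc))
 *		return PTR_ERR(fc);
 *	mlx5_fc_query_cached(fc, &bytes, &packets, &lastuse);
 *	mlx5_fc_destroy(dev, fc);
 *
 * No dump (mlx5_fc_query_cached()) may race with or follow the destroy;
 * enforcing that ordering is the caller's responsibility.
 */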

static struct list_head *mlx5_fc_counters_lookup_next(struct mlx5_core_dev *dev,
						      u32 id)
{
	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
	unsigned long next_id = (unsigned long)id + 1;
	struct mlx5_fc *counter;
	unsigned long tmp;

	rcu_read_lock();
	/* skip counters that are in idr, but not yet in counters list */
	idr_for_each_entry_continue_ul(&fc_stats->counters_idr,
				       counter, tmp, next_id) {
		if (!list_empty(&counter->list))
			break;
	}
	rcu_read_unlock();

	return counter ? &counter->list : &fc_stats->counters;
}

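/* mlx5_fc_counters_lookup_next() keeps fc_stats->counters sorted by id:
 * the IDR is walked from id + 1 upward and the first counter already on
 * the list is returned, so list_add_tail() below links the new counter
 * right in front of it. For example, inserting id 5 into a list holding
 * ids {1, 4, 7} returns the node of id 7 and places 5 between 4 and 7.
 */
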
static void mlx5_fc_stats_insert(struct mlx5_core_dev *dev,
				 struct mlx5_fc *counter)
{
	struct list_head *next = mlx5_fc_counters_lookup_next(dev, counter->id);

	list_add_tail(&counter->list, next);
}

static void mlx5_fc_stats_remove(struct mlx5_core_dev *dev,
				 struct mlx5_fc *counter)
{
	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;

	list_del(&counter->list);

	spin_lock(&fc_stats->counters_idr_lock);
	WARN_ON(!idr_remove(&fc_stats->counters_idr, counter->id));
	spin_unlock(&fc_stats->counters_idr_lock);
}

static int get_init_bulk_query_len(struct mlx5_core_dev *dev)
{
	return min_t(int, MLX5_INIT_COUNTERS_BULK,
		     (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk)));
}

static int get_max_bulk_query_len(struct mlx5_core_dev *dev)
{
	return min_t(int, MLX5_SW_MAX_COUNTERS_BULK,
		     (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk)));
}
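
/* Worked example: with log_max_flow_counter_bulk = 16 the device allows
 * 65536 counters per query, so queries start at min(8, 65536) = 8
 * counters and may later grow to min(32768, 65536) = 32768 (see
 * mlx5_fc_stats_bulk_query_size_increase() below).
 */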

static void update_counter_cache(int index, u32 *bulk_raw_data,
				 struct mlx5_fc_cache *cache)
{
	void *stats = MLX5_ADDR_OF(query_flow_counter_out, bulk_raw_data,
			     flow_statistics[index]);
	u64 packets = MLX5_GET64(traffic_counter, stats, packets);
	u64 bytes = MLX5_GET64(traffic_counter, stats, octets);

	if (cache->packets == packets)
		return;

	cache->packets = packets;
	cache->bytes = bytes;
	cache->lastuse = jiffies;
}

static void mlx5_fc_stats_query_counter_range(struct mlx5_core_dev *dev,
					      struct mlx5_fc *first,
					      u32 last_id)
{
	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
	bool query_more_counters = (first->id <= last_id);
	int cur_bulk_len = fc_stats->bulk_query_len;
	u32 *data = fc_stats->bulk_query_out;
	struct mlx5_fc *counter = first;
	u32 bulk_base_id;
	int bulk_len;
	int err;

	while (query_more_counters) {
		/* first id must be aligned to 4 when using bulk query */
		bulk_base_id = counter->id & ~0x3;

		/* number of counters to query, including the last counter */
		bulk_len = min_t(int, cur_bulk_len,
				 ALIGN(last_id - bulk_base_id + 1, 4));

		err = mlx5_cmd_fc_bulk_query(dev, bulk_base_id, bulk_len,
					     data);
		if (err) {
			mlx5_core_err(dev, "Error doing bulk query: %d\n", err);
			return;
		}
		query_more_counters = false;

		list_for_each_entry_from(counter, &fc_stats->counters, list) {
			int counter_index = counter->id - bulk_base_id;
			struct mlx5_fc_cache *cache = &counter->cache;

			if (counter->id >= bulk_base_id + bulk_len) {
				query_more_counters = true;
				break;
			}

			update_counter_cache(counter_index, data, cache);
		}
	}
}
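
/* Worked example for one loop iteration above: with first->id = 0x16 and
 * last_id = 0x30, bulk_base_id = 0x16 & ~0x3 = 0x14 and, given a large
 * enough buffer, bulk_len = ALIGN(0x30 - 0x14 + 1, 4) = 0x20, so ids
 * 0x14..0x33 are fetched by one command; any listed counter with an id
 * of 0x34 or above triggers another round.
 */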

static void mlx5_fc_free(struct mlx5_core_dev *dev, struct mlx5_fc *counter)
{
	mlx5_cmd_fc_free(dev, counter->id);
	kfree(counter);
}

static void mlx5_fc_release(struct mlx5_core_dev *dev, struct mlx5_fc *counter)
{
	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;

	if (counter->bulk)
		mlx5_fc_pool_release_counter(&fc_stats->fc_pool, counter);
	else
		mlx5_fc_free(dev, counter);
}

static void mlx5_fc_stats_bulk_query_size_increase(struct mlx5_core_dev *dev)
{
	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
	int max_bulk_len = get_max_bulk_query_len(dev);
	unsigned long now = jiffies;
	u32 *bulk_query_out_tmp;
	int max_out_len;

	if (fc_stats->bulk_query_alloc_failed &&
	    time_before(now, fc_stats->next_bulk_query_alloc))
		return;

	max_out_len = mlx5_cmd_fc_get_bulk_query_out_len(max_bulk_len);
	bulk_query_out_tmp = kzalloc(max_out_len, GFP_KERNEL);
	if (!bulk_query_out_tmp) {
		mlx5_core_warn_once(dev,
				    "Can't increase flow counters bulk query buffer size, insufficient memory, bulk_size(%d)\n",
				    max_bulk_len);
		fc_stats->bulk_query_alloc_failed = true;
		fc_stats->next_bulk_query_alloc =
			now + MLX5_FC_BULK_QUERY_ALLOC_PERIOD;
		return;
	}

	kfree(fc_stats->bulk_query_out);
	fc_stats->bulk_query_out = bulk_query_out_tmp;
	fc_stats->bulk_query_len = max_bulk_len;
	if (fc_stats->bulk_query_alloc_failed) {
		mlx5_core_info(dev,
			       "Flow counters bulk query buffer size increased, bulk_size(%d)\n",
			       max_bulk_len);
		fc_stats->bulk_query_alloc_failed = false;
	}
}

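/* One pass of the work item below, in order: snapshot the del- and
 * addlists (dellist first, see the comment in the function), insert the
 * new counters into the sorted list, release the deleted ones, grow the
 * bulk query buffer once enough counters exist, and, rate limited by
 * next_query, refresh every counter cache via bulk queries.
 */
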
static void mlx5_fc_stats_work(struct work_struct *work)
{
	struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev,
						 priv.fc_stats.work.work);
	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
	/* Take dellist first to ensure that counters cannot be deleted before
	 * they are inserted.
	 */
	struct llist_node *dellist = llist_del_all(&fc_stats->dellist);
	struct llist_node *addlist = llist_del_all(&fc_stats->addlist);
	struct mlx5_fc *counter = NULL, *last = NULL, *tmp;
	unsigned long now = jiffies;

	if (addlist || !list_empty(&fc_stats->counters))
		queue_delayed_work(fc_stats->wq, &fc_stats->work,
				   fc_stats->sampling_interval);

	llist_for_each_entry(counter, addlist, addlist) {
		mlx5_fc_stats_insert(dev, counter);
		fc_stats->num_counters++;
	}

	llist_for_each_entry_safe(counter, tmp, dellist, dellist) {
		mlx5_fc_stats_remove(dev, counter);

		mlx5_fc_release(dev, counter);
		fc_stats->num_counters--;
	}

	if (fc_stats->bulk_query_len < get_max_bulk_query_len(dev) &&
	    fc_stats->num_counters > get_init_bulk_query_len(dev))
		mlx5_fc_stats_bulk_query_size_increase(dev);

	if (time_before(now, fc_stats->next_query) ||
	    list_empty(&fc_stats->counters))
		return;
	last = list_last_entry(&fc_stats->counters, struct mlx5_fc, list);

	counter = list_first_entry(&fc_stats->counters, struct mlx5_fc,
				   list);
	if (counter)
		mlx5_fc_stats_query_counter_range(dev, counter, last->id);

	fc_stats->next_query = now + fc_stats->sampling_interval;
}

static struct mlx5_fc *mlx5_fc_single_alloc(struct mlx5_core_dev *dev)
{
	struct mlx5_fc *counter;
	int err;

	counter = kzalloc(sizeof(*counter), GFP_KERNEL);
	if (!counter)
		return ERR_PTR(-ENOMEM);

	err = mlx5_cmd_fc_alloc(dev, &counter->id);
	if (err) {
		kfree(counter);
		return ERR_PTR(err);
	}

	return counter;
}

static struct mlx5_fc *mlx5_fc_acquire(struct mlx5_core_dev *dev, bool aging)
{
	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
	struct mlx5_fc *counter;

	if (aging && MLX5_CAP_GEN(dev, flow_counter_bulk_alloc) != 0) {
		counter = mlx5_fc_pool_acquire_counter(&fc_stats->fc_pool);
		if (!IS_ERR(counter))
			return counter;
	}

	return mlx5_fc_single_alloc(dev);
}

struct mlx5_fc *mlx5_fc_create_ex(struct mlx5_core_dev *dev, bool aging)
{
	struct mlx5_fc *counter = mlx5_fc_acquire(dev, aging);
	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
	int err;

	if (IS_ERR(counter))
		return counter;

	INIT_LIST_HEAD(&counter->list);
	counter->aging = aging;

	if (aging) {
		u32 id = counter->id;

		counter->cache.lastuse = jiffies;
		counter->lastbytes = counter->cache.bytes;
		counter->lastpackets = counter->cache.packets;

		idr_preload(GFP_KERNEL);
		spin_lock(&fc_stats->counters_idr_lock);

		err = idr_alloc_u32(&fc_stats->counters_idr, counter, &id, id,
				    GFP_NOWAIT);

		spin_unlock(&fc_stats->counters_idr_lock);
		idr_preload_end();
		if (err)
			goto err_out_alloc;

		llist_add(&counter->addlist, &fc_stats->addlist);
	}

	return counter;

err_out_alloc:
	mlx5_fc_release(dev, counter);
	return ERR_PTR(err);
}

struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging)
{
	struct mlx5_fc *counter = mlx5_fc_create_ex(dev, aging);
	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;

	if (aging)
		mod_delayed_work(fc_stats->wq, &fc_stats->work, 0);
	return counter;
}
EXPORT_SYMBOL(mlx5_fc_create);
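
/* Sketch of typical use of the exported API (illustrative; the flow
 * steering structures come from the mlx5 headers, not this file): the
 * counter id is handed to a rule as a COUNTER destination, e.g.
 *
 *	struct mlx5_flow_destination dest = {};
 *	struct mlx5_fc *fc = mlx5_fc_create(dev, true);
 *
 *	if (IS_ERR(fc))
 *		return PTR_ERR(fc);
 *	dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
 *	dest.counter_id = mlx5_fc_id(fc);
 *
 * with MLX5_FLOW_CONTEXT_ACTION_COUNT set on the rule; the counter is
 * freed with mlx5_fc_destroy() after the rule is removed.
 */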

u32 mlx5_fc_id(struct mlx5_fc *counter)
{
	return counter->id;
}
EXPORT_SYMBOL(mlx5_fc_id);

void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter)
{
	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;

	if (!counter)
		return;

	if (counter->aging) {
		llist_add(&counter->dellist, &fc_stats->dellist);
		mod_delayed_work(fc_stats->wq, &fc_stats->work, 0);
		return;
	}

	mlx5_fc_release(dev, counter);
}
EXPORT_SYMBOL(mlx5_fc_destroy);

int mlx5_init_fc_stats(struct mlx5_core_dev *dev)
{
	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
	int init_bulk_len;
	int init_out_len;

	spin_lock_init(&fc_stats->counters_idr_lock);
	idr_init(&fc_stats->counters_idr);
	INIT_LIST_HEAD(&fc_stats->counters);
	init_llist_head(&fc_stats->addlist);
	init_llist_head(&fc_stats->dellist);

	init_bulk_len = get_init_bulk_query_len(dev);
	init_out_len = mlx5_cmd_fc_get_bulk_query_out_len(init_bulk_len);
	fc_stats->bulk_query_out = kzalloc(init_out_len, GFP_KERNEL);
	if (!fc_stats->bulk_query_out)
		return -ENOMEM;
	fc_stats->bulk_query_len = init_bulk_len;

	fc_stats->wq = create_singlethread_workqueue("mlx5_fc");
	if (!fc_stats->wq)
		goto err_wq_create;

	fc_stats->sampling_interval = MLX5_FC_STATS_PERIOD;
	INIT_DELAYED_WORK(&fc_stats->work, mlx5_fc_stats_work);

	mlx5_fc_pool_init(&fc_stats->fc_pool, dev);
	return 0;

err_wq_create:
	kfree(fc_stats->bulk_query_out);
	return -ENOMEM;
}

void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev)
{
	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
	struct llist_node *tmplist;
	struct mlx5_fc *counter;
	struct mlx5_fc *tmp;

	cancel_delayed_work_sync(&dev->priv.fc_stats.work);
	destroy_workqueue(dev->priv.fc_stats.wq);
	dev->priv.fc_stats.wq = NULL;

	tmplist = llist_del_all(&fc_stats->addlist);
	llist_for_each_entry_safe(counter, tmp, tmplist, addlist)
		mlx5_fc_release(dev, counter);

	list_for_each_entry_safe(counter, tmp, &fc_stats->counters, list)
		mlx5_fc_release(dev, counter);

	mlx5_fc_pool_cleanup(&fc_stats->fc_pool);
	idr_destroy(&fc_stats->counters_idr);
	kfree(fc_stats->bulk_query_out);
}

int mlx5_fc_query(struct mlx5_core_dev *dev, struct mlx5_fc *counter,
		  u64 *packets, u64 *bytes)
{
	return mlx5_cmd_fc_query(dev, counter->id, packets, bytes);
}
EXPORT_SYMBOL(mlx5_fc_query);

u64 mlx5_fc_query_lastuse(struct mlx5_fc *counter)
{
	return counter->cache.lastuse;
}

void mlx5_fc_query_cached(struct mlx5_fc *counter,
			  u64 *bytes, u64 *packets, u64 *lastuse)
{
	struct mlx5_fc_cache c;

	c = counter->cache;

	*bytes = c.bytes - counter->lastbytes;
	*packets = c.packets - counter->lastpackets;
	*lastuse = c.lastuse;

	counter->lastbytes = c.bytes;
	counter->lastpackets = c.packets;
}
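
/* Example of the delta bookkeeping above: if a sampling pass leaves
 * cache.packets = 110 while lastpackets is 100, the caller is handed 10
 * and lastpackets becomes 110, so each call reports only the traffic
 * seen since the previous call.
 */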

void mlx5_fc_queue_stats_work(struct mlx5_core_dev *dev,
			      struct delayed_work *dwork,
			      unsigned long delay)
{
	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;

	queue_delayed_work(fc_stats->wq, dwork, delay);
}

void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev,
				      unsigned long interval)
{
	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;

	fc_stats->sampling_interval = min_t(unsigned long, interval,
					    fc_stats->sampling_interval);
}

/* Flow counter bulks */

struct mlx5_fc_bulk {
	struct list_head pool_list;
	u32 base_id;
	int bulk_len;
	unsigned long *bitmask;
	struct mlx5_fc fcs[];
};

static void mlx5_fc_init(struct mlx5_fc *counter, struct mlx5_fc_bulk *bulk,
			 u32 id)
{
	counter->bulk = bulk;
	counter->id = id;
}

static int mlx5_fc_bulk_get_free_fcs_amount(struct mlx5_fc_bulk *bulk)
{
	return bitmap_weight(bulk->bitmask, bulk->bulk_len);
}

static struct mlx5_fc_bulk *mlx5_fc_bulk_create(struct mlx5_core_dev *dev)
{
	enum mlx5_fc_bulk_alloc_bitmask alloc_bitmask;
	struct mlx5_fc_bulk *bulk;
	int err = -ENOMEM;
	int bulk_len;
	u32 base_id;
	int i;

	alloc_bitmask = MLX5_CAP_GEN(dev, flow_counter_bulk_alloc);
	bulk_len = alloc_bitmask > 0 ? MLX5_FC_BULK_NUM_FCS(alloc_bitmask) : 1;

	bulk = kvzalloc(struct_size(bulk, fcs, bulk_len), GFP_KERNEL);
	if (!bulk)
		goto err_alloc_bulk;

	bulk->bitmask = kvcalloc(BITS_TO_LONGS(bulk_len), sizeof(unsigned long),
				 GFP_KERNEL);
	if (!bulk->bitmask)
		goto err_alloc_bitmask;

	err = mlx5_cmd_fc_bulk_alloc(dev, alloc_bitmask, &base_id);
	if (err)
		goto err_mlx5_cmd_bulk_alloc;

	bulk->base_id = base_id;
	bulk->bulk_len = bulk_len;
	for (i = 0; i < bulk_len; i++) {
		mlx5_fc_init(&bulk->fcs[i], bulk, base_id + i);
		set_bit(i, bulk->bitmask);
	}

	return bulk;

err_mlx5_cmd_bulk_alloc:
	kvfree(bulk->bitmask);
err_alloc_bitmask:
	kvfree(bulk);
err_alloc_bulk:
	return ERR_PTR(err);
}

static int
mlx5_fc_bulk_destroy(struct mlx5_core_dev *dev, struct mlx5_fc_bulk *bulk)
{
	if (mlx5_fc_bulk_get_free_fcs_amount(bulk) < bulk->bulk_len) {
		mlx5_core_err(dev, "Freeing bulk before all counters were released\n");
		return -EBUSY;
	}

	mlx5_cmd_fc_free(dev, bulk->base_id);
	kvfree(bulk->bitmask);
	kvfree(bulk);

	return 0;
}

static struct mlx5_fc *mlx5_fc_bulk_acquire_fc(struct mlx5_fc_bulk *bulk)
{
	int free_fc_index = find_first_bit(bulk->bitmask, bulk->bulk_len);

	if (free_fc_index >= bulk->bulk_len)
		return ERR_PTR(-ENOSPC);

	clear_bit(free_fc_index, bulk->bitmask);
	return &bulk->fcs[free_fc_index];
}

static int mlx5_fc_bulk_release_fc(struct mlx5_fc_bulk *bulk, struct mlx5_fc *fc)
{
	int fc_index = fc->id - bulk->base_id;

	if (test_bit(fc_index, bulk->bitmask))
		return -EINVAL;

	set_bit(fc_index, bulk->bitmask);
	return 0;
}
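
/* Bitmask convention for a bulk: a set bit marks the counter at that
 * index as free, a cleared bit as handed out. Acquire clears the first
 * set bit and returns the matching fcs[] entry; release sets the bit
 * again, returning -EINVAL on a double release.
 */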

/* Flow counters pool API */

static void mlx5_fc_pool_init(struct mlx5_fc_pool *fc_pool, struct mlx5_core_dev *dev)
{
	fc_pool->dev = dev;
	mutex_init(&fc_pool->pool_lock);
	INIT_LIST_HEAD(&fc_pool->fully_used);
	INIT_LIST_HEAD(&fc_pool->partially_used);
	INIT_LIST_HEAD(&fc_pool->unused);
	fc_pool->available_fcs = 0;
	fc_pool->used_fcs = 0;
	fc_pool->threshold = 0;
}

static void mlx5_fc_pool_cleanup(struct mlx5_fc_pool *fc_pool)
{
	struct mlx5_core_dev *dev = fc_pool->dev;
	struct mlx5_fc_bulk *bulk;
	struct mlx5_fc_bulk *tmp;

	list_for_each_entry_safe(bulk, tmp, &fc_pool->fully_used, pool_list)
		mlx5_fc_bulk_destroy(dev, bulk);
	list_for_each_entry_safe(bulk, tmp, &fc_pool->partially_used, pool_list)
		mlx5_fc_bulk_destroy(dev, bulk);
	list_for_each_entry_safe(bulk, tmp, &fc_pool->unused, pool_list)
		mlx5_fc_bulk_destroy(dev, bulk);
}

static void mlx5_fc_pool_update_threshold(struct mlx5_fc_pool *fc_pool)
{
	fc_pool->threshold = min_t(int, MLX5_FC_POOL_MAX_THRESHOLD,
				   fc_pool->used_fcs / MLX5_FC_POOL_USED_BUFF_RATIO);
}
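
/* Worked example: with used_fcs = 4096 the threshold becomes
 * min(BIT(18), 4096 / 10) = 409, so a completely freed bulk is returned
 * to the device only while more than 409 counters are already available;
 * otherwise it is parked on the unused list for reuse.
 */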

static struct mlx5_fc_bulk *
mlx5_fc_pool_alloc_new_bulk(struct mlx5_fc_pool *fc_pool)
{
	struct mlx5_core_dev *dev = fc_pool->dev;
	struct mlx5_fc_bulk *new_bulk;

	new_bulk = mlx5_fc_bulk_create(dev);
	if (!IS_ERR(new_bulk))
		fc_pool->available_fcs += new_bulk->bulk_len;
	mlx5_fc_pool_update_threshold(fc_pool);
	return new_bulk;
}

static void
mlx5_fc_pool_free_bulk(struct mlx5_fc_pool *fc_pool, struct mlx5_fc_bulk *bulk)
{
	struct mlx5_core_dev *dev = fc_pool->dev;

	fc_pool->available_fcs -= bulk->bulk_len;
	mlx5_fc_bulk_destroy(dev, bulk);
	mlx5_fc_pool_update_threshold(fc_pool);
}

static struct mlx5_fc *
mlx5_fc_pool_acquire_from_list(struct list_head *src_list,
			       struct list_head *next_list,
			       bool move_non_full_bulk)
{
	struct mlx5_fc_bulk *bulk;
	struct mlx5_fc *fc;

	if (list_empty(src_list))
		return ERR_PTR(-ENODATA);

	bulk = list_first_entry(src_list, struct mlx5_fc_bulk, pool_list);
	fc = mlx5_fc_bulk_acquire_fc(bulk);
	if (move_non_full_bulk || mlx5_fc_bulk_get_free_fcs_amount(bulk) == 0)
		list_move(&bulk->pool_list, next_list);
	return fc;
}

static struct mlx5_fc *
mlx5_fc_pool_acquire_counter(struct mlx5_fc_pool *fc_pool)
{
	struct mlx5_fc_bulk *new_bulk;
	struct mlx5_fc *fc;

	mutex_lock(&fc_pool->pool_lock);

	fc = mlx5_fc_pool_acquire_from_list(&fc_pool->partially_used,
					    &fc_pool->fully_used, false);
	if (IS_ERR(fc))
		fc = mlx5_fc_pool_acquire_from_list(&fc_pool->unused,
						    &fc_pool->partially_used,
						    true);
	if (IS_ERR(fc)) {
		new_bulk = mlx5_fc_pool_alloc_new_bulk(fc_pool);
		if (IS_ERR(new_bulk)) {
			fc = ERR_CAST(new_bulk);
			goto out;
		}
		fc = mlx5_fc_bulk_acquire_fc(new_bulk);
		list_add(&new_bulk->pool_list, &fc_pool->partially_used);
	}
	fc_pool->available_fcs--;
	fc_pool->used_fcs++;

out:
	mutex_unlock(&fc_pool->pool_lock);
	return fc;
}

static void
mlx5_fc_pool_release_counter(struct mlx5_fc_pool *fc_pool, struct mlx5_fc *fc)
{
	struct mlx5_core_dev *dev = fc_pool->dev;
	struct mlx5_fc_bulk *bulk = fc->bulk;
	int bulk_free_fcs_amount;

	mutex_lock(&fc_pool->pool_lock);

	if (mlx5_fc_bulk_release_fc(bulk, fc)) {
		mlx5_core_warn(dev, "Attempted to release a counter which is not acquired\n");
		goto unlock;
	}

	fc_pool->available_fcs++;
	fc_pool->used_fcs--;

	bulk_free_fcs_amount = mlx5_fc_bulk_get_free_fcs_amount(bulk);
	if (bulk_free_fcs_amount == 1)
		list_move_tail(&bulk->pool_list, &fc_pool->partially_used);
	if (bulk_free_fcs_amount == bulk->bulk_len) {
		list_del(&bulk->pool_list);
		if (fc_pool->available_fcs > fc_pool->threshold)
			mlx5_fc_pool_free_bulk(fc_pool, bulk);
		else
			list_add(&bulk->pool_list, &fc_pool->unused);
	}

unlock:
	mutex_unlock(&fc_pool->pool_lock);
}
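
/* Pool list transitions, for reference: a bulk created on demand starts
 * on partially_used, moves to fully_used once its last free counter is
 * acquired, comes back to partially_used when one counter is released,
 * and once completely free is either destroyed (available counters above
 * the threshold) or parked on unused until needed again.
 */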