cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

en_main.c (149475B)


      1/*
      2 * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved.
      3 *
      4 * This software is available to you under a choice of one of two
      5 * licenses.  You may choose to be licensed under the terms of the GNU
      6 * General Public License (GPL) Version 2, available from the file
      7 * COPYING in the main directory of this source tree, or the
      8 * OpenIB.org BSD license below:
      9 *
     10 *     Redistribution and use in source and binary forms, with or
     11 *     without modification, are permitted provided that the following
     12 *     conditions are met:
     13 *
     14 *      - Redistributions of source code must retain the above
     15 *        copyright notice, this list of conditions and the following
     16 *        disclaimer.
     17 *
     18 *      - Redistributions in binary form must reproduce the above
     19 *        copyright notice, this list of conditions and the following
     20 *        disclaimer in the documentation and/or other materials
     21 *        provided with the distribution.
     22 *
     23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
     26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
     27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
     28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
     29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     30 * SOFTWARE.
     31 */
     32
     33#include <net/tc_act/tc_gact.h>
     34#include <net/pkt_cls.h>
     35#include <linux/mlx5/fs.h>
     36#include <net/vxlan.h>
     37#include <net/geneve.h>
     38#include <linux/bpf.h>
     39#include <linux/if_bridge.h>
     40#include <linux/filter.h>
     41#include <net/page_pool.h>
     42#include <net/xdp_sock_drv.h>
     43#include "eswitch.h"
     44#include "en.h"
     45#include "en/txrx.h"
     46#include "en_tc.h"
     47#include "en_rep.h"
     48#include "en_accel/ipsec.h"
     49#include "en_accel/en_accel.h"
     50#include "en_accel/ktls.h"
     51#include "lib/vxlan.h"
     52#include "lib/clock.h"
     53#include "en/port.h"
     54#include "en/xdp.h"
     55#include "lib/eq.h"
     56#include "en/monitor_stats.h"
     57#include "en/health.h"
     58#include "en/params.h"
     59#include "en/xsk/pool.h"
     60#include "en/xsk/setup.h"
     61#include "en/xsk/rx.h"
     62#include "en/xsk/tx.h"
     63#include "en/hv_vhca_stats.h"
     64#include "en/devlink.h"
     65#include "lib/mlx5.h"
     66#include "en/ptp.h"
     67#include "qos.h"
     68#include "en/trap.h"
     69
     70bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
     71{
     72	bool striding_rq_umr, inline_umr;
     73	u16 max_wqe_sz_cap;
     74
     75	striding_rq_umr = MLX5_CAP_GEN(mdev, striding_rq) && MLX5_CAP_GEN(mdev, umr_ptr_rlky) &&
     76			  MLX5_CAP_ETH(mdev, reg_umr_sq);
     77	max_wqe_sz_cap = mlx5e_get_max_sq_wqebbs(mdev) * MLX5_SEND_WQE_BB;
     78	inline_umr = max_wqe_sz_cap >= MLX5E_UMR_WQE_INLINE_SZ;
     79	if (!striding_rq_umr)
     80		return false;
     81	if (!inline_umr) {
     82		mlx5_core_warn(mdev, "Cannot support Striding RQ: UMR WQE size (%d) exceeds maximum supported (%d).\n",
     83			       (int)MLX5E_UMR_WQE_INLINE_SZ, max_wqe_sz_cap);
     84		return false;
     85	}
     86	return true;
     87}
     88
     89void mlx5e_update_carrier(struct mlx5e_priv *priv)
     90{
     91	struct mlx5_core_dev *mdev = priv->mdev;
     92	u8 port_state;
     93	bool up;
     94
     95	port_state = mlx5_query_vport_state(mdev,
     96					    MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT,
     97					    0);
     98
     99	up = port_state == VPORT_STATE_UP;
    100	if (up == netif_carrier_ok(priv->netdev))
    101		netif_carrier_event(priv->netdev);
    102	if (up) {
    103		netdev_info(priv->netdev, "Link up\n");
    104		netif_carrier_on(priv->netdev);
    105	} else {
    106		netdev_info(priv->netdev, "Link down\n");
    107		netif_carrier_off(priv->netdev);
    108	}
    109}
    110
    111static void mlx5e_update_carrier_work(struct work_struct *work)
    112{
    113	struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
    114					       update_carrier_work);
    115
    116	mutex_lock(&priv->state_lock);
    117	if (test_bit(MLX5E_STATE_OPENED, &priv->state))
    118		if (priv->profile->update_carrier)
    119			priv->profile->update_carrier(priv);
    120	mutex_unlock(&priv->state_lock);
    121}
    122
    123static void mlx5e_update_stats_work(struct work_struct *work)
    124{
    125	struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
    126					       update_stats_work);
    127
    128	mutex_lock(&priv->state_lock);
    129	priv->profile->update_stats(priv);
    130	mutex_unlock(&priv->state_lock);
    131}
    132
    133void mlx5e_queue_update_stats(struct mlx5e_priv *priv)
    134{
    135	if (!priv->profile->update_stats)
    136		return;
    137
    138	if (unlikely(test_bit(MLX5E_STATE_DESTROYING, &priv->state)))
    139		return;
    140
    141	queue_work(priv->wq, &priv->update_stats_work);
    142}
    143
    144static int async_event(struct notifier_block *nb, unsigned long event, void *data)
    145{
    146	struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb);
    147	struct mlx5_eqe   *eqe = data;
    148
    149	if (event != MLX5_EVENT_TYPE_PORT_CHANGE)
    150		return NOTIFY_DONE;
    151
    152	switch (eqe->sub_type) {
    153	case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
    154	case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
    155		queue_work(priv->wq, &priv->update_carrier_work);
    156		break;
    157	default:
    158		return NOTIFY_DONE;
    159	}
    160
    161	return NOTIFY_OK;
    162}
    163
    164static void mlx5e_enable_async_events(struct mlx5e_priv *priv)
    165{
    166	priv->events_nb.notifier_call = async_event;
    167	mlx5_notifier_register(priv->mdev, &priv->events_nb);
    168}
    169
    170static void mlx5e_disable_async_events(struct mlx5e_priv *priv)
    171{
    172	mlx5_notifier_unregister(priv->mdev, &priv->events_nb);
    173}
    174
    175static int blocking_event(struct notifier_block *nb, unsigned long event, void *data)
    176{
    177	struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, blocking_events_nb);
    178	int err;
    179
    180	switch (event) {
    181	case MLX5_DRIVER_EVENT_TYPE_TRAP:
    182		err = mlx5e_handle_trap_event(priv, data);
    183		break;
    184	default:
    185		netdev_warn(priv->netdev, "Sync event: Unknown event %ld\n", event);
    186		err = -EINVAL;
    187	}
    188	return err;
    189}
    190
    191static void mlx5e_enable_blocking_events(struct mlx5e_priv *priv)
    192{
    193	priv->blocking_events_nb.notifier_call = blocking_event;
    194	mlx5_blocking_notifier_register(priv->mdev, &priv->blocking_events_nb);
    195}
    196
    197static void mlx5e_disable_blocking_events(struct mlx5e_priv *priv)
    198{
    199	mlx5_blocking_notifier_unregister(priv->mdev, &priv->blocking_events_nb);
    200}
    201
    202static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
    203				       struct mlx5e_icosq *sq,
    204				       struct mlx5e_umr_wqe *wqe)
    205{
    206	struct mlx5_wqe_ctrl_seg      *cseg = &wqe->ctrl;
    207	struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl;
    208	u8 ds_cnt = DIV_ROUND_UP(MLX5E_UMR_WQE_INLINE_SZ, MLX5_SEND_WQE_DS);
    209
    210	cseg->qpn_ds    = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) |
    211				      ds_cnt);
    212	cseg->umr_mkey  = rq->mkey_be;
    213
    214	ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE;
    215	ucseg->xlt_octowords =
    216		cpu_to_be16(MLX5_MTT_OCTW(MLX5_MPWRQ_PAGES_PER_WQE));
    217	ucseg->mkey_mask     = cpu_to_be64(MLX5_MKEY_MASK_FREE);
    218}
    219
    220static int mlx5e_rq_shampo_hd_alloc(struct mlx5e_rq *rq, int node)
    221{
    222	rq->mpwqe.shampo = kvzalloc_node(sizeof(*rq->mpwqe.shampo),
    223					 GFP_KERNEL, node);
    224	if (!rq->mpwqe.shampo)
    225		return -ENOMEM;
    226	return 0;
    227}
    228
    229static void mlx5e_rq_shampo_hd_free(struct mlx5e_rq *rq)
    230{
    231	kvfree(rq->mpwqe.shampo);
    232}
    233
    234static int mlx5e_rq_shampo_hd_info_alloc(struct mlx5e_rq *rq, int node)
    235{
    236	struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
    237
    238	shampo->bitmap = bitmap_zalloc_node(shampo->hd_per_wq, GFP_KERNEL,
    239					    node);
    240	if (!shampo->bitmap)
    241		return -ENOMEM;
    242
    243	shampo->info = kvzalloc_node(array_size(shampo->hd_per_wq,
    244						sizeof(*shampo->info)),
    245				     GFP_KERNEL, node);
    246	if (!shampo->info) {
    247		kvfree(shampo->bitmap);
    248		return -ENOMEM;
    249	}
    250	return 0;
    251}
    252
    253static void mlx5e_rq_shampo_hd_info_free(struct mlx5e_rq *rq)
    254{
    255	kvfree(rq->mpwqe.shampo->bitmap);
    256	kvfree(rq->mpwqe.shampo->info);
    257}
    258
    259static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node)
    260{
    261	int wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
    262
    263	rq->mpwqe.info = kvzalloc_node(array_size(wq_sz,
    264						  sizeof(*rq->mpwqe.info)),
    265				       GFP_KERNEL, node);
    266	if (!rq->mpwqe.info)
    267		return -ENOMEM;
    268
    269	mlx5e_build_umr_wqe(rq, rq->icosq, &rq->mpwqe.umr_wqe);
    270
    271	return 0;
    272}
    273
    274static int mlx5e_create_umr_mtt_mkey(struct mlx5_core_dev *mdev,
    275				     u64 npages, u8 page_shift, u32 *umr_mkey,
    276				     dma_addr_t filler_addr)
    277{
    278	struct mlx5_mtt *mtt;
    279	int inlen;
    280	void *mkc;
    281	u32 *in;
    282	int err;
    283	int i;
    284
    285	inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + sizeof(*mtt) * npages;
    286
    287	in = kvzalloc(inlen, GFP_KERNEL);
    288	if (!in)
    289		return -ENOMEM;
    290
    291	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
    292
    293	MLX5_SET(mkc, mkc, free, 1);
    294	MLX5_SET(mkc, mkc, umr_en, 1);
    295	MLX5_SET(mkc, mkc, lw, 1);
    296	MLX5_SET(mkc, mkc, lr, 1);
    297	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
    298	mlx5e_mkey_set_relaxed_ordering(mdev, mkc);
    299	MLX5_SET(mkc, mkc, qpn, 0xffffff);
    300	MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.hw_objs.pdn);
    301	MLX5_SET64(mkc, mkc, len, npages << page_shift);
    302	MLX5_SET(mkc, mkc, translations_octword_size,
    303		 MLX5_MTT_OCTW(npages));
    304	MLX5_SET(mkc, mkc, log_page_size, page_shift);
    305	MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
    306		 MLX5_MTT_OCTW(npages));
    307
    308	/* Initialize the mkey with all MTTs pointing to a default
    309	 * page (filler_addr). When the channels are activated, UMR
    310	 * WQEs will redirect the RX WQEs to the actual memory from
    311	 * the RQ's pool, while the gaps (wqe_overflow) remain mapped
    312	 * to the default page.
    313	 */
    314	mtt = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
    315	for (i = 0 ; i < npages ; i++)
    316		mtt[i].ptag = cpu_to_be64(filler_addr);
    317
    318	err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen);
    319
    320	kvfree(in);
    321	return err;
    322}
    323
    324static int mlx5e_create_umr_klm_mkey(struct mlx5_core_dev *mdev,
    325				     u64 nentries,
    326				     u32 *umr_mkey)
    327{
    328	int inlen;
    329	void *mkc;
    330	u32 *in;
    331	int err;
    332
    333	inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
    334
    335	in = kvzalloc(inlen, GFP_KERNEL);
    336	if (!in)
    337		return -ENOMEM;
    338
    339	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
    340
    341	MLX5_SET(mkc, mkc, free, 1);
    342	MLX5_SET(mkc, mkc, umr_en, 1);
    343	MLX5_SET(mkc, mkc, lw, 1);
    344	MLX5_SET(mkc, mkc, lr, 1);
    345	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS);
    346	mlx5e_mkey_set_relaxed_ordering(mdev, mkc);
    347	MLX5_SET(mkc, mkc, qpn, 0xffffff);
    348	MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.hw_objs.pdn);
    349	MLX5_SET(mkc, mkc, translations_octword_size, nentries);
    350	MLX5_SET(mkc, mkc, length64, 1);
    351	err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen);
    352
    353	kvfree(in);
    354	return err;
    355}
    356
    357static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq *rq)
    358{
    359	u64 num_mtts = MLX5E_REQUIRED_MTTS(mlx5_wq_ll_get_size(&rq->mpwqe.wq));
    360
    361	return mlx5e_create_umr_mtt_mkey(mdev, num_mtts, PAGE_SHIFT,
    362					 &rq->umr_mkey, rq->wqe_overflow.addr);
    363}
    364
    365static int mlx5e_create_rq_hd_umr_mkey(struct mlx5_core_dev *mdev,
    366				       struct mlx5e_rq *rq)
    367{
    368	u32 max_klm_size = BIT(MLX5_CAP_GEN(mdev, log_max_klm_list_size));
    369
    370	if (max_klm_size < rq->mpwqe.shampo->hd_per_wq) {
    371		mlx5_core_err(mdev, "max klm list size 0x%x is smaller than shampo header buffer list size 0x%x\n",
    372			      max_klm_size, rq->mpwqe.shampo->hd_per_wq);
    373		return -EINVAL;
    374	}
    375	return mlx5e_create_umr_klm_mkey(mdev, rq->mpwqe.shampo->hd_per_wq,
    376					 &rq->mpwqe.shampo->mkey);
    377}
    378
    379static u64 mlx5e_get_mpwqe_offset(u16 wqe_ix)
    380{
    381	return MLX5E_REQUIRED_MTTS(wqe_ix) << PAGE_SHIFT;
    382}
    383
    384static void mlx5e_init_frags_partition(struct mlx5e_rq *rq)
    385{
    386	struct mlx5e_wqe_frag_info next_frag = {};
    387	struct mlx5e_wqe_frag_info *prev = NULL;
    388	int i;
    389
    390	next_frag.di = &rq->wqe.di[0];
    391
    392	for (i = 0; i < mlx5_wq_cyc_get_size(&rq->wqe.wq); i++) {
    393		struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0];
    394		struct mlx5e_wqe_frag_info *frag =
    395			&rq->wqe.frags[i << rq->wqe.info.log_num_frags];
    396		int f;
    397
    398		for (f = 0; f < rq->wqe.info.num_frags; f++, frag++) {
    399			if (next_frag.offset + frag_info[f].frag_stride > PAGE_SIZE) {
    400				next_frag.di++;
    401				next_frag.offset = 0;
    402				if (prev)
    403					prev->last_in_page = true;
    404			}
    405			*frag = next_frag;
    406
    407			/* prepare next */
    408			next_frag.offset += frag_info[f].frag_stride;
    409			prev = frag;
    410		}
    411	}
    412
    413	if (prev)
    414		prev->last_in_page = true;
    415}
    416
    417int mlx5e_init_di_list(struct mlx5e_rq *rq, int wq_sz, int node)
    418{
    419	int len = wq_sz << rq->wqe.info.log_num_frags;
    420
    421	rq->wqe.di = kvzalloc_node(array_size(len, sizeof(*rq->wqe.di)), GFP_KERNEL, node);
    422	if (!rq->wqe.di)
    423		return -ENOMEM;
    424
    425	mlx5e_init_frags_partition(rq);
    426
    427	return 0;
    428}
    429
    430void mlx5e_free_di_list(struct mlx5e_rq *rq)
    431{
    432	kvfree(rq->wqe.di);
    433}
    434
    435static void mlx5e_rq_err_cqe_work(struct work_struct *recover_work)
    436{
    437	struct mlx5e_rq *rq = container_of(recover_work, struct mlx5e_rq, recover_work);
    438
    439	mlx5e_reporter_rq_cqe_err(rq);
    440}
    441
    442static int mlx5e_alloc_mpwqe_rq_drop_page(struct mlx5e_rq *rq)
    443{
    444	rq->wqe_overflow.page = alloc_page(GFP_KERNEL);
    445	if (!rq->wqe_overflow.page)
    446		return -ENOMEM;
    447
    448	rq->wqe_overflow.addr = dma_map_page(rq->pdev, rq->wqe_overflow.page, 0,
    449					     PAGE_SIZE, rq->buff.map_dir);
    450	if (dma_mapping_error(rq->pdev, rq->wqe_overflow.addr)) {
    451		__free_page(rq->wqe_overflow.page);
    452		return -ENOMEM;
    453	}
    454	return 0;
    455}
    456
    457static void mlx5e_free_mpwqe_rq_drop_page(struct mlx5e_rq *rq)
    458{
    459	 dma_unmap_page(rq->pdev, rq->wqe_overflow.addr, PAGE_SIZE,
    460			rq->buff.map_dir);
    461	 __free_page(rq->wqe_overflow.page);
    462}
    463
    464static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
    465			     struct mlx5e_rq *rq)
    466{
    467	struct mlx5_core_dev *mdev = c->mdev;
    468	int err;
    469
    470	rq->wq_type      = params->rq_wq_type;
    471	rq->pdev         = c->pdev;
    472	rq->netdev       = c->netdev;
    473	rq->priv         = c->priv;
    474	rq->tstamp       = c->tstamp;
    475	rq->clock        = &mdev->clock;
    476	rq->icosq        = &c->icosq;
    477	rq->ix           = c->ix;
    478	rq->channel      = c;
    479	rq->mdev         = mdev;
    480	rq->hw_mtu       = MLX5E_SW2HW_MTU(params, params->sw_mtu);
    481	rq->xdpsq        = &c->rq_xdpsq;
    482	rq->stats        = &c->priv->channel_stats[c->ix]->rq;
    483	rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev);
    484	err = mlx5e_rq_set_handlers(rq, params, NULL);
    485	if (err)
    486		return err;
    487
    488	return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, 0);
    489}
    490
    491static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev,
    492				struct mlx5e_params *params,
    493				struct mlx5e_rq_param *rqp,
    494				struct mlx5e_rq *rq,
    495				u32 *pool_size,
    496				int node)
    497{
    498	void *wqc = MLX5_ADDR_OF(rqc, rqp->rqc, wq);
    499	int wq_size;
    500	int err;
    501
    502	if (!test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state))
    503		return 0;
    504	err = mlx5e_rq_shampo_hd_alloc(rq, node);
    505	if (err)
    506		goto out;
    507	rq->mpwqe.shampo->hd_per_wq =
    508		mlx5e_shampo_hd_per_wq(mdev, params, rqp);
    509	err = mlx5e_create_rq_hd_umr_mkey(mdev, rq);
    510	if (err)
    511		goto err_shampo_hd;
    512	err = mlx5e_rq_shampo_hd_info_alloc(rq, node);
    513	if (err)
    514		goto err_shampo_info;
    515	rq->hw_gro_data = kvzalloc_node(sizeof(*rq->hw_gro_data), GFP_KERNEL, node);
    516	if (!rq->hw_gro_data) {
    517		err = -ENOMEM;
    518		goto err_hw_gro_data;
    519	}
    520	rq->mpwqe.shampo->key =
    521		cpu_to_be32(rq->mpwqe.shampo->mkey);
    522	rq->mpwqe.shampo->hd_per_wqe =
    523		mlx5e_shampo_hd_per_wqe(mdev, params, rqp);
    524	wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz));
    525	*pool_size += (rq->mpwqe.shampo->hd_per_wqe * wq_size) /
    526		     MLX5E_SHAMPO_WQ_HEADER_PER_PAGE;
    527	return 0;
    528
    529err_hw_gro_data:
    530	mlx5e_rq_shampo_hd_info_free(rq);
    531err_shampo_info:
    532	mlx5_core_destroy_mkey(mdev, rq->mpwqe.shampo->mkey);
    533err_shampo_hd:
    534	mlx5e_rq_shampo_hd_free(rq);
    535out:
    536	return err;
    537}
    538
    539static void mlx5e_rq_free_shampo(struct mlx5e_rq *rq)
    540{
    541	if (!test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state))
    542		return;
    543
    544	kvfree(rq->hw_gro_data);
    545	mlx5e_rq_shampo_hd_info_free(rq);
    546	mlx5_core_destroy_mkey(rq->mdev, rq->mpwqe.shampo->mkey);
    547	mlx5e_rq_shampo_hd_free(rq);
    548}
    549
    550static int mlx5e_alloc_rq(struct mlx5e_params *params,
    551			  struct mlx5e_xsk_param *xsk,
    552			  struct mlx5e_rq_param *rqp,
    553			  int node, struct mlx5e_rq *rq)
    554{
    555	struct page_pool_params pp_params = { 0 };
    556	struct mlx5_core_dev *mdev = rq->mdev;
    557	void *rqc = rqp->rqc;
    558	void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
    559	u32 pool_size;
    560	int wq_sz;
    561	int err;
    562	int i;
    563
    564	rqp->wq.db_numa_node = node;
    565	INIT_WORK(&rq->recover_work, mlx5e_rq_err_cqe_work);
    566
    567	if (params->xdp_prog)
    568		bpf_prog_inc(params->xdp_prog);
    569	RCU_INIT_POINTER(rq->xdp_prog, params->xdp_prog);
    570
    571	rq->buff.map_dir = params->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
    572	rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, xsk);
    573	pool_size = 1 << params->log_rq_mtu_frames;
    574
    575	switch (rq->wq_type) {
    576	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
    577		err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->mpwqe.wq,
    578					&rq->wq_ctrl);
    579		if (err)
    580			goto err_rq_xdp_prog;
    581
    582		err = mlx5e_alloc_mpwqe_rq_drop_page(rq);
    583		if (err)
    584			goto err_rq_wq_destroy;
    585
    586		rq->mpwqe.wq.db = &rq->mpwqe.wq.db[MLX5_RCV_DBR];
    587
    588		wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
    589
    590		pool_size = MLX5_MPWRQ_PAGES_PER_WQE <<
    591			mlx5e_mpwqe_get_log_rq_size(params, xsk);
    592
    593		rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
    594		rq->mpwqe.num_strides =
    595			BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk));
    596		rq->mpwqe.min_wqe_bulk = mlx5e_mpwqe_get_min_wqe_bulk(wq_sz);
    597
    598		rq->buff.frame0_sz = (1 << rq->mpwqe.log_stride_sz);
    599
    600		err = mlx5e_create_rq_umr_mkey(mdev, rq);
    601		if (err)
    602			goto err_rq_drop_page;
    603		rq->mkey_be = cpu_to_be32(rq->umr_mkey);
    604
    605		err = mlx5e_rq_alloc_mpwqe_info(rq, node);
    606		if (err)
    607			goto err_rq_mkey;
    608
    609		err = mlx5_rq_shampo_alloc(mdev, params, rqp, rq, &pool_size, node);
    610		if (err)
    611			goto err_free_by_rq_type;
    612
    613		break;
    614	default: /* MLX5_WQ_TYPE_CYCLIC */
    615		err = mlx5_wq_cyc_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq,
    616					 &rq->wq_ctrl);
    617		if (err)
    618			goto err_rq_xdp_prog;
    619
    620		rq->wqe.wq.db = &rq->wqe.wq.db[MLX5_RCV_DBR];
    621
    622		wq_sz = mlx5_wq_cyc_get_size(&rq->wqe.wq);
    623
    624		rq->wqe.info = rqp->frags_info;
    625		rq->buff.frame0_sz = rq->wqe.info.arr[0].frag_stride;
    626
    627		rq->wqe.frags =
    628			kvzalloc_node(array_size(sizeof(*rq->wqe.frags),
    629					(wq_sz << rq->wqe.info.log_num_frags)),
    630				      GFP_KERNEL, node);
    631		if (!rq->wqe.frags) {
    632			err = -ENOMEM;
    633			goto err_rq_wq_destroy;
    634		}
    635
    636		err = mlx5e_init_di_list(rq, wq_sz, node);
    637		if (err)
    638			goto err_rq_frags;
    639
    640		rq->mkey_be = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey);
    641	}
    642
    643	if (xsk) {
    644		err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
    645						 MEM_TYPE_XSK_BUFF_POOL, NULL);
    646		xsk_pool_set_rxq_info(rq->xsk_pool, &rq->xdp_rxq);
    647	} else {
    648		/* Create a page_pool and register it with rxq */
    649		pp_params.order     = 0;
    650		pp_params.flags     = 0; /* No-internal DMA mapping in page_pool */
    651		pp_params.pool_size = pool_size;
    652		pp_params.nid       = node;
    653		pp_params.dev       = rq->pdev;
    654		pp_params.dma_dir   = rq->buff.map_dir;
    655
    656		/* page_pool can be used even when there is no rq->xdp_prog,
    657		 * given page_pool does not handle DMA mapping there is no
    658		 * required state to clear. And page_pool gracefully handle
    659		 * elevated refcnt.
    660		 */
    661		rq->page_pool = page_pool_create(&pp_params);
    662		if (IS_ERR(rq->page_pool)) {
    663			err = PTR_ERR(rq->page_pool);
    664			rq->page_pool = NULL;
    665			goto err_free_shampo;
    666		}
    667		if (xdp_rxq_info_is_reg(&rq->xdp_rxq))
    668			err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
    669							 MEM_TYPE_PAGE_POOL, rq->page_pool);
    670	}
    671	if (err)
    672		goto err_free_shampo;
    673
    674	for (i = 0; i < wq_sz; i++) {
    675		if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
    676			struct mlx5e_rx_wqe_ll *wqe =
    677				mlx5_wq_ll_get_wqe(&rq->mpwqe.wq, i);
    678			u32 byte_count =
    679				rq->mpwqe.num_strides << rq->mpwqe.log_stride_sz;
    680			u64 dma_offset = mlx5e_get_mpwqe_offset(i);
    681			u16 headroom = test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state) ?
    682				       0 : rq->buff.headroom;
    683
    684			wqe->data[0].addr = cpu_to_be64(dma_offset + headroom);
    685			wqe->data[0].byte_count = cpu_to_be32(byte_count);
    686			wqe->data[0].lkey = rq->mkey_be;
    687		} else {
    688			struct mlx5e_rx_wqe_cyc *wqe =
    689				mlx5_wq_cyc_get_wqe(&rq->wqe.wq, i);
    690			int f;
    691
    692			for (f = 0; f < rq->wqe.info.num_frags; f++) {
    693				u32 frag_size = rq->wqe.info.arr[f].frag_size |
    694					MLX5_HW_START_PADDING;
    695
    696				wqe->data[f].byte_count = cpu_to_be32(frag_size);
    697				wqe->data[f].lkey = rq->mkey_be;
    698			}
    699			/* check if num_frags is not a pow of two */
    700			if (rq->wqe.info.num_frags < (1 << rq->wqe.info.log_num_frags)) {
    701				wqe->data[f].byte_count = 0;
    702				wqe->data[f].lkey = cpu_to_be32(MLX5_INVALID_LKEY);
    703				wqe->data[f].addr = 0;
    704			}
    705		}
    706	}
    707
    708	INIT_WORK(&rq->dim.work, mlx5e_rx_dim_work);
    709
    710	switch (params->rx_cq_moderation.cq_period_mode) {
    711	case MLX5_CQ_PERIOD_MODE_START_FROM_CQE:
    712		rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_CQE;
    713		break;
    714	case MLX5_CQ_PERIOD_MODE_START_FROM_EQE:
    715	default:
    716		rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
    717	}
    718
    719	rq->page_cache.head = 0;
    720	rq->page_cache.tail = 0;
    721
    722	return 0;
    723
    724err_free_shampo:
    725	mlx5e_rq_free_shampo(rq);
    726err_free_by_rq_type:
    727	switch (rq->wq_type) {
    728	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
    729		kvfree(rq->mpwqe.info);
    730err_rq_mkey:
    731		mlx5_core_destroy_mkey(mdev, rq->umr_mkey);
    732err_rq_drop_page:
    733		mlx5e_free_mpwqe_rq_drop_page(rq);
    734		break;
    735	default: /* MLX5_WQ_TYPE_CYCLIC */
    736		mlx5e_free_di_list(rq);
    737err_rq_frags:
    738		kvfree(rq->wqe.frags);
    739	}
    740err_rq_wq_destroy:
    741	mlx5_wq_destroy(&rq->wq_ctrl);
    742err_rq_xdp_prog:
    743	if (params->xdp_prog)
    744		bpf_prog_put(params->xdp_prog);
    745
    746	return err;
    747}
    748
    749static void mlx5e_free_rq(struct mlx5e_rq *rq)
    750{
    751	struct bpf_prog *old_prog;
    752	int i;
    753
    754	if (xdp_rxq_info_is_reg(&rq->xdp_rxq)) {
    755		old_prog = rcu_dereference_protected(rq->xdp_prog,
    756						     lockdep_is_held(&rq->priv->state_lock));
    757		if (old_prog)
    758			bpf_prog_put(old_prog);
    759	}
    760
    761	switch (rq->wq_type) {
    762	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
    763		kvfree(rq->mpwqe.info);
    764		mlx5_core_destroy_mkey(rq->mdev, rq->umr_mkey);
    765		mlx5e_free_mpwqe_rq_drop_page(rq);
    766		mlx5e_rq_free_shampo(rq);
    767		break;
    768	default: /* MLX5_WQ_TYPE_CYCLIC */
    769		kvfree(rq->wqe.frags);
    770		mlx5e_free_di_list(rq);
    771	}
    772
    773	for (i = rq->page_cache.head; i != rq->page_cache.tail;
    774	     i = (i + 1) & (MLX5E_CACHE_SIZE - 1)) {
    775		struct mlx5e_dma_info *dma_info = &rq->page_cache.page_cache[i];
    776
    777		/* With AF_XDP, page_cache is not used, so this loop is not
    778		 * entered, and it's safe to call mlx5e_page_release_dynamic
    779		 * directly.
    780		 */
    781		mlx5e_page_release_dynamic(rq, dma_info->page, false);
    782	}
    783
    784	xdp_rxq_info_unreg(&rq->xdp_rxq);
    785	page_pool_destroy(rq->page_pool);
    786	mlx5_wq_destroy(&rq->wq_ctrl);
    787}
    788
    789int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param)
    790{
    791	struct mlx5_core_dev *mdev = rq->mdev;
    792	u8 ts_format;
    793	void *in;
    794	void *rqc;
    795	void *wq;
    796	int inlen;
    797	int err;
    798
    799	inlen = MLX5_ST_SZ_BYTES(create_rq_in) +
    800		sizeof(u64) * rq->wq_ctrl.buf.npages;
    801	in = kvzalloc(inlen, GFP_KERNEL);
    802	if (!in)
    803		return -ENOMEM;
    804
    805	ts_format = mlx5_is_real_time_rq(mdev) ?
    806			    MLX5_TIMESTAMP_FORMAT_REAL_TIME :
    807			    MLX5_TIMESTAMP_FORMAT_FREE_RUNNING;
    808	rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
    809	wq  = MLX5_ADDR_OF(rqc, rqc, wq);
    810
    811	memcpy(rqc, param->rqc, sizeof(param->rqc));
    812
    813	MLX5_SET(rqc,  rqc, cqn,		rq->cq.mcq.cqn);
    814	MLX5_SET(rqc,  rqc, state,		MLX5_RQC_STATE_RST);
    815	MLX5_SET(rqc,  rqc, ts_format,		ts_format);
    816	MLX5_SET(wq,   wq,  log_wq_pg_sz,	rq->wq_ctrl.buf.page_shift -
    817						MLX5_ADAPTER_PAGE_SHIFT);
    818	MLX5_SET64(wq, wq,  dbr_addr,		rq->wq_ctrl.db.dma);
    819
    820	if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) {
    821		MLX5_SET(wq, wq, log_headers_buffer_entry_num,
    822			 order_base_2(rq->mpwqe.shampo->hd_per_wq));
    823		MLX5_SET(wq, wq, headers_mkey, rq->mpwqe.shampo->mkey);
    824	}
    825
    826	mlx5_fill_page_frag_array(&rq->wq_ctrl.buf,
    827				  (__be64 *)MLX5_ADDR_OF(wq, wq, pas));
    828
    829	err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn);
    830
    831	kvfree(in);
    832
    833	return err;
    834}
    835
    836int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state)
    837{
    838	struct mlx5_core_dev *mdev = rq->mdev;
    839
    840	void *in;
    841	void *rqc;
    842	int inlen;
    843	int err;
    844
    845	inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
    846	in = kvzalloc(inlen, GFP_KERNEL);
    847	if (!in)
    848		return -ENOMEM;
    849
    850	if (curr_state == MLX5_RQC_STATE_RST && next_state == MLX5_RQC_STATE_RDY)
    851		mlx5e_rqwq_reset(rq);
    852
    853	rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
    854
    855	MLX5_SET(modify_rq_in, in, rq_state, curr_state);
    856	MLX5_SET(rqc, rqc, state, next_state);
    857
    858	err = mlx5_core_modify_rq(mdev, rq->rqn, in);
    859
    860	kvfree(in);
    861
    862	return err;
    863}
    864
    865static int mlx5e_modify_rq_scatter_fcs(struct mlx5e_rq *rq, bool enable)
    866{
    867	struct mlx5_core_dev *mdev = rq->mdev;
    868
    869	void *in;
    870	void *rqc;
    871	int inlen;
    872	int err;
    873
    874	inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
    875	in = kvzalloc(inlen, GFP_KERNEL);
    876	if (!in)
    877		return -ENOMEM;
    878
    879	rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
    880
    881	MLX5_SET(modify_rq_in, in, rq_state, MLX5_RQC_STATE_RDY);
    882	MLX5_SET64(modify_rq_in, in, modify_bitmask,
    883		   MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_SCATTER_FCS);
    884	MLX5_SET(rqc, rqc, scatter_fcs, enable);
    885	MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY);
    886
    887	err = mlx5_core_modify_rq(mdev, rq->rqn, in);
    888
    889	kvfree(in);
    890
    891	return err;
    892}
    893
    894static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd)
    895{
    896	struct mlx5_core_dev *mdev = rq->mdev;
    897	void *in;
    898	void *rqc;
    899	int inlen;
    900	int err;
    901
    902	inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
    903	in = kvzalloc(inlen, GFP_KERNEL);
    904	if (!in)
    905		return -ENOMEM;
    906
    907	rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
    908
    909	MLX5_SET(modify_rq_in, in, rq_state, MLX5_RQC_STATE_RDY);
    910	MLX5_SET64(modify_rq_in, in, modify_bitmask,
    911		   MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD);
    912	MLX5_SET(rqc, rqc, vsd, vsd);
    913	MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY);
    914
    915	err = mlx5_core_modify_rq(mdev, rq->rqn, in);
    916
    917	kvfree(in);
    918
    919	return err;
    920}
    921
    922void mlx5e_destroy_rq(struct mlx5e_rq *rq)
    923{
    924	mlx5_core_destroy_rq(rq->mdev, rq->rqn);
    925}
    926
    927int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time)
    928{
    929	unsigned long exp_time = jiffies + msecs_to_jiffies(wait_time);
    930
    931	u16 min_wqes = mlx5_min_rx_wqes(rq->wq_type, mlx5e_rqwq_get_size(rq));
    932
    933	do {
    934		if (mlx5e_rqwq_get_cur_sz(rq) >= min_wqes)
    935			return 0;
    936
    937		msleep(20);
    938	} while (time_before(jiffies, exp_time));
    939
    940	netdev_warn(rq->netdev, "Failed to get min RX wqes on Channel[%d] RQN[0x%x] wq cur_sz(%d) min_rx_wqes(%d)\n",
    941		    rq->ix, rq->rqn, mlx5e_rqwq_get_cur_sz(rq), min_wqes);
    942
    943	mlx5e_reporter_rx_timeout(rq);
    944	return -ETIMEDOUT;
    945}
    946
    947void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq)
    948{
    949	struct mlx5_wq_ll *wq;
    950	u16 head;
    951	int i;
    952
    953	if (rq->wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
    954		return;
    955
    956	wq = &rq->mpwqe.wq;
    957	head = wq->head;
    958
    959	/* Outstanding UMR WQEs (in progress) start at wq->head */
    960	for (i = 0; i < rq->mpwqe.umr_in_progress; i++) {
    961		rq->dealloc_wqe(rq, head);
    962		head = mlx5_wq_ll_get_wqe_next_ix(wq, head);
    963	}
    964
    965	if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) {
    966		u16 len;
    967
    968		len = (rq->mpwqe.shampo->pi - rq->mpwqe.shampo->ci) &
    969		      (rq->mpwqe.shampo->hd_per_wq - 1);
    970		mlx5e_shampo_dealloc_hd(rq, len, rq->mpwqe.shampo->ci, false);
    971		rq->mpwqe.shampo->pi = rq->mpwqe.shampo->ci;
    972	}
    973
    974	rq->mpwqe.actual_wq_head = wq->head;
    975	rq->mpwqe.umr_in_progress = 0;
    976	rq->mpwqe.umr_completed = 0;
    977}
    978
    979void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
    980{
    981	__be16 wqe_ix_be;
    982	u16 wqe_ix;
    983
    984	if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
    985		struct mlx5_wq_ll *wq = &rq->mpwqe.wq;
    986
    987		mlx5e_free_rx_in_progress_descs(rq);
    988
    989		while (!mlx5_wq_ll_is_empty(wq)) {
    990			struct mlx5e_rx_wqe_ll *wqe;
    991
    992			wqe_ix_be = *wq->tail_next;
    993			wqe_ix    = be16_to_cpu(wqe_ix_be);
    994			wqe       = mlx5_wq_ll_get_wqe(wq, wqe_ix);
    995			rq->dealloc_wqe(rq, wqe_ix);
    996			mlx5_wq_ll_pop(wq, wqe_ix_be,
    997				       &wqe->next.next_wqe_index);
    998		}
    999
   1000		if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state))
   1001			mlx5e_shampo_dealloc_hd(rq, rq->mpwqe.shampo->hd_per_wq,
   1002						0, true);
   1003	} else {
   1004		struct mlx5_wq_cyc *wq = &rq->wqe.wq;
   1005
   1006		while (!mlx5_wq_cyc_is_empty(wq)) {
   1007			wqe_ix = mlx5_wq_cyc_get_tail(wq);
   1008			rq->dealloc_wqe(rq, wqe_ix);
   1009			mlx5_wq_cyc_pop(wq);
   1010		}
   1011	}
   1012
   1013}
   1014
   1015int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param,
   1016		  struct mlx5e_xsk_param *xsk, int node,
   1017		  struct mlx5e_rq *rq)
   1018{
   1019	struct mlx5_core_dev *mdev = rq->mdev;
   1020	int err;
   1021
   1022	if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO)
   1023		__set_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state);
   1024
   1025	err = mlx5e_alloc_rq(params, xsk, param, node, rq);
   1026	if (err)
   1027		return err;
   1028
   1029	err = mlx5e_create_rq(rq, param);
   1030	if (err)
   1031		goto err_free_rq;
   1032
   1033	err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
   1034	if (err)
   1035		goto err_destroy_rq;
   1036
   1037	if (MLX5_CAP_ETH(mdev, cqe_checksum_full))
   1038		__set_bit(MLX5E_RQ_STATE_CSUM_FULL, &rq->state);
   1039
   1040	if (params->rx_dim_enabled)
   1041		__set_bit(MLX5E_RQ_STATE_AM, &rq->state);
   1042
   1043	/* We disable csum_complete when XDP is enabled since
   1044	 * XDP programs might manipulate packets which will render
   1045	 * skb->checksum incorrect.
   1046	 */
   1047	if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || params->xdp_prog)
   1048		__set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state);
   1049
   1050	/* For CQE compression on striding RQ, use stride index provided by
   1051	 * HW if capability is supported.
   1052	 */
   1053	if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) &&
   1054	    MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index))
   1055		__set_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &rq->state);
   1056
   1057	return 0;
   1058
   1059err_destroy_rq:
   1060	mlx5e_destroy_rq(rq);
   1061err_free_rq:
   1062	mlx5e_free_rq(rq);
   1063
   1064	return err;
   1065}
   1066
   1067void mlx5e_activate_rq(struct mlx5e_rq *rq)
   1068{
   1069	set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
   1070}
   1071
   1072void mlx5e_deactivate_rq(struct mlx5e_rq *rq)
   1073{
   1074	clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
   1075	synchronize_net(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */
   1076}
   1077
   1078void mlx5e_close_rq(struct mlx5e_rq *rq)
   1079{
   1080	cancel_work_sync(&rq->dim.work);
   1081	cancel_work_sync(&rq->recover_work);
   1082	mlx5e_destroy_rq(rq);
   1083	mlx5e_free_rx_descs(rq);
   1084	mlx5e_free_rq(rq);
   1085}
   1086
   1087static void mlx5e_free_xdpsq_db(struct mlx5e_xdpsq *sq)
   1088{
   1089	kvfree(sq->db.xdpi_fifo.xi);
   1090	kvfree(sq->db.wqe_info);
   1091}
   1092
   1093static int mlx5e_alloc_xdpsq_fifo(struct mlx5e_xdpsq *sq, int numa)
   1094{
   1095	struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
   1096	int wq_sz        = mlx5_wq_cyc_get_size(&sq->wq);
   1097	int dsegs_per_wq = wq_sz * MLX5_SEND_WQEBB_NUM_DS;
   1098	size_t size;
   1099
   1100	size = array_size(sizeof(*xdpi_fifo->xi), dsegs_per_wq);
   1101	xdpi_fifo->xi = kvzalloc_node(size, GFP_KERNEL, numa);
   1102	if (!xdpi_fifo->xi)
   1103		return -ENOMEM;
   1104
   1105	xdpi_fifo->pc   = &sq->xdpi_fifo_pc;
   1106	xdpi_fifo->cc   = &sq->xdpi_fifo_cc;
   1107	xdpi_fifo->mask = dsegs_per_wq - 1;
   1108
   1109	return 0;
   1110}
   1111
   1112static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa)
   1113{
   1114	int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
   1115	size_t size;
   1116	int err;
   1117
   1118	size = array_size(sizeof(*sq->db.wqe_info), wq_sz);
   1119	sq->db.wqe_info = kvzalloc_node(size, GFP_KERNEL, numa);
   1120	if (!sq->db.wqe_info)
   1121		return -ENOMEM;
   1122
   1123	err = mlx5e_alloc_xdpsq_fifo(sq, numa);
   1124	if (err) {
   1125		mlx5e_free_xdpsq_db(sq);
   1126		return err;
   1127	}
   1128
   1129	return 0;
   1130}
   1131
   1132static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
   1133			     struct mlx5e_params *params,
   1134			     struct xsk_buff_pool *xsk_pool,
   1135			     struct mlx5e_sq_param *param,
   1136			     struct mlx5e_xdpsq *sq,
   1137			     bool is_redirect)
   1138{
   1139	void *sqc_wq               = MLX5_ADDR_OF(sqc, param->sqc, wq);
   1140	struct mlx5_core_dev *mdev = c->mdev;
   1141	struct mlx5_wq_cyc *wq = &sq->wq;
   1142	int err;
   1143
   1144	sq->pdev      = c->pdev;
   1145	sq->mkey_be   = c->mkey_be;
   1146	sq->channel   = c;
   1147	sq->uar_map   = mdev->mlx5e_res.hw_objs.bfreg.map;
   1148	sq->min_inline_mode = params->tx_min_inline_mode;
   1149	sq->hw_mtu    = MLX5E_SW2HW_MTU(params, params->sw_mtu);
   1150	sq->xsk_pool  = xsk_pool;
   1151
   1152	sq->stats = sq->xsk_pool ?
   1153		&c->priv->channel_stats[c->ix]->xsksq :
   1154		is_redirect ?
   1155			&c->priv->channel_stats[c->ix]->xdpsq :
   1156			&c->priv->channel_stats[c->ix]->rq_xdpsq;
   1157	sq->max_sq_wqebbs = mlx5e_get_max_sq_wqebbs(mdev);
   1158	sq->stop_room = MLX5E_STOP_ROOM(sq->max_sq_wqebbs);
   1159	sq->max_sq_mpw_wqebbs = mlx5e_get_sw_max_sq_mpw_wqebbs(sq->max_sq_wqebbs);
   1160
   1161	param->wq.db_numa_node = cpu_to_node(c->cpu);
   1162	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
   1163	if (err)
   1164		return err;
   1165	wq->db = &wq->db[MLX5_SND_DBR];
   1166
   1167	err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu));
   1168	if (err)
   1169		goto err_sq_wq_destroy;
   1170
   1171	return 0;
   1172
   1173err_sq_wq_destroy:
   1174	mlx5_wq_destroy(&sq->wq_ctrl);
   1175
   1176	return err;
   1177}
   1178
   1179static void mlx5e_free_xdpsq(struct mlx5e_xdpsq *sq)
   1180{
   1181	mlx5e_free_xdpsq_db(sq);
   1182	mlx5_wq_destroy(&sq->wq_ctrl);
   1183}
   1184
   1185static void mlx5e_free_icosq_db(struct mlx5e_icosq *sq)
   1186{
   1187	kvfree(sq->db.wqe_info);
   1188}
   1189
   1190static int mlx5e_alloc_icosq_db(struct mlx5e_icosq *sq, int numa)
   1191{
   1192	int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
   1193	size_t size;
   1194
   1195	size = array_size(wq_sz, sizeof(*sq->db.wqe_info));
   1196	sq->db.wqe_info = kvzalloc_node(size, GFP_KERNEL, numa);
   1197	if (!sq->db.wqe_info)
   1198		return -ENOMEM;
   1199
   1200	return 0;
   1201}
   1202
   1203static void mlx5e_icosq_err_cqe_work(struct work_struct *recover_work)
   1204{
   1205	struct mlx5e_icosq *sq = container_of(recover_work, struct mlx5e_icosq,
   1206					      recover_work);
   1207
   1208	mlx5e_reporter_icosq_cqe_err(sq);
   1209}
   1210
   1211static void mlx5e_async_icosq_err_cqe_work(struct work_struct *recover_work)
   1212{
   1213	struct mlx5e_icosq *sq = container_of(recover_work, struct mlx5e_icosq,
   1214					      recover_work);
   1215
   1216	/* Not implemented yet. */
   1217
   1218	netdev_warn(sq->channel->netdev, "async_icosq recovery is not implemented\n");
   1219}
   1220
   1221static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
   1222			     struct mlx5e_sq_param *param,
   1223			     struct mlx5e_icosq *sq,
   1224			     work_func_t recover_work_func)
   1225{
   1226	void *sqc_wq               = MLX5_ADDR_OF(sqc, param->sqc, wq);
   1227	struct mlx5_core_dev *mdev = c->mdev;
   1228	struct mlx5_wq_cyc *wq = &sq->wq;
   1229	int err;
   1230
   1231	sq->channel   = c;
   1232	sq->uar_map   = mdev->mlx5e_res.hw_objs.bfreg.map;
   1233	sq->reserved_room = param->stop_room;
   1234	sq->max_sq_wqebbs = mlx5e_get_max_sq_wqebbs(mdev);
   1235
   1236	param->wq.db_numa_node = cpu_to_node(c->cpu);
   1237	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
   1238	if (err)
   1239		return err;
   1240	wq->db = &wq->db[MLX5_SND_DBR];
   1241
   1242	err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu));
   1243	if (err)
   1244		goto err_sq_wq_destroy;
   1245
   1246	INIT_WORK(&sq->recover_work, recover_work_func);
   1247
   1248	return 0;
   1249
   1250err_sq_wq_destroy:
   1251	mlx5_wq_destroy(&sq->wq_ctrl);
   1252
   1253	return err;
   1254}
   1255
   1256static void mlx5e_free_icosq(struct mlx5e_icosq *sq)
   1257{
   1258	mlx5e_free_icosq_db(sq);
   1259	mlx5_wq_destroy(&sq->wq_ctrl);
   1260}
   1261
   1262void mlx5e_free_txqsq_db(struct mlx5e_txqsq *sq)
   1263{
   1264	kvfree(sq->db.wqe_info);
   1265	kvfree(sq->db.skb_fifo.fifo);
   1266	kvfree(sq->db.dma_fifo);
   1267}
   1268
   1269int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa)
   1270{
   1271	int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
   1272	int df_sz = wq_sz * MLX5_SEND_WQEBB_NUM_DS;
   1273
   1274	sq->db.dma_fifo = kvzalloc_node(array_size(df_sz,
   1275						   sizeof(*sq->db.dma_fifo)),
   1276					GFP_KERNEL, numa);
   1277	sq->db.skb_fifo.fifo = kvzalloc_node(array_size(df_sz,
   1278							sizeof(*sq->db.skb_fifo.fifo)),
   1279					GFP_KERNEL, numa);
   1280	sq->db.wqe_info = kvzalloc_node(array_size(wq_sz,
   1281						   sizeof(*sq->db.wqe_info)),
   1282					GFP_KERNEL, numa);
   1283	if (!sq->db.dma_fifo || !sq->db.skb_fifo.fifo || !sq->db.wqe_info) {
   1284		mlx5e_free_txqsq_db(sq);
   1285		return -ENOMEM;
   1286	}
   1287
   1288	sq->dma_fifo_mask = df_sz - 1;
   1289
   1290	sq->db.skb_fifo.pc   = &sq->skb_fifo_pc;
   1291	sq->db.skb_fifo.cc   = &sq->skb_fifo_cc;
   1292	sq->db.skb_fifo.mask = df_sz - 1;
   1293
   1294	return 0;
   1295}
   1296
   1297static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
   1298			     int txq_ix,
   1299			     struct mlx5e_params *params,
   1300			     struct mlx5e_sq_param *param,
   1301			     struct mlx5e_txqsq *sq,
   1302			     int tc)
   1303{
   1304	void *sqc_wq               = MLX5_ADDR_OF(sqc, param->sqc, wq);
   1305	struct mlx5_core_dev *mdev = c->mdev;
   1306	struct mlx5_wq_cyc *wq = &sq->wq;
   1307	int err;
   1308
   1309	sq->pdev      = c->pdev;
   1310	sq->clock     = &mdev->clock;
   1311	sq->mkey_be   = c->mkey_be;
   1312	sq->netdev    = c->netdev;
   1313	sq->mdev      = c->mdev;
   1314	sq->priv      = c->priv;
   1315	sq->ch_ix     = c->ix;
   1316	sq->txq_ix    = txq_ix;
   1317	sq->uar_map   = mdev->mlx5e_res.hw_objs.bfreg.map;
   1318	sq->min_inline_mode = params->tx_min_inline_mode;
   1319	sq->hw_mtu    = MLX5E_SW2HW_MTU(params, params->sw_mtu);
   1320	sq->max_sq_wqebbs = mlx5e_get_max_sq_wqebbs(mdev);
   1321	sq->max_sq_mpw_wqebbs = mlx5e_get_sw_max_sq_mpw_wqebbs(sq->max_sq_wqebbs);
   1322	INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work);
   1323	if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert))
   1324		set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state);
   1325	if (mlx5_ipsec_device_caps(c->priv->mdev))
   1326		set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
   1327	if (param->is_mpw)
   1328		set_bit(MLX5E_SQ_STATE_MPWQE, &sq->state);
   1329	sq->stop_room = param->stop_room;
   1330	sq->ptp_cyc2time = mlx5_sq_ts_translator(mdev);
   1331
   1332	param->wq.db_numa_node = cpu_to_node(c->cpu);
   1333	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
   1334	if (err)
   1335		return err;
   1336	wq->db    = &wq->db[MLX5_SND_DBR];
   1337
   1338	err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu));
   1339	if (err)
   1340		goto err_sq_wq_destroy;
   1341
   1342	INIT_WORK(&sq->dim.work, mlx5e_tx_dim_work);
   1343	sq->dim.mode = params->tx_cq_moderation.cq_period_mode;
   1344
   1345	return 0;
   1346
   1347err_sq_wq_destroy:
   1348	mlx5_wq_destroy(&sq->wq_ctrl);
   1349
   1350	return err;
   1351}
   1352
   1353void mlx5e_free_txqsq(struct mlx5e_txqsq *sq)
   1354{
   1355	mlx5e_free_txqsq_db(sq);
   1356	mlx5_wq_destroy(&sq->wq_ctrl);
   1357}
   1358
   1359static int mlx5e_create_sq(struct mlx5_core_dev *mdev,
   1360			   struct mlx5e_sq_param *param,
   1361			   struct mlx5e_create_sq_param *csp,
   1362			   u32 *sqn)
   1363{
   1364	u8 ts_format;
   1365	void *in;
   1366	void *sqc;
   1367	void *wq;
   1368	int inlen;
   1369	int err;
   1370
   1371	inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
   1372		sizeof(u64) * csp->wq_ctrl->buf.npages;
   1373	in = kvzalloc(inlen, GFP_KERNEL);
   1374	if (!in)
   1375		return -ENOMEM;
   1376
   1377	ts_format = mlx5_is_real_time_sq(mdev) ?
   1378			    MLX5_TIMESTAMP_FORMAT_REAL_TIME :
   1379			    MLX5_TIMESTAMP_FORMAT_FREE_RUNNING;
   1380	sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
   1381	wq = MLX5_ADDR_OF(sqc, sqc, wq);
   1382
   1383	memcpy(sqc, param->sqc, sizeof(param->sqc));
   1384	MLX5_SET(sqc,  sqc, tis_lst_sz, csp->tis_lst_sz);
   1385	MLX5_SET(sqc,  sqc, tis_num_0, csp->tisn);
   1386	MLX5_SET(sqc,  sqc, cqn, csp->cqn);
   1387	MLX5_SET(sqc,  sqc, ts_cqe_to_dest_cqn, csp->ts_cqe_to_dest_cqn);
   1388	MLX5_SET(sqc,  sqc, ts_format, ts_format);
   1389
   1390
   1391	if (MLX5_CAP_ETH(mdev, wqe_inline_mode) == MLX5_CAP_INLINE_MODE_VPORT_CONTEXT)
   1392		MLX5_SET(sqc,  sqc, min_wqe_inline_mode, csp->min_inline_mode);
   1393
   1394	MLX5_SET(sqc,  sqc, state, MLX5_SQC_STATE_RST);
   1395	MLX5_SET(sqc,  sqc, flush_in_error_en, 1);
   1396
   1397	MLX5_SET(wq,   wq, wq_type,       MLX5_WQ_TYPE_CYCLIC);
   1398	MLX5_SET(wq,   wq, uar_page,      mdev->mlx5e_res.hw_objs.bfreg.index);
   1399	MLX5_SET(wq,   wq, log_wq_pg_sz,  csp->wq_ctrl->buf.page_shift -
   1400					  MLX5_ADAPTER_PAGE_SHIFT);
   1401	MLX5_SET64(wq, wq, dbr_addr,      csp->wq_ctrl->db.dma);
   1402
   1403	mlx5_fill_page_frag_array(&csp->wq_ctrl->buf,
   1404				  (__be64 *)MLX5_ADDR_OF(wq, wq, pas));
   1405
   1406	err = mlx5_core_create_sq(mdev, in, inlen, sqn);
   1407
   1408	kvfree(in);
   1409
   1410	return err;
   1411}
   1412
   1413int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
   1414		    struct mlx5e_modify_sq_param *p)
   1415{
   1416	u64 bitmask = 0;
   1417	void *in;
   1418	void *sqc;
   1419	int inlen;
   1420	int err;
   1421
   1422	inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
   1423	in = kvzalloc(inlen, GFP_KERNEL);
   1424	if (!in)
   1425		return -ENOMEM;
   1426
   1427	sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
   1428
   1429	MLX5_SET(modify_sq_in, in, sq_state, p->curr_state);
   1430	MLX5_SET(sqc, sqc, state, p->next_state);
   1431	if (p->rl_update && p->next_state == MLX5_SQC_STATE_RDY) {
   1432		bitmask |= 1;
   1433		MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, p->rl_index);
   1434	}
   1435	if (p->qos_update && p->next_state == MLX5_SQC_STATE_RDY) {
   1436		bitmask |= 1 << 2;
   1437		MLX5_SET(sqc, sqc, qos_queue_group_id, p->qos_queue_group_id);
   1438	}
   1439	MLX5_SET64(modify_sq_in, in, modify_bitmask, bitmask);
   1440
   1441	err = mlx5_core_modify_sq(mdev, sqn, in);
   1442
   1443	kvfree(in);
   1444
   1445	return err;
   1446}
   1447
   1448static void mlx5e_destroy_sq(struct mlx5_core_dev *mdev, u32 sqn)
   1449{
   1450	mlx5_core_destroy_sq(mdev, sqn);
   1451}
   1452
   1453int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev,
   1454			struct mlx5e_sq_param *param,
   1455			struct mlx5e_create_sq_param *csp,
   1456			u16 qos_queue_group_id,
   1457			u32 *sqn)
   1458{
   1459	struct mlx5e_modify_sq_param msp = {0};
   1460	int err;
   1461
   1462	err = mlx5e_create_sq(mdev, param, csp, sqn);
   1463	if (err)
   1464		return err;
   1465
   1466	msp.curr_state = MLX5_SQC_STATE_RST;
   1467	msp.next_state = MLX5_SQC_STATE_RDY;
   1468	if (qos_queue_group_id) {
   1469		msp.qos_update = true;
   1470		msp.qos_queue_group_id = qos_queue_group_id;
   1471	}
   1472	err = mlx5e_modify_sq(mdev, *sqn, &msp);
   1473	if (err)
   1474		mlx5e_destroy_sq(mdev, *sqn);
   1475
   1476	return err;
   1477}
   1478
   1479static int mlx5e_set_sq_maxrate(struct net_device *dev,
   1480				struct mlx5e_txqsq *sq, u32 rate);
   1481
   1482int mlx5e_open_txqsq(struct mlx5e_channel *c, u32 tisn, int txq_ix,
   1483		     struct mlx5e_params *params, struct mlx5e_sq_param *param,
   1484		     struct mlx5e_txqsq *sq, int tc, u16 qos_queue_group_id,
   1485		     struct mlx5e_sq_stats *sq_stats)
   1486{
   1487	struct mlx5e_create_sq_param csp = {};
   1488	u32 tx_rate;
   1489	int err;
   1490
   1491	err = mlx5e_alloc_txqsq(c, txq_ix, params, param, sq, tc);
   1492	if (err)
   1493		return err;
   1494
   1495	sq->stats = sq_stats;
   1496
   1497	csp.tisn            = tisn;
   1498	csp.tis_lst_sz      = 1;
   1499	csp.cqn             = sq->cq.mcq.cqn;
   1500	csp.wq_ctrl         = &sq->wq_ctrl;
   1501	csp.min_inline_mode = sq->min_inline_mode;
   1502	err = mlx5e_create_sq_rdy(c->mdev, param, &csp, qos_queue_group_id, &sq->sqn);
   1503	if (err)
   1504		goto err_free_txqsq;
   1505
   1506	tx_rate = c->priv->tx_rates[sq->txq_ix];
   1507	if (tx_rate)
   1508		mlx5e_set_sq_maxrate(c->netdev, sq, tx_rate);
   1509
   1510	if (params->tx_dim_enabled)
   1511		sq->state |= BIT(MLX5E_SQ_STATE_AM);
   1512
   1513	return 0;
   1514
   1515err_free_txqsq:
   1516	mlx5e_free_txqsq(sq);
   1517
   1518	return err;
   1519}
   1520
   1521void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq)
   1522{
   1523	sq->txq = netdev_get_tx_queue(sq->netdev, sq->txq_ix);
   1524	set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
   1525	netdev_tx_reset_queue(sq->txq);
   1526	netif_tx_start_queue(sq->txq);
   1527}
   1528
   1529void mlx5e_tx_disable_queue(struct netdev_queue *txq)
   1530{
   1531	__netif_tx_lock_bh(txq);
   1532	netif_tx_stop_queue(txq);
   1533	__netif_tx_unlock_bh(txq);
   1534}
   1535
   1536void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq)
   1537{
   1538	struct mlx5_wq_cyc *wq = &sq->wq;
   1539
   1540	clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
   1541	synchronize_net(); /* Sync with NAPI to prevent netif_tx_wake_queue. */
   1542
   1543	mlx5e_tx_disable_queue(sq->txq);
   1544
   1545	/* last doorbell out, godspeed .. */
   1546	if (mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1)) {
   1547		u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
   1548		struct mlx5e_tx_wqe *nop;
   1549
   1550		sq->db.wqe_info[pi] = (struct mlx5e_tx_wqe_info) {
   1551			.num_wqebbs = 1,
   1552		};
   1553
   1554		nop = mlx5e_post_nop(wq, sq->sqn, &sq->pc);
   1555		mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nop->ctrl);
   1556	}
   1557}
   1558
   1559void mlx5e_close_txqsq(struct mlx5e_txqsq *sq)
   1560{
   1561	struct mlx5_core_dev *mdev = sq->mdev;
   1562	struct mlx5_rate_limit rl = {0};
   1563
   1564	cancel_work_sync(&sq->dim.work);
   1565	cancel_work_sync(&sq->recover_work);
   1566	mlx5e_destroy_sq(mdev, sq->sqn);
   1567	if (sq->rate_limit) {
   1568		rl.rate = sq->rate_limit;
   1569		mlx5_rl_remove_rate(mdev, &rl);
   1570	}
   1571	mlx5e_free_txqsq_descs(sq);
   1572	mlx5e_free_txqsq(sq);
   1573}
   1574
   1575void mlx5e_tx_err_cqe_work(struct work_struct *recover_work)
   1576{
   1577	struct mlx5e_txqsq *sq = container_of(recover_work, struct mlx5e_txqsq,
   1578					      recover_work);
   1579
   1580	mlx5e_reporter_tx_err_cqe(sq);
   1581}
   1582
   1583static int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params,
   1584			    struct mlx5e_sq_param *param, struct mlx5e_icosq *sq,
   1585			    work_func_t recover_work_func)
   1586{
   1587	struct mlx5e_create_sq_param csp = {};
   1588	int err;
   1589
   1590	err = mlx5e_alloc_icosq(c, param, sq, recover_work_func);
   1591	if (err)
   1592		return err;
   1593
   1594	csp.cqn             = sq->cq.mcq.cqn;
   1595	csp.wq_ctrl         = &sq->wq_ctrl;
   1596	csp.min_inline_mode = params->tx_min_inline_mode;
   1597	err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn);
   1598	if (err)
   1599		goto err_free_icosq;
   1600
   1601	if (param->is_tls) {
   1602		sq->ktls_resync = mlx5e_ktls_rx_resync_create_resp_list();
   1603		if (IS_ERR(sq->ktls_resync)) {
   1604			err = PTR_ERR(sq->ktls_resync);
   1605			goto err_destroy_icosq;
   1606		}
   1607	}
   1608	return 0;
   1609
   1610err_destroy_icosq:
   1611	mlx5e_destroy_sq(c->mdev, sq->sqn);
   1612err_free_icosq:
   1613	mlx5e_free_icosq(sq);
   1614
   1615	return err;
   1616}
   1617
   1618void mlx5e_activate_icosq(struct mlx5e_icosq *icosq)
   1619{
   1620	set_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state);
   1621}
   1622
   1623void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq)
   1624{
   1625	clear_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state);
   1626	synchronize_net(); /* Sync with NAPI. */
   1627}
   1628
   1629static void mlx5e_close_icosq(struct mlx5e_icosq *sq)
   1630{
   1631	struct mlx5e_channel *c = sq->channel;
   1632
   1633	if (sq->ktls_resync)
   1634		mlx5e_ktls_rx_resync_destroy_resp_list(sq->ktls_resync);
   1635	mlx5e_destroy_sq(c->mdev, sq->sqn);
   1636	mlx5e_free_icosq_descs(sq);
   1637	mlx5e_free_icosq(sq);
   1638}
   1639
   1640int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
   1641		     struct mlx5e_sq_param *param, struct xsk_buff_pool *xsk_pool,
   1642		     struct mlx5e_xdpsq *sq, bool is_redirect)
   1643{
   1644	struct mlx5e_create_sq_param csp = {};
   1645	int err;
   1646
   1647	err = mlx5e_alloc_xdpsq(c, params, xsk_pool, param, sq, is_redirect);
   1648	if (err)
   1649		return err;
   1650
   1651	csp.tis_lst_sz      = 1;
   1652	csp.tisn            = c->priv->tisn[c->lag_port][0]; /* tc = 0 */
   1653	csp.cqn             = sq->cq.mcq.cqn;
   1654	csp.wq_ctrl         = &sq->wq_ctrl;
   1655	csp.min_inline_mode = sq->min_inline_mode;
   1656	set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
   1657
   1658	/* Don't enable multi buffer on XDP_REDIRECT SQ, as it's not yet
   1659	 * supported by upstream, and there is no defined trigger to allow
   1660	 * transmitting redirected multi-buffer frames.
   1661	 */
   1662	if (param->is_xdp_mb && !is_redirect)
   1663		set_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state);
   1664
   1665	err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn);
   1666	if (err)
   1667		goto err_free_xdpsq;
   1668
   1669	mlx5e_set_xmit_fp(sq, param->is_mpw);
   1670
   1671	if (!param->is_mpw && !test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state)) {
   1672		unsigned int ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + 1;
   1673		unsigned int inline_hdr_sz = 0;
   1674		int i;
   1675
   1676		if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
   1677			inline_hdr_sz = MLX5E_XDP_MIN_INLINE;
   1678			ds_cnt++;
   1679		}
   1680
   1681		/* Pre initialize fixed WQE fields */
   1682		for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) {
   1683			struct mlx5e_tx_wqe      *wqe  = mlx5_wq_cyc_get_wqe(&sq->wq, i);
   1684			struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
   1685			struct mlx5_wqe_eth_seg  *eseg = &wqe->eth;
   1686			struct mlx5_wqe_data_seg *dseg;
   1687
   1688			sq->db.wqe_info[i] = (struct mlx5e_xdp_wqe_info) {
   1689				.num_wqebbs = 1,
   1690				.num_pkts   = 1,
   1691			};
   1692
   1693			cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
   1694			eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
   1695
   1696			dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1);
   1697			dseg->lkey = sq->mkey_be;
   1698		}
   1699	}
   1700
   1701	return 0;
   1702
   1703err_free_xdpsq:
   1704	clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
   1705	mlx5e_free_xdpsq(sq);
   1706
   1707	return err;
   1708}
   1709
   1710void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq)
   1711{
   1712	struct mlx5e_channel *c = sq->channel;
   1713
   1714	clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
   1715	synchronize_net(); /* Sync with NAPI. */
   1716
   1717	mlx5e_destroy_sq(c->mdev, sq->sqn);
   1718	mlx5e_free_xdpsq_descs(sq);
   1719	mlx5e_free_xdpsq(sq);
   1720}
   1721
   1722static int mlx5e_alloc_cq_common(struct mlx5e_priv *priv,
   1723				 struct mlx5e_cq_param *param,
   1724				 struct mlx5e_cq *cq)
   1725{
   1726	struct mlx5_core_dev *mdev = priv->mdev;
   1727	struct mlx5_core_cq *mcq = &cq->mcq;
   1728	int err;
   1729	u32 i;
   1730
   1731	err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq,
   1732			       &cq->wq_ctrl);
   1733	if (err)
   1734		return err;
   1735
   1736	mcq->cqe_sz     = 64;
   1737	mcq->set_ci_db  = cq->wq_ctrl.db.db;
   1738	mcq->arm_db     = cq->wq_ctrl.db.db + 1;
   1739	*mcq->set_ci_db = 0;
   1740	*mcq->arm_db    = 0;
   1741	mcq->vector     = param->eq_ix;
   1742	mcq->comp       = mlx5e_completion_event;
   1743	mcq->event      = mlx5e_cq_error_event;
   1744
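       	/* Init all CQEs with an invalid opcode and the ownership bit set, so
       	 * the first polling pass treats every entry as not yet written by HW.
       	 */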
   1745	for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
   1746		struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
   1747
   1748		cqe->op_own = 0xf1;
   1749	}
   1750
   1751	cq->mdev = mdev;
   1752	cq->netdev = priv->netdev;
   1753	cq->priv = priv;
   1754
   1755	return 0;
   1756}
   1757
   1758static int mlx5e_alloc_cq(struct mlx5e_priv *priv,
   1759			  struct mlx5e_cq_param *param,
   1760			  struct mlx5e_create_cq_param *ccp,
   1761			  struct mlx5e_cq *cq)
   1762{
   1763	int err;
   1764
   1765	param->wq.buf_numa_node = ccp->node;
   1766	param->wq.db_numa_node  = ccp->node;
   1767	param->eq_ix            = ccp->ix;
   1768
   1769	err = mlx5e_alloc_cq_common(priv, param, cq);
   1770
   1771	cq->napi     = ccp->napi;
   1772	cq->ch_stats = ccp->ch_stats;
   1773
   1774	return err;
   1775}
   1776
   1777static void mlx5e_free_cq(struct mlx5e_cq *cq)
   1778{
   1779	mlx5_wq_destroy(&cq->wq_ctrl);
   1780}
   1781
   1782static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
   1783{
   1784	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
   1785	struct mlx5_core_dev *mdev = cq->mdev;
   1786	struct mlx5_core_cq *mcq = &cq->mcq;
   1787
   1788	void *in;
   1789	void *cqc;
   1790	int inlen;
   1791	int eqn;
   1792	int err;
   1793
   1794	err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn);
   1795	if (err)
   1796		return err;
   1797
   1798	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
   1799		sizeof(u64) * cq->wq_ctrl.buf.npages;
   1800	in = kvzalloc(inlen, GFP_KERNEL);
   1801	if (!in)
   1802		return -ENOMEM;
   1803
   1804	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
   1805
   1806	memcpy(cqc, param->cqc, sizeof(param->cqc));
   1807
   1808	mlx5_fill_page_frag_array(&cq->wq_ctrl.buf,
   1809				  (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas));
   1810
   1811	MLX5_SET(cqc,   cqc, cq_period_mode, param->cq_period_mode);
   1812	MLX5_SET(cqc,   cqc, c_eqn_or_apu_element, eqn);
   1813	MLX5_SET(cqc,   cqc, uar_page,      mdev->priv.uar->index);
   1814	MLX5_SET(cqc,   cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
   1815					    MLX5_ADAPTER_PAGE_SHIFT);
   1816	MLX5_SET64(cqc, cqc, dbr_addr,      cq->wq_ctrl.db.dma);
   1817
   1818	err = mlx5_core_create_cq(mdev, mcq, in, inlen, out, sizeof(out));
   1819
   1820	kvfree(in);
   1821
   1822	if (err)
   1823		return err;
   1824
   1825	mlx5e_cq_arm(cq);
   1826
   1827	return 0;
   1828}
   1829
   1830static void mlx5e_destroy_cq(struct mlx5e_cq *cq)
   1831{
   1832	mlx5_core_destroy_cq(cq->mdev, &cq->mcq);
   1833}
   1834
   1835int mlx5e_open_cq(struct mlx5e_priv *priv, struct dim_cq_moder moder,
   1836		  struct mlx5e_cq_param *param, struct mlx5e_create_cq_param *ccp,
   1837		  struct mlx5e_cq *cq)
   1838{
   1839	struct mlx5_core_dev *mdev = priv->mdev;
   1840	int err;
   1841
   1842	err = mlx5e_alloc_cq(priv, param, ccp, cq);
   1843	if (err)
   1844		return err;
   1845
   1846	err = mlx5e_create_cq(cq, param);
   1847	if (err)
   1848		goto err_free_cq;
   1849
   1850	if (MLX5_CAP_GEN(mdev, cq_moderation))
   1851		mlx5_core_modify_cq_moderation(mdev, &cq->mcq, moder.usec, moder.pkts);
   1852	return 0;
   1853
   1854err_free_cq:
   1855	mlx5e_free_cq(cq);
   1856
   1857	return err;
   1858}
   1859
   1860void mlx5e_close_cq(struct mlx5e_cq *cq)
   1861{
   1862	mlx5e_destroy_cq(cq);
   1863	mlx5e_free_cq(cq);
   1864}
   1865
   1866static int mlx5e_open_tx_cqs(struct mlx5e_channel *c,
   1867			     struct mlx5e_params *params,
   1868			     struct mlx5e_create_cq_param *ccp,
   1869			     struct mlx5e_channel_param *cparam)
   1870{
   1871	int err;
   1872	int tc;
   1873
   1874	for (tc = 0; tc < c->num_tc; tc++) {
   1875		err = mlx5e_open_cq(c->priv, params->tx_cq_moderation, &cparam->txq_sq.cqp,
   1876				    ccp, &c->sq[tc].cq);
   1877		if (err)
   1878			goto err_close_tx_cqs;
   1879	}
   1880
   1881	return 0;
   1882
   1883err_close_tx_cqs:
   1884	for (tc--; tc >= 0; tc--)
   1885		mlx5e_close_cq(&c->sq[tc].cq);
   1886
   1887	return err;
   1888}
   1889
   1890static void mlx5e_close_tx_cqs(struct mlx5e_channel *c)
   1891{
   1892	int tc;
   1893
   1894	for (tc = 0; tc < c->num_tc; tc++)
   1895		mlx5e_close_cq(&c->sq[tc].cq);
   1896}
   1897
   1898static int mlx5e_mqprio_txq_to_tc(struct netdev_tc_txq *tc_to_txq, unsigned int txq)
   1899{
   1900	int tc;
   1901
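       	/* txq is unsigned: when txq < offset the subtraction wraps around,
       	 * so the range check below fails for that TC, as intended.
       	 */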
   1902	for (tc = 0; tc < TC_MAX_QUEUE; tc++)
   1903		if (txq - tc_to_txq[tc].offset < tc_to_txq[tc].count)
   1904			return tc;
   1905
   1906	WARN(1, "Unexpected TCs configuration. No match found for txq %u", txq);
   1907	return -ENOENT;
   1908}
   1909
   1910static int mlx5e_txq_get_qos_node_hw_id(struct mlx5e_params *params, int txq_ix,
   1911					u32 *hw_id)
   1912{
   1913	int tc;
   1914
   1915	if (params->mqprio.mode != TC_MQPRIO_MODE_CHANNEL ||
   1916	    !params->mqprio.channel.rl) {
   1917		*hw_id = 0;
   1918		return 0;
   1919	}
   1920
   1921	tc = mlx5e_mqprio_txq_to_tc(params->mqprio.tc_to_txq, txq_ix);
   1922	if (tc < 0)
   1923		return tc;
   1924
   1925	return mlx5e_mqprio_rl_get_node_hw_id(params->mqprio.channel.rl, tc, hw_id);
   1926}
   1927
   1928static int mlx5e_open_sqs(struct mlx5e_channel *c,
   1929			  struct mlx5e_params *params,
   1930			  struct mlx5e_channel_param *cparam)
   1931{
   1932	int err, tc;
   1933
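       	/* TXQ indices are grouped by TC: channel ix owns txq
       	 * (ix + tc * num_channels) for each of its TCs.
       	 */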
   1934	for (tc = 0; tc < mlx5e_get_dcb_num_tc(params); tc++) {
   1935		int txq_ix = c->ix + tc * params->num_channels;
   1936		u32 qos_queue_group_id;
   1937
   1938		err = mlx5e_txq_get_qos_node_hw_id(params, txq_ix, &qos_queue_group_id);
   1939		if (err)
   1940			goto err_close_sqs;
   1941
   1942		err = mlx5e_open_txqsq(c, c->priv->tisn[c->lag_port][tc], txq_ix,
   1943				       params, &cparam->txq_sq, &c->sq[tc], tc,
   1944				       qos_queue_group_id,
   1945				       &c->priv->channel_stats[c->ix]->sq[tc]);
   1946		if (err)
   1947			goto err_close_sqs;
   1948	}
   1949
   1950	return 0;
   1951
   1952err_close_sqs:
   1953	for (tc--; tc >= 0; tc--)
   1954		mlx5e_close_txqsq(&c->sq[tc]);
   1955
   1956	return err;
   1957}
   1958
   1959static void mlx5e_close_sqs(struct mlx5e_channel *c)
   1960{
   1961	int tc;
   1962
   1963	for (tc = 0; tc < c->num_tc; tc++)
   1964		mlx5e_close_txqsq(&c->sq[tc]);
   1965}
   1966
   1967static int mlx5e_set_sq_maxrate(struct net_device *dev,
   1968				struct mlx5e_txqsq *sq, u32 rate)
   1969{
   1970	struct mlx5e_priv *priv = netdev_priv(dev);
   1971	struct mlx5_core_dev *mdev = priv->mdev;
   1972	struct mlx5e_modify_sq_param msp = {0};
   1973	struct mlx5_rate_limit rl = {0};
   1974	u16 rl_index = 0;
   1975	int err;
   1976
   1977	if (rate == sq->rate_limit)
   1978		/* nothing to do */
   1979		return 0;
   1980
   1981	if (sq->rate_limit) {
   1982		rl.rate = sq->rate_limit;
   1983		/* remove the current rl index to free room for the next one */
   1984		mlx5_rl_remove_rate(mdev, &rl);
   1985	}
   1986
   1987	sq->rate_limit = 0;
   1988
   1989	if (rate) {
   1990		rl.rate = rate;
   1991		err = mlx5_rl_add_rate(mdev, &rl_index, &rl);
   1992		if (err) {
   1993			netdev_err(dev, "Failed configuring rate %u: %d\n",
   1994				   rate, err);
   1995			return err;
   1996		}
   1997	}
   1998
   1999	msp.curr_state = MLX5_SQC_STATE_RDY;
   2000	msp.next_state = MLX5_SQC_STATE_RDY;
   2001	msp.rl_index   = rl_index;
   2002	msp.rl_update  = true;
   2003	err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
   2004	if (err) {
   2005		netdev_err(dev, "Failed configuring rate %u: %d\n",
   2006			   rate, err);
   2007		/* remove the rate from the table */
   2008		if (rate)
   2009			mlx5_rl_remove_rate(mdev, &rl);
   2010		return err;
   2011	}
   2012
   2013	sq->rate_limit = rate;
   2014	return 0;
   2015}
   2016
   2017static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate)
   2018{
   2019	struct mlx5e_priv *priv = netdev_priv(dev);
   2020	struct mlx5_core_dev *mdev = priv->mdev;
   2021	struct mlx5e_txqsq *sq = priv->txq2sq[index];
   2022	int err = 0;
   2023
   2024	if (!mlx5_rl_is_supported(mdev)) {
   2025		netdev_err(dev, "Rate limiting is not supported on this device\n");
   2026		return -EINVAL;
   2027	}
   2028
   2029	/* rate is given in Mb/sec, HW config is in Kb/sec; the shift treats 1 Mb as 1024 Kb */
   2030	rate = rate << 10;
   2031
   2032	/* Check whether the rate is in the valid range; 0 is always valid */
   2033	if (rate && !mlx5_rl_is_in_range(mdev, rate)) {
   2034		netdev_err(dev, "TX rate %u is not in range\n", rate);
   2035		return -ERANGE;
   2036	}
   2037
   2038	mutex_lock(&priv->state_lock);
   2039	if (test_bit(MLX5E_STATE_OPENED, &priv->state))
   2040		err = mlx5e_set_sq_maxrate(dev, sq, rate);
   2041	if (!err)
   2042		priv->tx_rates[index] = rate;
   2043	mutex_unlock(&priv->state_lock);
   2044
   2045	return err;
   2046}
   2047
   2048static int mlx5e_open_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
   2049			     struct mlx5e_rq_param *rq_params)
   2050{
   2051	int err;
   2052
   2053	err = mlx5e_init_rxq_rq(c, params, &c->rq);
   2054	if (err)
   2055		return err;
   2056
   2057	return mlx5e_open_rq(params, rq_params, NULL, cpu_to_node(c->cpu), &c->rq);
   2058}
   2059
   2060static int mlx5e_open_queues(struct mlx5e_channel *c,
   2061			     struct mlx5e_params *params,
   2062			     struct mlx5e_channel_param *cparam)
   2063{
   2064	struct dim_cq_moder icocq_moder = {0, 0};
   2065	struct mlx5e_create_cq_param ccp;
   2066	int err;
   2067
   2068	mlx5e_build_create_cq_param(&ccp, c);
   2069
   2070	err = mlx5e_open_cq(c->priv, icocq_moder, &cparam->async_icosq.cqp, &ccp,
   2071			    &c->async_icosq.cq);
   2072	if (err)
   2073		return err;
   2074
   2075	err = mlx5e_open_cq(c->priv, icocq_moder, &cparam->icosq.cqp, &ccp,
   2076			    &c->icosq.cq);
   2077	if (err)
   2078		goto err_close_async_icosq_cq;
   2079
   2080	err = mlx5e_open_tx_cqs(c, params, &ccp, cparam);
   2081	if (err)
   2082		goto err_close_icosq_cq;
   2083
   2084	err = mlx5e_open_cq(c->priv, params->tx_cq_moderation, &cparam->xdp_sq.cqp, &ccp,
   2085			    &c->xdpsq.cq);
   2086	if (err)
   2087		goto err_close_tx_cqs;
   2088
   2089	err = mlx5e_open_cq(c->priv, params->rx_cq_moderation, &cparam->rq.cqp, &ccp,
   2090			    &c->rq.cq);
   2091	if (err)
   2092		goto err_close_xdp_tx_cqs;
   2093
   2094	err = c->xdp ? mlx5e_open_cq(c->priv, params->tx_cq_moderation, &cparam->xdp_sq.cqp,
   2095				     &ccp, &c->rq_xdpsq.cq) : 0;
   2096	if (err)
   2097		goto err_close_rx_cq;
   2098
   2099	spin_lock_init(&c->async_icosq_lock);
   2100
   2101	err = mlx5e_open_icosq(c, params, &cparam->async_icosq, &c->async_icosq,
   2102			       mlx5e_async_icosq_err_cqe_work);
   2103	if (err)
   2104		goto err_close_xdpsq_cq;
   2105
   2106	mutex_init(&c->icosq_recovery_lock);
   2107
   2108	err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq,
   2109			       mlx5e_icosq_err_cqe_work);
   2110	if (err)
   2111		goto err_close_async_icosq;
   2112
   2113	err = mlx5e_open_sqs(c, params, cparam);
   2114	if (err)
   2115		goto err_close_icosq;
   2116
   2117	err = mlx5e_open_rxq_rq(c, params, &cparam->rq);
   2118	if (err)
   2119		goto err_close_sqs;
   2120
   2121	if (c->xdp) {
   2122		err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL,
   2123				       &c->rq_xdpsq, false);
   2124		if (err)
   2125			goto err_close_rq;
   2126	}
   2127
   2128	err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL, &c->xdpsq, true);
   2129	if (err)
   2130		goto err_close_xdp_sq;
   2131
   2132	return 0;
   2133
   2134err_close_xdp_sq:
   2135	if (c->xdp)
   2136		mlx5e_close_xdpsq(&c->rq_xdpsq);
   2137
   2138err_close_rq:
   2139	mlx5e_close_rq(&c->rq);
   2140
   2141err_close_sqs:
   2142	mlx5e_close_sqs(c);
   2143
   2144err_close_icosq:
   2145	mlx5e_close_icosq(&c->icosq);
   2146
   2147err_close_async_icosq:
   2148	mlx5e_close_icosq(&c->async_icosq);
   2149
   2150err_close_xdpsq_cq:
   2151	if (c->xdp)
   2152		mlx5e_close_cq(&c->rq_xdpsq.cq);
   2153
   2154err_close_rx_cq:
   2155	mlx5e_close_cq(&c->rq.cq);
   2156
   2157err_close_xdp_tx_cqs:
   2158	mlx5e_close_cq(&c->xdpsq.cq);
   2159
   2160err_close_tx_cqs:
   2161	mlx5e_close_tx_cqs(c);
   2162
   2163err_close_icosq_cq:
   2164	mlx5e_close_cq(&c->icosq.cq);
   2165
   2166err_close_async_icosq_cq:
   2167	mlx5e_close_cq(&c->async_icosq.cq);
   2168
   2169	return err;
   2170}
   2171
   2172static void mlx5e_close_queues(struct mlx5e_channel *c)
   2173{
   2174	mlx5e_close_xdpsq(&c->xdpsq);
   2175	if (c->xdp)
   2176		mlx5e_close_xdpsq(&c->rq_xdpsq);
   2177	/* The same ICOSQ is used for UMRs for both RQ and XSKRQ. */
   2178	cancel_work_sync(&c->icosq.recover_work);
   2179	mlx5e_close_rq(&c->rq);
   2180	mlx5e_close_sqs(c);
   2181	mlx5e_close_icosq(&c->icosq);
   2182	mutex_destroy(&c->icosq_recovery_lock);
   2183	mlx5e_close_icosq(&c->async_icosq);
   2184	if (c->xdp)
   2185		mlx5e_close_cq(&c->rq_xdpsq.cq);
   2186	mlx5e_close_cq(&c->rq.cq);
   2187	mlx5e_close_cq(&c->xdpsq.cq);
   2188	mlx5e_close_tx_cqs(c);
   2189	mlx5e_close_cq(&c->icosq.cq);
   2190	mlx5e_close_cq(&c->async_icosq.cq);
   2191}
   2192
   2193static u8 mlx5e_enumerate_lag_port(struct mlx5_core_dev *mdev, int ix)
   2194{
   2195	u16 port_aff_bias = mlx5_core_is_pf(mdev) ? 0 : MLX5_CAP_GEN(mdev, vhca_id);
   2196
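       	/* Spread channels across the LAG ports round-robin; non-PF functions
       	 * use their vhca_id as a starting offset.
       	 */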
   2197	return (ix + port_aff_bias) % mlx5e_get_num_lag_ports(mdev);
   2198}
   2199
   2200static int mlx5e_channel_stats_alloc(struct mlx5e_priv *priv, int ix, int cpu)
   2201{
   2202	if (ix > priv->stats_nch) {
   2203		netdev_warn(priv->netdev, "Unexpected channel stats index %d > %d\n", ix,
   2204			    priv->stats_nch);
   2205		return -EINVAL;
   2206	}
   2207
   2208	if (priv->channel_stats[ix])
   2209		return 0;
   2210
   2211	/* Asymmetric dynamic memory allocation.
   2212	 * Freed in mlx5e_priv_arrays_free, not on channel closure.
   2213	 */
   2214	mlx5e_dbg(DRV, priv, "Creating channel stats %d\n", ix);
   2215	priv->channel_stats[ix] = kvzalloc_node(sizeof(**priv->channel_stats),
   2216						GFP_KERNEL, cpu_to_node(cpu));
   2217	if (!priv->channel_stats[ix])
   2218		return -ENOMEM;
   2219	priv->stats_nch++;
   2220
   2221	return 0;
   2222}
   2223
   2224void mlx5e_trigger_napi_icosq(struct mlx5e_channel *c)
   2225{
   2226	spin_lock_bh(&c->async_icosq_lock);
   2227	mlx5e_trigger_irq(&c->async_icosq);
   2228	spin_unlock_bh(&c->async_icosq_lock);
   2229}
   2230
   2231void mlx5e_trigger_napi_sched(struct napi_struct *napi)
   2232{
   2233	local_bh_disable();
   2234	napi_schedule(napi);
   2235	local_bh_enable();
   2236}
   2237
   2238static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
   2239			      struct mlx5e_params *params,
   2240			      struct mlx5e_channel_param *cparam,
   2241			      struct xsk_buff_pool *xsk_pool,
   2242			      struct mlx5e_channel **cp)
   2243{
   2244	int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix));
   2245	struct net_device *netdev = priv->netdev;
   2246	struct mlx5e_xsk_param xsk;
   2247	struct mlx5e_channel *c;
   2248	unsigned int irq;
   2249	int err;
   2250
   2251	err = mlx5_vector2irqn(priv->mdev, ix, &irq);
   2252	if (err)
   2253		return err;
   2254
   2255	err = mlx5e_channel_stats_alloc(priv, ix, cpu);
   2256	if (err)
   2257		return err;
   2258
   2259	c = kvzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu));
   2260	if (!c)
   2261		return -ENOMEM;
   2262
   2263	c->priv     = priv;
   2264	c->mdev     = priv->mdev;
   2265	c->tstamp   = &priv->tstamp;
   2266	c->ix       = ix;
   2267	c->cpu      = cpu;
   2268	c->pdev     = mlx5_core_dma_dev(priv->mdev);
   2269	c->netdev   = priv->netdev;
   2270	c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey);
   2271	c->num_tc   = mlx5e_get_dcb_num_tc(params);
   2272	c->xdp      = !!params->xdp_prog;
   2273	c->stats    = &priv->channel_stats[ix]->ch;
   2274	c->aff_mask = irq_get_effective_affinity_mask(irq);
   2275	c->lag_port = mlx5e_enumerate_lag_port(priv->mdev, ix);
   2276
   2277	netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64);
   2278
   2279	err = mlx5e_open_queues(c, params, cparam);
   2280	if (unlikely(err))
   2281		goto err_napi_del;
   2282
   2283	if (xsk_pool) {
   2284		mlx5e_build_xsk_param(xsk_pool, &xsk);
   2285		err = mlx5e_open_xsk(priv, params, &xsk, xsk_pool, c);
   2286		if (unlikely(err))
   2287			goto err_close_queues;
   2288	}
   2289
   2290	*cp = c;
   2291
   2292	return 0;
   2293
   2294err_close_queues:
   2295	mlx5e_close_queues(c);
   2296
   2297err_napi_del:
   2298	netif_napi_del(&c->napi);
   2299
   2300	kvfree(c);
   2301
   2302	return err;
   2303}
   2304
   2305static void mlx5e_activate_channel(struct mlx5e_channel *c)
   2306{
   2307	int tc;
   2308
   2309	napi_enable(&c->napi);
   2310
   2311	for (tc = 0; tc < c->num_tc; tc++)
   2312		mlx5e_activate_txqsq(&c->sq[tc]);
   2313	mlx5e_activate_icosq(&c->icosq);
   2314	mlx5e_activate_icosq(&c->async_icosq);
   2315	mlx5e_activate_rq(&c->rq);
   2316
   2317	if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
   2318		mlx5e_activate_xsk(c);
   2319
   2320	mlx5e_trigger_napi_icosq(c);
   2321}
   2322
   2323static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
   2324{
   2325	int tc;
   2326
   2327	if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
   2328		mlx5e_deactivate_xsk(c);
   2329
   2330	mlx5e_deactivate_rq(&c->rq);
   2331	mlx5e_deactivate_icosq(&c->async_icosq);
   2332	mlx5e_deactivate_icosq(&c->icosq);
   2333	for (tc = 0; tc < c->num_tc; tc++)
   2334		mlx5e_deactivate_txqsq(&c->sq[tc]);
   2335	mlx5e_qos_deactivate_queues(c);
   2336
   2337	napi_disable(&c->napi);
   2338}
   2339
   2340static void mlx5e_close_channel(struct mlx5e_channel *c)
   2341{
   2342	if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
   2343		mlx5e_close_xsk(c);
   2344	mlx5e_close_queues(c);
   2345	mlx5e_qos_close_queues(c);
   2346	netif_napi_del(&c->napi);
   2347
   2348	kvfree(c);
   2349}
   2350
   2351int mlx5e_open_channels(struct mlx5e_priv *priv,
   2352			struct mlx5e_channels *chs)
   2353{
   2354	struct mlx5e_channel_param *cparam;
   2355	int err = -ENOMEM;
   2356	int i;
   2357
   2358	chs->num = chs->params.num_channels;
   2359
   2360	chs->c = kcalloc(chs->num, sizeof(struct mlx5e_channel *), GFP_KERNEL);
   2361	cparam = kvzalloc(sizeof(struct mlx5e_channel_param), GFP_KERNEL);
   2362	if (!chs->c || !cparam)
   2363		goto err_free;
   2364
   2365	err = mlx5e_build_channel_param(priv->mdev, &chs->params, priv->q_counter, cparam);
   2366	if (err)
   2367		goto err_free;
   2368
   2369	for (i = 0; i < chs->num; i++) {
   2370		struct xsk_buff_pool *xsk_pool = NULL;
   2371
   2372		if (chs->params.xdp_prog)
   2373			xsk_pool = mlx5e_xsk_get_pool(&chs->params, chs->params.xsk, i);
   2374
   2375		err = mlx5e_open_channel(priv, i, &chs->params, cparam, xsk_pool, &chs->c[i]);
   2376		if (err)
   2377			goto err_close_channels;
   2378	}
   2379
   2380	if (MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS) || chs->params.ptp_rx) {
   2381		err = mlx5e_ptp_open(priv, &chs->params, chs->c[0]->lag_port, &chs->ptp);
   2382		if (err)
   2383			goto err_close_channels;
   2384	}
   2385
   2386	err = mlx5e_qos_open_queues(priv, chs);
   2387	if (err)
   2388		goto err_close_ptp;
   2389
   2390	mlx5e_health_channels_update(priv);
   2391	kvfree(cparam);
   2392	return 0;
   2393
   2394err_close_ptp:
   2395	if (chs->ptp)
   2396		mlx5e_ptp_close(chs->ptp);
   2397
   2398err_close_channels:
   2399	for (i--; i >= 0; i--)
   2400		mlx5e_close_channel(chs->c[i]);
   2401
   2402err_free:
   2403	kfree(chs->c);
   2404	kvfree(cparam);
   2405	chs->num = 0;
   2406	return err;
   2407}
   2408
   2409static void mlx5e_activate_channels(struct mlx5e_channels *chs)
   2410{
   2411	int i;
   2412
   2413	for (i = 0; i < chs->num; i++)
   2414		mlx5e_activate_channel(chs->c[i]);
   2415
   2416	if (chs->ptp)
   2417		mlx5e_ptp_activate_channel(chs->ptp);
   2418}
   2419
   2420#define MLX5E_RQ_WQES_TIMEOUT 20000 /* msecs */
   2421
   2422static int mlx5e_wait_channels_min_rx_wqes(struct mlx5e_channels *chs)
   2423{
   2424	int err = 0;
   2425	int i;
   2426
   2427	for (i = 0; i < chs->num; i++) {
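       		/* Once one RQ has timed out, poll the remaining ones without
       		 * waiting (timeout 0) and only accumulate the error.
       		 */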
   2428		int timeout = err ? 0 : MLX5E_RQ_WQES_TIMEOUT;
   2429
   2430		err |= mlx5e_wait_for_min_rx_wqes(&chs->c[i]->rq, timeout);
   2431
   2432		/* Don't wait on the XSK RQ, because the newer xdpsock sample
   2433		 * doesn't provide any Fill Ring entries at the setup stage.
   2434		 */
   2435	}
   2436
   2437	return err ? -ETIMEDOUT : 0;
   2438}
   2439
   2440static void mlx5e_deactivate_channels(struct mlx5e_channels *chs)
   2441{
   2442	int i;
   2443
   2444	if (chs->ptp)
   2445		mlx5e_ptp_deactivate_channel(chs->ptp);
   2446
   2447	for (i = 0; i < chs->num; i++)
   2448		mlx5e_deactivate_channel(chs->c[i]);
   2449}
   2450
   2451void mlx5e_close_channels(struct mlx5e_channels *chs)
   2452{
   2453	int i;
   2454
   2455	if (chs->ptp) {
   2456		mlx5e_ptp_close(chs->ptp);
   2457		chs->ptp = NULL;
   2458	}
   2459	for (i = 0; i < chs->num; i++)
   2460		mlx5e_close_channel(chs->c[i]);
   2461
   2462	kfree(chs->c);
   2463	chs->num = 0;
   2464}
   2465
   2466static int mlx5e_modify_tirs_packet_merge(struct mlx5e_priv *priv)
   2467{
   2468	struct mlx5e_rx_res *res = priv->rx_res;
   2469
   2470	return mlx5e_rx_res_packet_merge_set_param(res, &priv->channels.params.packet_merge);
   2471}
   2472
   2473static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_modify_tirs_packet_merge);
   2474
   2475static int mlx5e_set_mtu(struct mlx5_core_dev *mdev,
   2476			 struct mlx5e_params *params, u16 mtu)
   2477{
   2478	u16 hw_mtu = MLX5E_SW2HW_MTU(params, mtu);
   2479	int err;
   2480
   2481	err = mlx5_set_port_mtu(mdev, hw_mtu, 1);
   2482	if (err)
   2483		return err;
   2484
   2485	/* Update vport context MTU */
   2486	mlx5_modify_nic_vport_mtu(mdev, hw_mtu);
   2487	return 0;
   2488}
   2489
   2490static void mlx5e_query_mtu(struct mlx5_core_dev *mdev,
   2491			    struct mlx5e_params *params, u16 *mtu)
   2492{
   2493	u16 hw_mtu = 0;
   2494	int err;
   2495
   2496	err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
   2497	if (err || !hw_mtu) /* fallback to port oper mtu */
   2498		mlx5_query_port_oper_mtu(mdev, &hw_mtu, 1);
   2499
   2500	*mtu = MLX5E_HW2SW_MTU(params, hw_mtu);
   2501}
   2502
   2503int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv)
   2504{
   2505	struct mlx5e_params *params = &priv->channels.params;
   2506	struct net_device *netdev = priv->netdev;
   2507	struct mlx5_core_dev *mdev = priv->mdev;
   2508	u16 mtu;
   2509	int err;
   2510
   2511	err = mlx5e_set_mtu(mdev, params, params->sw_mtu);
   2512	if (err)
   2513		return err;
   2514
   2515	mlx5e_query_mtu(mdev, params, &mtu);
   2516	if (mtu != params->sw_mtu)
   2517		netdev_warn(netdev, "%s: VPort MTU %d is different than netdev mtu %d\n",
   2518			    __func__, mtu, params->sw_mtu);
   2519
   2520	params->sw_mtu = mtu;
   2521	return 0;
   2522}
   2523
   2524MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_set_dev_port_mtu);
   2525
   2526void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv)
   2527{
   2528	struct mlx5e_params *params = &priv->channels.params;
   2529	struct net_device *netdev   = priv->netdev;
   2530	struct mlx5_core_dev *mdev  = priv->mdev;
   2531	u16 max_mtu;
   2532
   2533	/* MTU range: 68 - hw-specific max */
   2534	netdev->min_mtu = ETH_MIN_MTU;
   2535
   2536	mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
   2537	netdev->max_mtu = min_t(unsigned int, MLX5E_HW2SW_MTU(params, max_mtu),
   2538				ETH_MAX_MTU);
   2539}
   2540
   2541static int mlx5e_netdev_set_tcs(struct net_device *netdev, u16 nch, u8 ntc,
   2542				struct netdev_tc_txq *tc_to_txq)
   2543{
   2544	int tc, err;
   2545
   2546	netdev_reset_tc(netdev);
   2547
   2548	if (ntc == 1)
   2549		return 0;
   2550
   2551	err = netdev_set_num_tc(netdev, ntc);
   2552	if (err) {
   2553		netdev_WARN(netdev, "netdev_set_num_tc failed (%d), ntc = %d\n", err, ntc);
   2554		return err;
   2555	}
   2556
   2557	for (tc = 0; tc < ntc; tc++) {
   2558		u16 count, offset;
   2559
   2560		count = tc_to_txq[tc].count;
   2561		offset = tc_to_txq[tc].offset;
   2562		netdev_set_tc_queue(netdev, tc, count, offset);
   2563	}
   2564
   2565	return 0;
   2566}
   2567
   2568int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv)
   2569{
   2570	int qos_queues, nch, ntc, num_txqs, err;
   2571
   2572	qos_queues = mlx5e_qos_cur_leaf_nodes(priv);
   2573
   2574	nch = priv->channels.params.num_channels;
   2575	ntc = mlx5e_get_dcb_num_tc(&priv->channels.params);
   2576	num_txqs = nch * ntc + qos_queues;
   2577	if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_PORT_TS))
   2578		num_txqs += ntc;
   2579
   2580	mlx5e_dbg(DRV, priv, "Setting num_txqs %d\n", num_txqs);
   2581	err = netif_set_real_num_tx_queues(priv->netdev, num_txqs);
   2582	if (err)
   2583		netdev_warn(priv->netdev, "netif_set_real_num_tx_queues failed, %d\n", err);
   2584
   2585	return err;
   2586}
   2587
   2588static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv)
   2589{
   2590	struct netdev_tc_txq old_tc_to_txq[TC_MAX_QUEUE], *tc_to_txq;
   2591	struct net_device *netdev = priv->netdev;
   2592	int old_num_txqs, old_ntc;
   2593	int num_rxqs, nch, ntc;
   2594	int err;
   2595	int i;
   2596
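       	/* Snapshot the current TXQ/TC configuration so it can be restored in
       	 * the error paths below.
       	 */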
   2597	old_num_txqs = netdev->real_num_tx_queues;
   2598	old_ntc = netdev->num_tc ? : 1;
   2599	for (i = 0; i < ARRAY_SIZE(old_tc_to_txq); i++)
   2600		old_tc_to_txq[i] = netdev->tc_to_txq[i];
   2601
   2602	nch = priv->channels.params.num_channels;
   2603	ntc = priv->channels.params.mqprio.num_tc;
   2604	num_rxqs = nch * priv->profile->rq_groups;
   2605	tc_to_txq = priv->channels.params.mqprio.tc_to_txq;
   2606
   2607	err = mlx5e_netdev_set_tcs(netdev, nch, ntc, tc_to_txq);
   2608	if (err)
   2609		goto err_out;
   2610	err = mlx5e_update_tx_netdev_queues(priv);
   2611	if (err)
   2612		goto err_tcs;
   2613	err = netif_set_real_num_rx_queues(netdev, num_rxqs);
   2614	if (err) {
   2615		netdev_warn(netdev, "netif_set_real_num_rx_queues failed, %d\n", err);
   2616		goto err_txqs;
   2617	}
   2618	if (priv->mqprio_rl != priv->channels.params.mqprio.channel.rl) {
   2619		if (priv->mqprio_rl) {
   2620			mlx5e_mqprio_rl_cleanup(priv->mqprio_rl);
   2621			mlx5e_mqprio_rl_free(priv->mqprio_rl);
   2622		}
   2623		priv->mqprio_rl = priv->channels.params.mqprio.channel.rl;
   2624	}
   2625
   2626	return 0;
   2627
   2628err_txqs:
   2629	/* netif_set_real_num_rx_queues can only fail when nch increased. Only
   2630	 * one of nch and ntc is changed in this function. That means the call
   2631	 * to netif_set_real_num_tx_queues below should not fail, because it
   2632	 * only decreases the number of TX queues.
   2633	 */
   2634	WARN_ON_ONCE(netif_set_real_num_tx_queues(netdev, old_num_txqs));
   2635
   2636err_tcs:
   2637	WARN_ON_ONCE(mlx5e_netdev_set_tcs(netdev, old_num_txqs / old_ntc, old_ntc,
   2638					  old_tc_to_txq));
   2639err_out:
   2640	return err;
   2641}
   2642
   2643static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_update_netdev_queues);
   2644
   2645static void mlx5e_set_default_xps_cpumasks(struct mlx5e_priv *priv,
   2646					   struct mlx5e_params *params)
   2647{
   2648	struct mlx5_core_dev *mdev = priv->mdev;
   2649	int num_comp_vectors, ix, irq;
   2650
   2651	num_comp_vectors = mlx5_comp_vectors_count(mdev);
   2652
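       	/* Channel ix is served by completion vectors ix, ix + num_channels,
       	 * and so on; build its XPS cpumask from the first CPU of each such
       	 * vector's IRQ affinity mask.
       	 */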
   2653	for (ix = 0; ix < params->num_channels; ix++) {
   2654		cpumask_clear(priv->scratchpad.cpumask);
   2655
   2656		for (irq = ix; irq < num_comp_vectors; irq += params->num_channels) {
   2657			int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(mdev, irq));
   2658
   2659			cpumask_set_cpu(cpu, priv->scratchpad.cpumask);
   2660		}
   2661
   2662		netif_set_xps_queue(priv->netdev, priv->scratchpad.cpumask, ix);
   2663	}
   2664}
   2665
   2666static int mlx5e_num_channels_changed(struct mlx5e_priv *priv)
   2667{
   2668	u16 count = priv->channels.params.num_channels;
   2669	int err;
   2670
   2671	err = mlx5e_update_netdev_queues(priv);
   2672	if (err)
   2673		return err;
   2674
   2675	mlx5e_set_default_xps_cpumasks(priv, &priv->channels.params);
   2676
   2677	/* This function may be called on attach, before priv->rx_res is created. */
   2678	if (!netif_is_rxfh_configured(priv->netdev) && priv->rx_res)
   2679		mlx5e_rx_res_rss_set_indir_uniform(priv->rx_res, count);
   2680
   2681	return 0;
   2682}
   2683
   2684MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_num_channels_changed);
   2685
   2686static void mlx5e_build_txq_maps(struct mlx5e_priv *priv)
   2687{
   2688	int i, ch, tc, num_tc;
   2689
   2690	ch = priv->channels.num;
   2691	num_tc = mlx5e_get_dcb_num_tc(&priv->channels.params);
   2692
   2693	for (i = 0; i < ch; i++) {
   2694		for (tc = 0; tc < num_tc; tc++) {
   2695			struct mlx5e_channel *c = priv->channels.c[i];
   2696			struct mlx5e_txqsq *sq = &c->sq[tc];
   2697
   2698			priv->txq2sq[sq->txq_ix] = sq;
   2699		}
   2700	}
   2701
   2702	if (!priv->channels.ptp)
   2703		goto out;
   2704
   2705	if (!test_bit(MLX5E_PTP_STATE_TX, priv->channels.ptp->state))
   2706		goto out;
   2707
   2708	for (tc = 0; tc < num_tc; tc++) {
   2709		struct mlx5e_ptp *c = priv->channels.ptp;
   2710		struct mlx5e_txqsq *sq = &c->ptpsq[tc].txqsq;
   2711
   2712		priv->txq2sq[sq->txq_ix] = sq;
   2713	}
   2714
   2715out:
   2716	/* Make the change to txq2sq visible before the queue is started.
   2717	 * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE,
   2718	 * which pairs with this barrier.
   2719	 */
   2720	smp_wmb();
   2721}
   2722
   2723void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
   2724{
   2725	mlx5e_build_txq_maps(priv);
   2726	mlx5e_activate_channels(&priv->channels);
   2727	mlx5e_qos_activate_queues(priv);
   2728	mlx5e_xdp_tx_enable(priv);
   2729
   2730	/* dev_watchdog() wants all TX queues to be started when the carrier is
   2731	 * OK, including the ones in range real_num_tx_queues..num_tx_queues-1.
   2732	 * Make it happy to avoid TX timeout false alarms.
   2733	 */
   2734	netif_tx_start_all_queues(priv->netdev);
   2735
   2736	if (mlx5e_is_vport_rep(priv))
   2737		mlx5e_add_sqs_fwd_rules(priv);
   2738
   2739	mlx5e_wait_channels_min_rx_wqes(&priv->channels);
   2740
   2741	if (priv->rx_res)
   2742		mlx5e_rx_res_channels_activate(priv->rx_res, &priv->channels);
   2743}
   2744
   2745void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv)
   2746{
   2747	if (priv->rx_res)
   2748		mlx5e_rx_res_channels_deactivate(priv->rx_res);
   2749
   2750	if (mlx5e_is_vport_rep(priv))
   2751		mlx5e_remove_sqs_fwd_rules(priv);
   2752
   2753	/* The results of ndo_select_queue are unreliable while the netdev
   2754	 * config (real_num_tx_queues, num_tc) is being changed. Stop all queues
   2755	 * to prevent ndo_start_xmit from being called, so that it can assume
   2756	 * that the selected queue is always valid.
   2757	 */
   2758	netif_tx_disable(priv->netdev);
   2759
   2760	mlx5e_xdp_tx_disable(priv);
   2761	mlx5e_deactivate_channels(&priv->channels);
   2762}
   2763
   2764static int mlx5e_switch_priv_params(struct mlx5e_priv *priv,
   2765				    struct mlx5e_params *new_params,
   2766				    mlx5e_fp_preactivate preactivate,
   2767				    void *context)
   2768{
   2769	struct mlx5e_params old_params;
   2770
   2771	old_params = priv->channels.params;
   2772	priv->channels.params = *new_params;
   2773
   2774	if (preactivate) {
   2775		int err;
   2776
   2777		err = preactivate(priv, context);
   2778		if (err) {
   2779			priv->channels.params = old_params;
   2780			return err;
   2781		}
   2782	}
   2783
   2784	return 0;
   2785}
   2786
   2787static int mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
   2788				      struct mlx5e_channels *new_chs,
   2789				      mlx5e_fp_preactivate preactivate,
   2790				      void *context)
   2791{
   2792	struct net_device *netdev = priv->netdev;
   2793	struct mlx5e_channels old_chs;
   2794	int carrier_ok;
   2795	int err = 0;
   2796
   2797	carrier_ok = netif_carrier_ok(netdev);
   2798	netif_carrier_off(netdev);
   2799
   2800	mlx5e_deactivate_priv_channels(priv);
   2801
   2802	old_chs = priv->channels;
   2803	priv->channels = *new_chs;
   2804
   2805	/* New channels are ready to roll; call the preactivate hook if needed
   2806	 * to modify HW settings or update kernel parameters.
   2807	 */
   2808	if (preactivate) {
   2809		err = preactivate(priv, context);
   2810		if (err) {
   2811			priv->channels = old_chs;
   2812			goto out;
   2813		}
   2814	}
   2815
   2816	mlx5e_close_channels(&old_chs);
   2817	priv->profile->update_rx(priv);
   2818
   2819	mlx5e_selq_apply(&priv->selq);
   2820out:
   2821	mlx5e_activate_priv_channels(priv);
   2822
   2823	/* restore the carrier if needed */
   2824	if (carrier_ok)
   2825		netif_carrier_on(netdev);
   2826
   2827	return err;
   2828}
   2829
   2830int mlx5e_safe_switch_params(struct mlx5e_priv *priv,
   2831			     struct mlx5e_params *params,
   2832			     mlx5e_fp_preactivate preactivate,
   2833			     void *context, bool reset)
   2834{
   2835	struct mlx5e_channels new_chs = {};
   2836	int err;
   2837
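       	/* A full channel reset is only meaningful while the channels are
       	 * open; otherwise just swap in the new parameters.
       	 */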
   2838	reset &= test_bit(MLX5E_STATE_OPENED, &priv->state);
   2839	if (!reset)
   2840		return mlx5e_switch_priv_params(priv, params, preactivate, context);
   2841
   2842	new_chs.params = *params;
   2843
   2844	mlx5e_selq_prepare(&priv->selq, &new_chs.params, !!priv->htb.maj_id);
   2845
   2846	err = mlx5e_open_channels(priv, &new_chs);
   2847	if (err)
   2848		goto err_cancel_selq;
   2849
   2850	err = mlx5e_switch_priv_channels(priv, &new_chs, preactivate, context);
   2851	if (err)
   2852		goto err_close;
   2853
   2854	return 0;
   2855
   2856err_close:
   2857	mlx5e_close_channels(&new_chs);
   2858
   2859err_cancel_selq:
   2860	mlx5e_selq_cancel(&priv->selq);
   2861	return err;
   2862}
   2863
   2864int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv)
   2865{
   2866	return mlx5e_safe_switch_params(priv, &priv->channels.params, NULL, NULL, true);
   2867}
   2868
   2869void mlx5e_timestamp_init(struct mlx5e_priv *priv)
   2870{
   2871	priv->tstamp.tx_type   = HWTSTAMP_TX_OFF;
   2872	priv->tstamp.rx_filter = HWTSTAMP_FILTER_NONE;
   2873}
   2874
   2875static void mlx5e_modify_admin_state(struct mlx5_core_dev *mdev,
   2876				     enum mlx5_port_status state)
   2877{
   2878	struct mlx5_eswitch *esw = mdev->priv.eswitch;
   2879	int vport_admin_state;
   2880
   2881	mlx5_set_port_admin_status(mdev, state);
   2882
   2883	if (mlx5_eswitch_mode(mdev) == MLX5_ESWITCH_OFFLOADS ||
   2884	    !MLX5_CAP_GEN(mdev, uplink_follow))
   2885		return;
   2886
   2887	if (state == MLX5_PORT_UP)
   2888		vport_admin_state = MLX5_VPORT_ADMIN_STATE_AUTO;
   2889	else
   2890		vport_admin_state = MLX5_VPORT_ADMIN_STATE_DOWN;
   2891
   2892	mlx5_eswitch_set_vport_state(esw, MLX5_VPORT_UPLINK, vport_admin_state);
   2893}
   2894
   2895int mlx5e_open_locked(struct net_device *netdev)
   2896{
   2897	struct mlx5e_priv *priv = netdev_priv(netdev);
   2898	int err;
   2899
   2900	mlx5e_selq_prepare(&priv->selq, &priv->channels.params, !!priv->htb.maj_id);
   2901
   2902	set_bit(MLX5E_STATE_OPENED, &priv->state);
   2903
   2904	err = mlx5e_open_channels(priv, &priv->channels);
   2905	if (err)
   2906		goto err_clear_state_opened_flag;
   2907
   2908	priv->profile->update_rx(priv);
   2909	mlx5e_selq_apply(&priv->selq);
   2910	mlx5e_activate_priv_channels(priv);
   2911	mlx5e_apply_traps(priv, true);
   2912	if (priv->profile->update_carrier)
   2913		priv->profile->update_carrier(priv);
   2914
   2915	mlx5e_queue_update_stats(priv);
   2916	return 0;
   2917
   2918err_clear_state_opened_flag:
   2919	clear_bit(MLX5E_STATE_OPENED, &priv->state);
   2920	mlx5e_selq_cancel(&priv->selq);
   2921	return err;
   2922}
   2923
   2924int mlx5e_open(struct net_device *netdev)
   2925{
   2926	struct mlx5e_priv *priv = netdev_priv(netdev);
   2927	int err;
   2928
   2929	mutex_lock(&priv->state_lock);
   2930	err = mlx5e_open_locked(netdev);
   2931	if (!err)
   2932		mlx5e_modify_admin_state(priv->mdev, MLX5_PORT_UP);
   2933	mutex_unlock(&priv->state_lock);
   2934
   2935	return err;
   2936}
   2937
   2938int mlx5e_close_locked(struct net_device *netdev)
   2939{
   2940	struct mlx5e_priv *priv = netdev_priv(netdev);
   2941
   2942	/* May already be CLOSED if a previous configuration operation that
   2943	 * involves close & open (e.g. an RX/TX queue size change) failed.
   2944	 */
   2945	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
   2946		return 0;
   2947
   2948	mlx5e_apply_traps(priv, false);
   2949	clear_bit(MLX5E_STATE_OPENED, &priv->state);
   2950
   2951	netif_carrier_off(priv->netdev);
   2952	mlx5e_deactivate_priv_channels(priv);
   2953	mlx5e_close_channels(&priv->channels);
   2954
   2955	return 0;
   2956}
   2957
   2958int mlx5e_close(struct net_device *netdev)
   2959{
   2960	struct mlx5e_priv *priv = netdev_priv(netdev);
   2961	int err;
   2962
   2963	if (!netif_device_present(netdev))
   2964		return -ENODEV;
   2965
   2966	mutex_lock(&priv->state_lock);
   2967	mlx5e_modify_admin_state(priv->mdev, MLX5_PORT_DOWN);
   2968	err = mlx5e_close_locked(netdev);
   2969	mutex_unlock(&priv->state_lock);
   2970
   2971	return err;
   2972}
   2973
   2974static void mlx5e_free_drop_rq(struct mlx5e_rq *rq)
   2975{
   2976	mlx5_wq_destroy(&rq->wq_ctrl);
   2977}
   2978
   2979static int mlx5e_alloc_drop_rq(struct mlx5_core_dev *mdev,
   2980			       struct mlx5e_rq *rq,
   2981			       struct mlx5e_rq_param *param)
   2982{
   2983	void *rqc = param->rqc;
   2984	void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
   2985	int err;
   2986
   2987	param->wq.db_numa_node = param->wq.buf_numa_node;
   2988
   2989	err = mlx5_wq_cyc_create(mdev, &param->wq, rqc_wq, &rq->wqe.wq,
   2990				 &rq->wq_ctrl);
   2991	if (err)
   2992		return err;
   2993
   2994	/* Mark as unused given "Drop-RQ" packets never reach XDP */
   2995	xdp_rxq_info_unused(&rq->xdp_rxq);
   2996
   2997	rq->mdev = mdev;
   2998
   2999	return 0;
   3000}
   3001
   3002static int mlx5e_alloc_drop_cq(struct mlx5e_priv *priv,
   3003			       struct mlx5e_cq *cq,
   3004			       struct mlx5e_cq_param *param)
   3005{
   3006	struct mlx5_core_dev *mdev = priv->mdev;
   3007
   3008	param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
   3009	param->wq.db_numa_node  = dev_to_node(mlx5_core_dma_dev(mdev));
   3010
   3011	return mlx5e_alloc_cq_common(priv, param, cq);
   3012}
   3013
   3014int mlx5e_open_drop_rq(struct mlx5e_priv *priv,
   3015		       struct mlx5e_rq *drop_rq)
   3016{
   3017	struct mlx5_core_dev *mdev = priv->mdev;
   3018	struct mlx5e_cq_param cq_param = {};
   3019	struct mlx5e_rq_param rq_param = {};
   3020	struct mlx5e_cq *cq = &drop_rq->cq;
   3021	int err;
   3022
   3023	mlx5e_build_drop_rq_param(mdev, priv->drop_rq_q_counter, &rq_param);
   3024
   3025	err = mlx5e_alloc_drop_cq(priv, cq, &cq_param);
   3026	if (err)
   3027		return err;
   3028
   3029	err = mlx5e_create_cq(cq, &cq_param);
   3030	if (err)
   3031		goto err_free_cq;
   3032
   3033	err = mlx5e_alloc_drop_rq(mdev, drop_rq, &rq_param);
   3034	if (err)
   3035		goto err_destroy_cq;
   3036
   3037	err = mlx5e_create_rq(drop_rq, &rq_param);
   3038	if (err)
   3039		goto err_free_rq;
   3040
   3041	err = mlx5e_modify_rq_state(drop_rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
   3042	if (err)
   3043		mlx5_core_warn(priv->mdev, "modify_rq_state failed, rx_if_down_packets won't be counted %d\n", err);
   3044
   3045	return 0;
   3046
   3047err_free_rq:
   3048	mlx5e_free_drop_rq(drop_rq);
   3049
   3050err_destroy_cq:
   3051	mlx5e_destroy_cq(cq);
   3052
   3053err_free_cq:
   3054	mlx5e_free_cq(cq);
   3055
   3056	return err;
   3057}
   3058
   3059void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq)
   3060{
   3061	mlx5e_destroy_rq(drop_rq);
   3062	mlx5e_free_drop_rq(drop_rq);
   3063	mlx5e_destroy_cq(&drop_rq->cq);
   3064	mlx5e_free_cq(&drop_rq->cq);
   3065}
   3066
   3067int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn)
   3068{
   3069	void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
   3070
   3071	MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.hw_objs.td.tdn);
   3072
   3073	if (MLX5_GET(tisc, tisc, tls_en))
   3074		MLX5_SET(tisc, tisc, pd, mdev->mlx5e_res.hw_objs.pdn);
   3075
   3076	if (mlx5_lag_is_lacp_owner(mdev))
   3077		MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1);
   3078
   3079	return mlx5_core_create_tis(mdev, in, tisn);
   3080}
   3081
   3082void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn)
   3083{
   3084	mlx5_core_destroy_tis(mdev, tisn);
   3085}
   3086
   3087void mlx5e_destroy_tises(struct mlx5e_priv *priv)
   3088{
   3089	int tc, i;
   3090
   3091	for (i = 0; i < mlx5e_get_num_lag_ports(priv->mdev); i++)
   3092		for (tc = 0; tc < priv->profile->max_tc; tc++)
   3093			mlx5e_destroy_tis(priv->mdev, priv->tisn[i][tc]);
   3094}
   3095
   3096static bool mlx5e_lag_should_assign_affinity(struct mlx5_core_dev *mdev)
   3097{
   3098	return MLX5_CAP_GEN(mdev, lag_tx_port_affinity) && mlx5e_get_num_lag_ports(mdev) > 1;
   3099}
   3100
   3101int mlx5e_create_tises(struct mlx5e_priv *priv)
   3102{
   3103	int tc, i;
   3104	int err;
   3105
   3106	for (i = 0; i < mlx5e_get_num_lag_ports(priv->mdev); i++) {
   3107		for (tc = 0; tc < priv->profile->max_tc; tc++) {
   3108			u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
   3109			void *tisc;
   3110
   3111			tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
   3112
   3113			MLX5_SET(tisc, tisc, prio, tc << 1);
   3114
   3115			if (mlx5e_lag_should_assign_affinity(priv->mdev))
   3116				MLX5_SET(tisc, tisc, lag_tx_port_affinity, i + 1);
   3117
   3118			err = mlx5e_create_tis(priv->mdev, in, &priv->tisn[i][tc]);
   3119			if (err)
   3120				goto err_close_tises;
   3121		}
   3122	}
   3123
   3124	return 0;
   3125
   3126err_close_tises:
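       	/* Unwind: destroy the TISes already created for the failing LAG port
       	 * (tc points at the one that failed), then all TISes of the ports
       	 * completed before it.
       	 */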
   3127	for (; i >= 0; i--) {
   3128		for (tc--; tc >= 0; tc--)
   3129			mlx5e_destroy_tis(priv->mdev, priv->tisn[i][tc]);
   3130		tc = priv->profile->max_tc;
   3131	}
   3132
   3133	return err;
   3134}
   3135
   3136static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv)
   3137{
   3138	mlx5e_destroy_tises(priv);
   3139}
   3140
   3141static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool enable)
   3142{
   3143	int err = 0;
   3144	int i;
   3145
   3146	for (i = 0; i < chs->num; i++) {
   3147		err = mlx5e_modify_rq_scatter_fcs(&chs->c[i]->rq, enable);
   3148		if (err)
   3149			return err;
   3150	}
   3151
   3152	return 0;
   3153}
   3154
   3155static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd)
   3156{
   3157	int err;
   3158	int i;
   3159
   3160	for (i = 0; i < chs->num; i++) {
   3161		err = mlx5e_modify_rq_vsd(&chs->c[i]->rq, vsd);
   3162		if (err)
   3163			return err;
   3164	}
   3165	if (chs->ptp && test_bit(MLX5E_PTP_STATE_RX, chs->ptp->state))
   3166		return mlx5e_modify_rq_vsd(&chs->ptp->rq, vsd);
   3167
   3168	return 0;
   3169}
   3170
   3171static void mlx5e_mqprio_build_default_tc_to_txq(struct netdev_tc_txq *tc_to_txq,
   3172						 int ntc, int nch)
   3173{
   3174	int tc;
   3175
   3176	memset(tc_to_txq, 0, sizeof(*tc_to_txq) * TC_MAX_QUEUE);
   3177
   3178	/* Map netdev TCs to offset 0.
   3179	 * We have our own UP-to-TXQ mapping for the DCB mode of QoS.
   3180	 */
   3181	for (tc = 0; tc < ntc; tc++) {
   3182		tc_to_txq[tc] = (struct netdev_tc_txq) {
   3183			.count = nch,
   3184			.offset = 0,
   3185		};
   3186	}
   3187}
   3188
   3189static void mlx5e_mqprio_build_tc_to_txq(struct netdev_tc_txq *tc_to_txq,
   3190					 struct tc_mqprio_qopt *qopt)
   3191{
   3192	int tc;
   3193
   3194	for (tc = 0; tc < TC_MAX_QUEUE; tc++) {
   3195		tc_to_txq[tc] = (struct netdev_tc_txq) {
   3196			.count = qopt->count[tc],
   3197			.offset = qopt->offset[tc],
   3198		};
   3199	}
   3200}
   3201
   3202static void mlx5e_params_mqprio_dcb_set(struct mlx5e_params *params, u8 num_tc)
   3203{
   3204	params->mqprio.mode = TC_MQPRIO_MODE_DCB;
   3205	params->mqprio.num_tc = num_tc;
   3206	params->mqprio.channel.rl = NULL;
   3207	mlx5e_mqprio_build_default_tc_to_txq(params->mqprio.tc_to_txq, num_tc,
   3208					     params->num_channels);
   3209}
   3210
   3211static void mlx5e_params_mqprio_channel_set(struct mlx5e_params *params,
   3212					    struct tc_mqprio_qopt *qopt,
   3213					    struct mlx5e_mqprio_rl *rl)
   3214{
   3215	params->mqprio.mode = TC_MQPRIO_MODE_CHANNEL;
   3216	params->mqprio.num_tc = qopt->num_tc;
   3217	params->mqprio.channel.rl = rl;
   3218	mlx5e_mqprio_build_tc_to_txq(params->mqprio.tc_to_txq, qopt);
   3219}
   3220
   3221static void mlx5e_params_mqprio_reset(struct mlx5e_params *params)
   3222{
   3223	mlx5e_params_mqprio_dcb_set(params, 1);
   3224}
   3225
   3226static int mlx5e_setup_tc_mqprio_dcb(struct mlx5e_priv *priv,
   3227				     struct tc_mqprio_qopt *mqprio)
   3228{
   3229	struct mlx5e_params new_params;
   3230	u8 tc = mqprio->num_tc;
   3231	int err;
   3232
   3233	mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
   3234
   3235	if (tc && tc != MLX5E_MAX_NUM_TC)
   3236		return -EINVAL;
   3237
   3238	new_params = priv->channels.params;
   3239	mlx5e_params_mqprio_dcb_set(&new_params, tc ? tc : 1);
   3240
   3241	err = mlx5e_safe_switch_params(priv, &new_params,
   3242				       mlx5e_num_channels_changed_ctx, NULL, true);
   3243
   3244	priv->max_opened_tc = max_t(u8, priv->max_opened_tc,
   3245				    mlx5e_get_dcb_num_tc(&priv->channels.params));
   3246	return err;
   3247}
   3248
   3249static int mlx5e_mqprio_channel_validate(struct mlx5e_priv *priv,
   3250					 struct tc_mqprio_qopt_offload *mqprio)
   3251{
   3252	struct net_device *netdev = priv->netdev;
   3253	struct mlx5e_ptp *ptp_channel;
   3254	int agg_count = 0;
   3255	int i;
   3256
   3257	ptp_channel = priv->channels.ptp;
   3258	if (ptp_channel && test_bit(MLX5E_PTP_STATE_TX, ptp_channel->state)) {
   3259		netdev_err(netdev,
   3260			   "Cannot activate MQPRIO mode channel since it conflicts with TX port TS\n");
   3261		return -EINVAL;
   3262	}
   3263
   3264	if (mqprio->qopt.offset[0] != 0 || mqprio->qopt.num_tc < 1 ||
   3265	    mqprio->qopt.num_tc > MLX5E_MAX_NUM_MQPRIO_CH_TC)
   3266		return -EINVAL;
   3267
   3268	for (i = 0; i < mqprio->qopt.num_tc; i++) {
   3269		if (!mqprio->qopt.count[i]) {
   3270			netdev_err(netdev, "Zero size for queue-group (%d) is not supported\n", i);
   3271			return -EINVAL;
   3272		}
   3273		if (mqprio->min_rate[i]) {
   3274			netdev_err(netdev, "Min tx rate is not supported\n");
   3275			return -EINVAL;
   3276		}
   3277
   3278		if (mqprio->max_rate[i]) {
   3279			int err;
   3280
   3281			err = mlx5e_qos_bytes_rate_check(priv->mdev, mqprio->max_rate[i]);
   3282			if (err)
   3283				return err;
   3284		}
   3285
   3286		if (mqprio->qopt.offset[i] != agg_count) {
   3287			netdev_err(netdev, "Discontinuous queues config is not supported\n");
   3288			return -EINVAL;
   3289		}
   3290		agg_count += mqprio->qopt.count[i];
   3291	}
   3292
   3293	if (priv->channels.params.num_channels != agg_count) {
   3294		netdev_err(netdev, "Num of queues (%d) does not match available (%d)\n",
   3295			   agg_count, priv->channels.params.num_channels);
   3296		return -EINVAL;
   3297	}
   3298
   3299	return 0;
   3300}
   3301
   3302static bool mlx5e_mqprio_rate_limit(struct tc_mqprio_qopt_offload *mqprio)
   3303{
   3304	int tc;
   3305
   3306	for (tc = 0; tc < mqprio->qopt.num_tc; tc++)
   3307		if (mqprio->max_rate[tc])
   3308			return true;
   3309	return false;
   3310}
   3311
   3312static int mlx5e_setup_tc_mqprio_channel(struct mlx5e_priv *priv,
   3313					 struct tc_mqprio_qopt_offload *mqprio)
   3314{
   3315	mlx5e_fp_preactivate preactivate;
   3316	struct mlx5e_params new_params;
   3317	struct mlx5e_mqprio_rl *rl;
   3318	bool nch_changed;
   3319	int err;
   3320
   3321	err = mlx5e_mqprio_channel_validate(priv, mqprio);
   3322	if (err)
   3323		return err;
   3324
   3325	rl = NULL;
   3326	if (mlx5e_mqprio_rate_limit(mqprio)) {
   3327		rl = mlx5e_mqprio_rl_alloc();
   3328		if (!rl)
   3329			return -ENOMEM;
   3330		err = mlx5e_mqprio_rl_init(rl, priv->mdev, mqprio->qopt.num_tc,
   3331					   mqprio->max_rate);
   3332		if (err) {
   3333			mlx5e_mqprio_rl_free(rl);
   3334			return err;
   3335		}
   3336	}
   3337
   3338	new_params = priv->channels.params;
   3339	mlx5e_params_mqprio_channel_set(&new_params, &mqprio->qopt, rl);
   3340
   3341	nch_changed = mlx5e_get_dcb_num_tc(&priv->channels.params) > 1;
   3342	preactivate = nch_changed ? mlx5e_num_channels_changed_ctx :
   3343		mlx5e_update_netdev_queues_ctx;
   3344	err = mlx5e_safe_switch_params(priv, &new_params, preactivate, NULL, true);
   3345	if (err && rl) {
   3346		mlx5e_mqprio_rl_cleanup(rl);
   3347		mlx5e_mqprio_rl_free(rl);
   3348	}
   3349
   3350	return err;
   3351}
   3352
   3353static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv,
   3354				 struct tc_mqprio_qopt_offload *mqprio)
   3355{
   3356	/* MQPRIO is another top-level qdisc that can't be attached
   3357	 * simultaneously with the offloaded HTB.
   3358	 */
   3359	if (WARN_ON(priv->htb.maj_id))
   3360		return -EINVAL;
   3361
   3362	switch (mqprio->mode) {
   3363	case TC_MQPRIO_MODE_DCB:
   3364		return mlx5e_setup_tc_mqprio_dcb(priv, &mqprio->qopt);
   3365	case TC_MQPRIO_MODE_CHANNEL:
   3366		return mlx5e_setup_tc_mqprio_channel(priv, mqprio);
   3367	default:
   3368		return -EOPNOTSUPP;
   3369	}
   3370}
   3371
   3372static int mlx5e_setup_tc_htb(struct mlx5e_priv *priv, struct tc_htb_qopt_offload *htb)
   3373{
   3374	int res;
   3375
   3376	switch (htb->command) {
   3377	case TC_HTB_CREATE:
   3378		return mlx5e_htb_root_add(priv, htb->parent_classid, htb->classid,
   3379					  htb->extack);
   3380	case TC_HTB_DESTROY:
   3381		return mlx5e_htb_root_del(priv);
   3382	case TC_HTB_LEAF_ALLOC_QUEUE:
   3383		res = mlx5e_htb_leaf_alloc_queue(priv, htb->classid, htb->parent_classid,
   3384						 htb->rate, htb->ceil, htb->extack);
   3385		if (res < 0)
   3386			return res;
   3387		htb->qid = res;
   3388		return 0;
   3389	case TC_HTB_LEAF_TO_INNER:
   3390		return mlx5e_htb_leaf_to_inner(priv, htb->parent_classid, htb->classid,
   3391					       htb->rate, htb->ceil, htb->extack);
   3392	case TC_HTB_LEAF_DEL:
   3393		return mlx5e_htb_leaf_del(priv, &htb->classid, htb->extack);
   3394	case TC_HTB_LEAF_DEL_LAST:
   3395	case TC_HTB_LEAF_DEL_LAST_FORCE:
   3396		return mlx5e_htb_leaf_del_last(priv, htb->classid,
   3397					       htb->command == TC_HTB_LEAF_DEL_LAST_FORCE,
   3398					       htb->extack);
   3399	case TC_HTB_NODE_MODIFY:
   3400		return mlx5e_htb_node_modify(priv, htb->classid, htb->rate, htb->ceil,
   3401					     htb->extack);
   3402	case TC_HTB_LEAF_QUERY_QUEUE:
   3403		res = mlx5e_get_txq_by_classid(priv, htb->classid);
   3404		if (res < 0)
   3405			return res;
   3406		htb->qid = res;
   3407		return 0;
   3408	default:
   3409		return -EOPNOTSUPP;
   3410	}
   3411}
   3412
   3413static LIST_HEAD(mlx5e_block_cb_list);
   3414
   3415static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
   3416			  void *type_data)
   3417{
   3418	struct mlx5e_priv *priv = netdev_priv(dev);
   3419	bool tc_unbind = false;
   3420	int err;
   3421
   3422	if (type == TC_SETUP_BLOCK &&
   3423	    ((struct flow_block_offload *)type_data)->command == FLOW_BLOCK_UNBIND)
   3424		tc_unbind = true;
   3425
   3426	if (!netif_device_present(dev) && !tc_unbind)
   3427		return -ENODEV;
   3428
   3429	switch (type) {
   3430	case TC_SETUP_BLOCK: {
   3431		struct flow_block_offload *f = type_data;
   3432
   3433		f->unlocked_driver_cb = true;
   3434		return flow_block_cb_setup_simple(type_data,
   3435						  &mlx5e_block_cb_list,
   3436						  mlx5e_setup_tc_block_cb,
   3437						  priv, priv, true);
   3438	}
   3439	case TC_SETUP_QDISC_MQPRIO:
   3440		mutex_lock(&priv->state_lock);
   3441		err = mlx5e_setup_tc_mqprio(priv, type_data);
   3442		mutex_unlock(&priv->state_lock);
   3443		return err;
   3444	case TC_SETUP_QDISC_HTB:
   3445		mutex_lock(&priv->state_lock);
   3446		err = mlx5e_setup_tc_htb(priv, type_data);
   3447		mutex_unlock(&priv->state_lock);
   3448		return err;
   3449	default:
   3450		return -EOPNOTSUPP;
   3451	}
   3452}
   3453
   3454void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s)
   3455{
   3456	int i;
   3457
   3458	for (i = 0; i < priv->stats_nch; i++) {
   3459		struct mlx5e_channel_stats *channel_stats = priv->channel_stats[i];
   3460		struct mlx5e_rq_stats *xskrq_stats = &channel_stats->xskrq;
   3461		struct mlx5e_rq_stats *rq_stats = &channel_stats->rq;
   3462		int j;
   3463
   3464		s->rx_packets   += rq_stats->packets + xskrq_stats->packets;
   3465		s->rx_bytes     += rq_stats->bytes + xskrq_stats->bytes;
   3466		s->multicast    += rq_stats->mcast_packets + xskrq_stats->mcast_packets;
   3467
   3468		for (j = 0; j < priv->max_opened_tc; j++) {
   3469			struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j];
   3470
   3471			s->tx_packets    += sq_stats->packets;
   3472			s->tx_bytes      += sq_stats->bytes;
   3473			s->tx_dropped    += sq_stats->dropped;
   3474		}
   3475	}
   3476	if (priv->tx_ptp_opened) {
   3477		for (i = 0; i < priv->max_opened_tc; i++) {
   3478			struct mlx5e_sq_stats *sq_stats = &priv->ptp_stats.sq[i];
   3479
   3480			s->tx_packets    += sq_stats->packets;
   3481			s->tx_bytes      += sq_stats->bytes;
   3482			s->tx_dropped    += sq_stats->dropped;
   3483		}
   3484	}
   3485	if (priv->rx_ptp_opened) {
   3486		struct mlx5e_rq_stats *rq_stats = &priv->ptp_stats.rq;
   3487
   3488		s->rx_packets   += rq_stats->packets;
   3489		s->rx_bytes     += rq_stats->bytes;
   3490		s->multicast    += rq_stats->mcast_packets;
   3491	}
   3492}
   3493
   3494void
   3495mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
   3496{
   3497	struct mlx5e_priv *priv = netdev_priv(dev);
   3498	struct mlx5e_pport_stats *pstats = &priv->stats.pport;
   3499
   3500	if (!netif_device_present(dev))
   3501		return;
   3502
   3503	/* In switchdev mode, the monitor counters don't cover the
   3504	 * 802_3 rx/tx stats. The update-stats mechanism should keep
   3505	 * the 802_3 layout counters updated.
   3506	 */
   3507	if (!mlx5e_monitor_counter_supported(priv) ||
   3508	    mlx5e_is_uplink_rep(priv)) {
   3509		/* update HW stats in background for next time */
   3510		mlx5e_queue_update_stats(priv);
   3511	}
   3512
   3513	if (mlx5e_is_uplink_rep(priv)) {
   3514		struct mlx5e_vport_stats *vstats = &priv->stats.vport;
   3515
   3516		stats->rx_packets = PPORT_802_3_GET(pstats, a_frames_received_ok);
   3517		stats->rx_bytes   = PPORT_802_3_GET(pstats, a_octets_received_ok);
   3518		stats->tx_packets = PPORT_802_3_GET(pstats, a_frames_transmitted_ok);
   3519		stats->tx_bytes   = PPORT_802_3_GET(pstats, a_octets_transmitted_ok);
   3520
   3521		/* vport multicast also counts packets that are dropped due to steering
   3522		 * or rx out of buffer
   3523		 */
   3524		stats->multicast = VPORT_COUNTER_GET(vstats, received_eth_multicast.packets);
   3525	} else {
   3526		mlx5e_fold_sw_stats64(priv, stats);
   3527	}
   3528
   3529	stats->rx_dropped = priv->stats.qcnt.rx_out_of_buffer;
   3530
   3531	stats->rx_length_errors =
   3532		PPORT_802_3_GET(pstats, a_in_range_length_errors) +
   3533		PPORT_802_3_GET(pstats, a_out_of_range_length_field) +
   3534		PPORT_802_3_GET(pstats, a_frame_too_long_errors);
   3535	stats->rx_crc_errors =
   3536		PPORT_802_3_GET(pstats, a_frame_check_sequence_errors);
   3537	stats->rx_frame_errors = PPORT_802_3_GET(pstats, a_alignment_errors);
   3538	stats->tx_aborted_errors = PPORT_2863_GET(pstats, if_out_discards);
   3539	stats->rx_errors = stats->rx_length_errors + stats->rx_crc_errors +
   3540			   stats->rx_frame_errors;
   3541	stats->tx_errors = stats->tx_aborted_errors + stats->tx_carrier_errors;
   3542}
   3543
   3544static void mlx5e_nic_set_rx_mode(struct mlx5e_priv *priv)
   3545{
   3546	if (mlx5e_is_uplink_rep(priv))
   3547		return; /* no rx mode for uplink rep */
   3548
   3549	queue_work(priv->wq, &priv->set_rx_mode_work);
   3550}
   3551
   3552static void mlx5e_set_rx_mode(struct net_device *dev)
   3553{
   3554	struct mlx5e_priv *priv = netdev_priv(dev);
   3555
   3556	mlx5e_nic_set_rx_mode(priv);
   3557}
   3558
   3559static int mlx5e_set_mac(struct net_device *netdev, void *addr)
   3560{
   3561	struct mlx5e_priv *priv = netdev_priv(netdev);
   3562	struct sockaddr *saddr = addr;
   3563
   3564	if (!is_valid_ether_addr(saddr->sa_data))
   3565		return -EADDRNOTAVAIL;
   3566
   3567	netif_addr_lock_bh(netdev);
   3568	eth_hw_addr_set(netdev, saddr->sa_data);
   3569	netif_addr_unlock_bh(netdev);
   3570
   3571	mlx5e_nic_set_rx_mode(priv);
   3572
   3573	return 0;
   3574}
   3575
   3576#define MLX5E_SET_FEATURE(features, feature, enable)	\
   3577	do {						\
   3578		if (enable)				\
   3579			*features |= feature;		\
   3580		else					\
   3581			*features &= ~feature;		\
   3582	} while (0)
   3583
   3584typedef int (*mlx5e_feature_handler)(struct net_device *netdev, bool enable);
   3585
   3586static int set_feature_lro(struct net_device *netdev, bool enable)
   3587{
   3588	struct mlx5e_priv *priv = netdev_priv(netdev);
   3589	struct mlx5_core_dev *mdev = priv->mdev;
   3590	struct mlx5e_params *cur_params;
   3591	struct mlx5e_params new_params;
   3592	bool reset = true;
   3593	int err = 0;
   3594
   3595	mutex_lock(&priv->state_lock);
   3596
   3597	if (enable && priv->xsk.refcnt) {
   3598		netdev_warn(netdev, "LRO is incompatible with AF_XDP (%u XSKs are active)\n",
   3599			    priv->xsk.refcnt);
   3600		err = -EINVAL;
   3601		goto out;
   3602	}
   3603
   3604	cur_params = &priv->channels.params;
   3605	if (enable && !MLX5E_GET_PFLAG(cur_params, MLX5E_PFLAG_RX_STRIDING_RQ)) {
   3606		netdev_warn(netdev, "can't set LRO with legacy RQ\n");
   3607		err = -EINVAL;
   3608		goto out;
   3609	}
   3610
   3611	new_params = *cur_params;
   3612
   3613	if (enable)
   3614		new_params.packet_merge.type = MLX5E_PACKET_MERGE_LRO;
   3615	else if (new_params.packet_merge.type == MLX5E_PACKET_MERGE_LRO)
   3616		new_params.packet_merge.type = MLX5E_PACKET_MERGE_NONE;
   3617	else
   3618		goto out;
   3619
   3620	if (!(cur_params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO &&
   3621	      new_params.packet_merge.type == MLX5E_PACKET_MERGE_LRO)) {
   3622		if (cur_params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
   3623			if (mlx5e_rx_mpwqe_is_linear_skb(mdev, cur_params, NULL) ==
   3624			    mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_params, NULL))
   3625				reset = false;
   3626		}
   3627	}
   3628
   3629	err = mlx5e_safe_switch_params(priv, &new_params,
   3630				       mlx5e_modify_tirs_packet_merge_ctx, NULL, reset);
   3631out:
   3632	mutex_unlock(&priv->state_lock);
   3633	return err;
   3634}
   3635
   3636static int set_feature_hw_gro(struct net_device *netdev, bool enable)
   3637{
   3638	struct mlx5e_priv *priv = netdev_priv(netdev);
   3639	struct mlx5e_params new_params;
   3640	bool reset = true;
   3641	int err = 0;
   3642
   3643	mutex_lock(&priv->state_lock);
   3644	new_params = priv->channels.params;
   3645
   3646	if (enable) {
   3647		new_params.packet_merge.type = MLX5E_PACKET_MERGE_SHAMPO;
   3648		new_params.packet_merge.shampo.match_criteria_type =
   3649			MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_EXTENDED;
   3650		new_params.packet_merge.shampo.alignment_granularity =
   3651			MLX5_RQC_SHAMPO_NO_MATCH_ALIGNMENT_GRANULARITY_STRIDE;
   3652	} else if (new_params.packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) {
   3653		new_params.packet_merge.type = MLX5E_PACKET_MERGE_NONE;
   3654	} else {
   3655		goto out;
   3656	}
   3657
   3658	err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset);
   3659out:
   3660	mutex_unlock(&priv->state_lock);
   3661	return err;
   3662}
   3663
   3664static int set_feature_cvlan_filter(struct net_device *netdev, bool enable)
   3665{
   3666	struct mlx5e_priv *priv = netdev_priv(netdev);
   3667
   3668	if (enable)
   3669		mlx5e_enable_cvlan_filter(priv);
   3670	else
   3671		mlx5e_disable_cvlan_filter(priv);
   3672
   3673	return 0;
   3674}
   3675
   3676static int set_feature_hw_tc(struct net_device *netdev, bool enable)
   3677{
   3678	struct mlx5e_priv *priv = netdev_priv(netdev);
   3679
   3680#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
   3681	if (!enable && mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD))) {
   3682		netdev_err(netdev,
   3683			   "Active offloaded tc filters, can't turn hw_tc_offload off\n");
   3684		return -EINVAL;
   3685	}
   3686#endif
   3687
   3688	if (!enable && priv->htb.maj_id) {
   3689		netdev_err(netdev, "Active HTB offload, can't turn hw_tc_offload off\n");
   3690		return -EINVAL;
   3691	}
   3692
   3693	return 0;
   3694}
   3695
   3696static int set_feature_rx_all(struct net_device *netdev, bool enable)
   3697{
   3698	struct mlx5e_priv *priv = netdev_priv(netdev);
   3699	struct mlx5_core_dev *mdev = priv->mdev;
   3700
   3701	return mlx5_set_port_fcs(mdev, !enable);
   3702}
   3703
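/* Toggle the PCMR rx_ts_over_crc bit, i.e. whether the RX timestamp is taken
 * over the CRC bytes. set_feature_rx_fcs() below clears it before enabling
 * FCS scatter and restores it afterwards, presumably because timestamping
 * over the CRC and delivering the FCS to the host cannot coexist.
 */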
   3704static int mlx5e_set_rx_port_ts(struct mlx5_core_dev *mdev, bool enable)
   3705{
   3706	u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {};
   3707	bool supported, curr_state;
   3708	int err;
   3709
   3710	if (!MLX5_CAP_GEN(mdev, ports_check))
   3711		return 0;
   3712
   3713	err = mlx5_query_ports_check(mdev, in, sizeof(in));
   3714	if (err)
   3715		return err;
   3716
   3717	supported = MLX5_GET(pcmr_reg, in, rx_ts_over_crc_cap);
   3718	curr_state = MLX5_GET(pcmr_reg, in, rx_ts_over_crc);
   3719
   3720	if (!supported || enable == curr_state)
   3721		return 0;
   3722
   3723	MLX5_SET(pcmr_reg, in, local_port, 1);
   3724	MLX5_SET(pcmr_reg, in, rx_ts_over_crc, enable);
   3725
   3726	return mlx5_set_ports_check(mdev, in, sizeof(in));
   3727}
   3728
   3729static int set_feature_rx_fcs(struct net_device *netdev, bool enable)
   3730{
   3731	struct mlx5e_priv *priv = netdev_priv(netdev);
   3732	struct mlx5e_channels *chs = &priv->channels;
   3733	struct mlx5_core_dev *mdev = priv->mdev;
   3734	int err;
   3735
   3736	mutex_lock(&priv->state_lock);
   3737
   3738	if (enable) {
   3739		err = mlx5e_set_rx_port_ts(mdev, false);
   3740		if (err)
   3741			goto out;
   3742
   3743		chs->params.scatter_fcs_en = true;
   3744		err = mlx5e_modify_channels_scatter_fcs(chs, true);
   3745		if (err) {
   3746			chs->params.scatter_fcs_en = false;
   3747			mlx5e_set_rx_port_ts(mdev, true);
   3748		}
   3749	} else {
   3750		chs->params.scatter_fcs_en = false;
   3751		err = mlx5e_modify_channels_scatter_fcs(chs, false);
   3752		if (err) {
   3753			chs->params.scatter_fcs_en = true;
   3754			goto out;
   3755		}
   3756		err = mlx5e_set_rx_port_ts(mdev, true);
   3757		if (err) {
   3758			mlx5_core_warn(mdev, "Failed to set RX port timestamp %d\n", err);
   3759			err = 0;
   3760		}
   3761	}
   3762
   3763out:
   3764	mutex_unlock(&priv->state_lock);
   3765	return err;
   3766}
   3767
   3768static int set_feature_rx_vlan(struct net_device *netdev, bool enable)
   3769{
   3770	struct mlx5e_priv *priv = netdev_priv(netdev);
   3771	int err = 0;
   3772
   3773	mutex_lock(&priv->state_lock);
   3774
   3775	priv->channels.params.vlan_strip_disable = !enable;
   3776	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
   3777		goto unlock;
   3778
   3779	err = mlx5e_modify_channels_vsd(&priv->channels, !enable);
   3780	if (err)
   3781		priv->channels.params.vlan_strip_disable = enable;
   3782
   3783unlock:
   3784	mutex_unlock(&priv->state_lock);
   3785
   3786	return err;
   3787}
   3788
   3789#ifdef CONFIG_MLX5_EN_ARFS
   3790static int set_feature_arfs(struct net_device *netdev, bool enable)
   3791{
   3792	struct mlx5e_priv *priv = netdev_priv(netdev);
   3793	int err;
   3794
   3795	if (enable)
   3796		err = mlx5e_arfs_enable(priv);
   3797	else
   3798		err = mlx5e_arfs_disable(priv);
   3799
   3800	return err;
   3801}
   3802#endif
   3803
   3804static int mlx5e_handle_feature(struct net_device *netdev,
   3805				netdev_features_t *features,
   3806				netdev_features_t feature,
   3807				mlx5e_feature_handler feature_handler)
   3808{
   3809	netdev_features_t changes = *features ^ netdev->features;
   3810	bool enable = !!(*features & feature);
   3811	int err;
   3812
   3813	if (!(changes & feature))
   3814		return 0;
   3815
   3816	err = feature_handler(netdev, enable);
   3817	if (err) {
   3818		MLX5E_SET_FEATURE(features, feature, !enable);
   3819		netdev_err(netdev, "%s feature %pNF failed, err %d\n",
   3820			   enable ? "Enable" : "Disable", &feature, err);
   3821		return err;
   3822	}
   3823
   3824	return 0;
   3825}
   3826
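/* mlx5e_handle_feature() above invokes the handler only when the feature bit
 * actually changed; on failure it flips the bit back in oper_features so that
 * mlx5e_set_features() can publish the real state and return -EINVAL.
 * A new offload would be wired up the same way, e.g. (illustrative names only):
 *	err |= MLX5E_HANDLE_FEATURE(NETIF_F_FOO, set_feature_foo);
 */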
   3827int mlx5e_set_features(struct net_device *netdev, netdev_features_t features)
   3828{
   3829	netdev_features_t oper_features = features;
   3830	int err = 0;
   3831
   3832#define MLX5E_HANDLE_FEATURE(feature, handler) \
   3833	mlx5e_handle_feature(netdev, &oper_features, feature, handler)
   3834
   3835	err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro);
   3836	err |= MLX5E_HANDLE_FEATURE(NETIF_F_GRO_HW, set_feature_hw_gro);
   3837	err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_FILTER,
   3838				    set_feature_cvlan_filter);
   3839	err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TC, set_feature_hw_tc);
   3840	err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXALL, set_feature_rx_all);
   3841	err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXFCS, set_feature_rx_fcs);
   3842	err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_RX, set_feature_rx_vlan);
   3843#ifdef CONFIG_MLX5_EN_ARFS
   3844	err |= MLX5E_HANDLE_FEATURE(NETIF_F_NTUPLE, set_feature_arfs);
   3845#endif
   3846	err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TLS_RX, mlx5e_ktls_set_feature_rx);
   3847
   3848	if (err) {
   3849		netdev->features = oper_features;
   3850		return -EINVAL;
   3851	}
   3852
   3853	return 0;
   3854}
   3855
   3856static netdev_features_t mlx5e_fix_uplink_rep_features(struct net_device *netdev,
   3857						       netdev_features_t features)
   3858{
   3859	features &= ~NETIF_F_HW_TLS_RX;
   3860	if (netdev->features & NETIF_F_HW_TLS_RX)
   3861		netdev_warn(netdev, "Disabling hw_tls_rx, not supported in switchdev mode\n");
   3862
   3863	features &= ~NETIF_F_HW_TLS_TX;
   3864	if (netdev->features & NETIF_F_HW_TLS_TX)
   3865		netdev_warn(netdev, "Disabling hw_tls_tx, not supported in switchdev mode\n");
   3866
   3867	features &= ~NETIF_F_NTUPLE;
   3868	if (netdev->features & NETIF_F_NTUPLE)
   3869		netdev_warn(netdev, "Disabling ntuple, not supported in switchdev mode\n");
   3870
   3871	features &= ~NETIF_F_GRO_HW;
   3872	if (netdev->features & NETIF_F_GRO_HW)
   3873		netdev_warn(netdev, "Disabling HW_GRO, not supported in switchdev mode\n");
   3874
   3875	return features;
   3876}
   3877
   3878static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
   3879					    netdev_features_t features)
   3880{
   3881	struct mlx5e_priv *priv = netdev_priv(netdev);
   3882	struct mlx5e_params *params;
   3883
   3884	mutex_lock(&priv->state_lock);
   3885	params = &priv->channels.params;
   3886	if (!priv->fs.vlan ||
   3887	    !bitmap_empty(mlx5e_vlan_get_active_svlans(priv->fs.vlan), VLAN_N_VID)) {
   3888		/* HW strips the outer C-tag header, this is a problem
   3889		 * for S-tag traffic.
   3890		 */
   3891		features &= ~NETIF_F_HW_VLAN_CTAG_RX;
   3892		if (!params->vlan_strip_disable)
   3893			netdev_warn(netdev, "Dropping C-tag vlan stripping offload due to S-tag vlan\n");
   3894	}
   3895
   3896	if (!MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ)) {
   3897		if (features & NETIF_F_LRO) {
   3898			netdev_warn(netdev, "Disabling LRO, not supported in legacy RQ\n");
   3899			features &= ~NETIF_F_LRO;
   3900		}
   3901		if (features & NETIF_F_GRO_HW) {
   3902			netdev_warn(netdev, "Disabling HW-GRO, not supported in legacy RQ\n");
   3903			features &= ~NETIF_F_GRO_HW;
   3904		}
   3905	}
   3906
   3907	if (params->xdp_prog) {
   3908		if (features & NETIF_F_LRO) {
   3909			netdev_warn(netdev, "LRO is incompatible with XDP\n");
   3910			features &= ~NETIF_F_LRO;
   3911		}
   3912		if (features & NETIF_F_GRO_HW) {
   3913			netdev_warn(netdev, "HW GRO is incompatible with XDP\n");
   3914			features &= ~NETIF_F_GRO_HW;
   3915		}
   3916	}
   3917
   3918	if (priv->xsk.refcnt) {
   3919		if (features & NETIF_F_GRO_HW) {
   3920			netdev_warn(netdev, "HW GRO is incompatible with AF_XDP (%u XSKs are active)\n",
   3921				    priv->xsk.refcnt);
   3922			features &= ~NETIF_F_GRO_HW;
   3923		}
   3924	}
   3925
   3926	if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
   3927		features &= ~NETIF_F_RXHASH;
   3928		if (netdev->features & NETIF_F_RXHASH)
   3929			netdev_warn(netdev, "Disabling rxhash, not supported when CQE compress is active\n");
   3930
   3931		if (features & NETIF_F_GRO_HW) {
   3932			netdev_warn(netdev, "Disabling HW-GRO, not supported when CQE compress is active\n");
   3933			features &= ~NETIF_F_GRO_HW;
   3934		}
   3935	}
   3936
   3937	if (mlx5e_is_uplink_rep(priv))
   3938		features = mlx5e_fix_uplink_rep_features(netdev, features);
   3939
   3940	mutex_unlock(&priv->state_lock);
   3941
   3942	return features;
   3943}
   3944
   3945static bool mlx5e_xsk_validate_mtu(struct net_device *netdev,
   3946				   struct mlx5e_channels *chs,
   3947				   struct mlx5e_params *new_params,
   3948				   struct mlx5_core_dev *mdev)
   3949{
   3950	u16 ix;
   3951
   3952	for (ix = 0; ix < chs->params.num_channels; ix++) {
   3953		struct xsk_buff_pool *xsk_pool =
   3954			mlx5e_xsk_get_pool(&chs->params, chs->params.xsk, ix);
   3955		struct mlx5e_xsk_param xsk;
   3956
   3957		if (!xsk_pool)
   3958			continue;
   3959
   3960		mlx5e_build_xsk_param(xsk_pool, &xsk);
   3961
   3962		if (!mlx5e_validate_xsk_param(new_params, &xsk, mdev)) {
   3963			u32 hr = mlx5e_get_linear_rq_headroom(new_params, &xsk);
   3964			int max_mtu_frame, max_mtu_page, max_mtu;
   3965
   3966			/* Two criteria must be met:
   3967			 * 1. HW MTU + all headrooms <= XSK frame size.
   3968			 * 2. Size of SKBs allocated on XDP_PASS <= PAGE_SIZE.
   3969			 */
   3970			max_mtu_frame = MLX5E_HW2SW_MTU(new_params, xsk.chunk_size - hr);
   3971			max_mtu_page = mlx5e_xdp_max_mtu(new_params, &xsk);
   3972			max_mtu = min(max_mtu_frame, max_mtu_page);
   3973
   3974			netdev_err(netdev, "MTU %d is too big for an XSK running on channel %u. Try MTU <= %d\n",
   3975				   new_params->sw_mtu, ix, max_mtu);
   3976			return false;
   3977		}
   3978	}
   3979
   3980	return true;
   3981}
   3982
   3983static bool mlx5e_params_validate_xdp(struct net_device *netdev, struct mlx5e_params *params)
   3984{
   3985	bool is_linear;
   3986
   3987	/* No XSK params: AF_XDP can't be enabled yet at the point of setting
   3988	 * the XDP program.
   3989	 */
   3990	is_linear = mlx5e_rx_is_linear_skb(params, NULL);
   3991
   3992	if (!is_linear && params->rq_wq_type != MLX5_WQ_TYPE_CYCLIC) {
   3993		netdev_warn(netdev, "XDP is not allowed with striding RQ and MTU(%d) > %d\n",
   3994			    params->sw_mtu,
   3995			    mlx5e_xdp_max_mtu(params, NULL));
   3996		return false;
   3997	}
   3998	if (!is_linear && !params->xdp_prog->aux->xdp_has_frags) {
   3999		netdev_warn(netdev, "MTU(%d) > %d, too big for an XDP program not aware of multi buffer\n",
   4000			    params->sw_mtu,
   4001			    mlx5e_xdp_max_mtu(params, NULL));
   4002		return false;
   4003	}
   4004
   4005	return true;
   4006}
   4007
   4008int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
   4009		     mlx5e_fp_preactivate preactivate)
   4010{
   4011	struct mlx5e_priv *priv = netdev_priv(netdev);
   4012	struct mlx5e_params new_params;
   4013	struct mlx5e_params *params;
   4014	bool reset = true;
   4015	int err = 0;
   4016
   4017	mutex_lock(&priv->state_lock);
   4018
   4019	params = &priv->channels.params;
   4020
   4021	new_params = *params;
   4022	new_params.sw_mtu = new_mtu;
   4023	err = mlx5e_validate_params(priv->mdev, &new_params);
   4024	if (err)
   4025		goto out;
   4026
   4027	if (new_params.xdp_prog && !mlx5e_params_validate_xdp(netdev, &new_params)) {
   4028		err = -EINVAL;
   4029		goto out;
   4030	}
   4031
   4032	if (priv->xsk.refcnt &&
   4033	    !mlx5e_xsk_validate_mtu(netdev, &priv->channels,
   4034				    &new_params, priv->mdev)) {
   4035		err = -EINVAL;
   4036		goto out;
   4037	}
   4038
   4039	if (params->packet_merge.type == MLX5E_PACKET_MERGE_LRO)
   4040		reset = false;
   4041
   4042	if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
   4043		bool is_linear_old = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, params, NULL);
   4044		bool is_linear_new = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev,
   4045								  &new_params, NULL);
   4046		u8 ppw_old = mlx5e_mpwqe_log_pkts_per_wqe(params, NULL);
   4047		u8 ppw_new = mlx5e_mpwqe_log_pkts_per_wqe(&new_params, NULL);
   4048
   4049		/* Always reset in linear mode - hw_mtu is used in data path.
   4050		 * Check that the mode was non-linear and didn't change.
   4051		 * If XSK is active, XSK RQs are linear.
   4052		 */
   4053		if (!is_linear_old && !is_linear_new && !priv->xsk.refcnt &&
   4054		    ppw_old == ppw_new)
   4055			reset = false;
   4056	}
   4057
   4058	err = mlx5e_safe_switch_params(priv, &new_params, preactivate, NULL, reset);
   4059
   4060out:
   4061	netdev->mtu = params->sw_mtu;
   4062	mutex_unlock(&priv->state_lock);
   4063	return err;
   4064}
   4065
   4066static int mlx5e_change_nic_mtu(struct net_device *netdev, int new_mtu)
   4067{
   4068	return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu_ctx);
   4069}
   4070
   4071int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx)
   4072{
   4073	bool set  = *(bool *)ctx;
   4074
   4075	return mlx5e_ptp_rx_manage_fs(priv, set);
   4076}
   4077
   4078static int mlx5e_hwstamp_config_no_ptp_rx(struct mlx5e_priv *priv, bool rx_filter)
   4079{
   4080	bool rx_cqe_compress_def = priv->channels.params.rx_cqe_compress_def;
   4081	int err;
   4082
   4083	if (!rx_filter)
   4084		/* Reset CQE compression to Admin default */
   4085		return mlx5e_modify_rx_cqe_compression_locked(priv, rx_cqe_compress_def, false);
   4086
   4087	if (!MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS))
   4088		return 0;
   4089
   4090	/* Disable CQE compression */
   4091	netdev_warn(priv->netdev, "Disabling RX cqe compression\n");
   4092	err = mlx5e_modify_rx_cqe_compression_locked(priv, false, true);
   4093	if (err)
   4094		netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err);
   4095
   4096	return err;
   4097}
   4098
   4099static int mlx5e_hwstamp_config_ptp_rx(struct mlx5e_priv *priv, bool ptp_rx)
   4100{
   4101	struct mlx5e_params new_params;
   4102
   4103	if (ptp_rx == priv->channels.params.ptp_rx)
   4104		return 0;
   4105
   4106	new_params = priv->channels.params;
   4107	new_params.ptp_rx = ptp_rx;
   4108	return mlx5e_safe_switch_params(priv, &new_params, mlx5e_ptp_rx_manage_fs_ctx,
   4109					&new_params.ptp_rx, true);
   4110}
   4111
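/* SIOCSHWTSTAMP handler. RX timestamping conflicts with CQE compression:
 * profiles without the PTP_RX feature simply keep CQE compression disabled
 * while an RX filter is set, whereas PTP-capable profiles switch the channels
 * so that PTP traffic appears to be steered to the dedicated PTP RQ (ptp_rx).
 */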
   4112int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr)
   4113{
   4114	struct hwtstamp_config config;
   4115	bool rx_cqe_compress_def;
   4116	bool ptp_rx;
   4117	int err;
   4118
   4119	if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz) ||
   4120	    (mlx5_clock_get_ptp_index(priv->mdev) == -1))
   4121		return -EOPNOTSUPP;
   4122
   4123	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
   4124		return -EFAULT;
   4125
   4126	/* TX HW timestamp */
   4127	switch (config.tx_type) {
   4128	case HWTSTAMP_TX_OFF:
   4129	case HWTSTAMP_TX_ON:
   4130		break;
   4131	default:
   4132		return -ERANGE;
   4133	}
   4134
   4135	mutex_lock(&priv->state_lock);
   4136	rx_cqe_compress_def = priv->channels.params.rx_cqe_compress_def;
   4137
   4138	/* RX HW timestamp */
   4139	switch (config.rx_filter) {
   4140	case HWTSTAMP_FILTER_NONE:
   4141		ptp_rx = false;
   4142		break;
   4143	case HWTSTAMP_FILTER_ALL:
   4144	case HWTSTAMP_FILTER_SOME:
   4145	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
   4146	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
   4147	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
   4148	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
   4149	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
   4150	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
   4151	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
   4152	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
   4153	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
   4154	case HWTSTAMP_FILTER_PTP_V2_EVENT:
   4155	case HWTSTAMP_FILTER_PTP_V2_SYNC:
   4156	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
   4157	case HWTSTAMP_FILTER_NTP_ALL:
   4158		config.rx_filter = HWTSTAMP_FILTER_ALL;
   4159		/* ptp_rx is set only when both HW timestamping and
   4160		 * CQE compression are set
   4161		 */
   4162		ptp_rx = rx_cqe_compress_def;
   4163		break;
   4164	default:
   4165		err = -ERANGE;
   4166		goto err_unlock;
   4167	}
   4168
   4169	if (!mlx5e_profile_feature_cap(priv->profile, PTP_RX))
   4170		err = mlx5e_hwstamp_config_no_ptp_rx(priv,
   4171						     config.rx_filter != HWTSTAMP_FILTER_NONE);
   4172	else
   4173		err = mlx5e_hwstamp_config_ptp_rx(priv, ptp_rx);
   4174	if (err)
   4175		goto err_unlock;
   4176
   4177	memcpy(&priv->tstamp, &config, sizeof(config));
   4178	mutex_unlock(&priv->state_lock);
   4179
   4180	/* might need to fix some features */
   4181	netdev_update_features(priv->netdev);
   4182
   4183	return copy_to_user(ifr->ifr_data, &config,
   4184			    sizeof(config)) ? -EFAULT : 0;
   4185err_unlock:
   4186	mutex_unlock(&priv->state_lock);
   4187	return err;
   4188}
   4189
   4190int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr)
   4191{
   4192	struct hwtstamp_config *cfg = &priv->tstamp;
   4193
   4194	if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz))
   4195		return -EOPNOTSUPP;
   4196
   4197	return copy_to_user(ifr->ifr_data, cfg, sizeof(*cfg)) ? -EFAULT : 0;
   4198}
   4199
   4200static int mlx5e_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
   4201{
   4202	struct mlx5e_priv *priv = netdev_priv(dev);
   4203
   4204	switch (cmd) {
   4205	case SIOCSHWTSTAMP:
   4206		return mlx5e_hwstamp_set(priv, ifr);
   4207	case SIOCGHWTSTAMP:
   4208		return mlx5e_hwstamp_get(priv, ifr);
   4209	default:
   4210		return -EOPNOTSUPP;
   4211	}
   4212}
   4213
   4214#ifdef CONFIG_MLX5_ESWITCH
   4215int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
   4216{
   4217	struct mlx5e_priv *priv = netdev_priv(dev);
   4218	struct mlx5_core_dev *mdev = priv->mdev;
   4219
   4220	return mlx5_eswitch_set_vport_mac(mdev->priv.eswitch, vf + 1, mac);
   4221}
   4222
   4223static int mlx5e_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos,
   4224			     __be16 vlan_proto)
   4225{
   4226	struct mlx5e_priv *priv = netdev_priv(dev);
   4227	struct mlx5_core_dev *mdev = priv->mdev;
   4228
   4229	if (vlan_proto != htons(ETH_P_8021Q))
   4230		return -EPROTONOSUPPORT;
   4231
   4232	return mlx5_eswitch_set_vport_vlan(mdev->priv.eswitch, vf + 1,
   4233					   vlan, qos);
   4234}
   4235
   4236static int mlx5e_set_vf_spoofchk(struct net_device *dev, int vf, bool setting)
   4237{
   4238	struct mlx5e_priv *priv = netdev_priv(dev);
   4239	struct mlx5_core_dev *mdev = priv->mdev;
   4240
   4241	return mlx5_eswitch_set_vport_spoofchk(mdev->priv.eswitch, vf + 1, setting);
   4242}
   4243
   4244static int mlx5e_set_vf_trust(struct net_device *dev, int vf, bool setting)
   4245{
   4246	struct mlx5e_priv *priv = netdev_priv(dev);
   4247	struct mlx5_core_dev *mdev = priv->mdev;
   4248
   4249	return mlx5_eswitch_set_vport_trust(mdev->priv.eswitch, vf + 1, setting);
   4250}
   4251
   4252int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate,
   4253		      int max_tx_rate)
   4254{
   4255	struct mlx5e_priv *priv = netdev_priv(dev);
   4256	struct mlx5_core_dev *mdev = priv->mdev;
   4257
   4258	return mlx5_eswitch_set_vport_rate(mdev->priv.eswitch, vf + 1,
   4259					   max_tx_rate, min_tx_rate);
   4260}
   4261
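/* Translate between the e-switch vport admin state and the IFLA_VF_LINK_STATE_*
 * values; anything unrecognized maps to AUTO in either direction.
 */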
   4262static int mlx5_vport_link2ifla(u8 esw_link)
   4263{
   4264	switch (esw_link) {
   4265	case MLX5_VPORT_ADMIN_STATE_DOWN:
   4266		return IFLA_VF_LINK_STATE_DISABLE;
   4267	case MLX5_VPORT_ADMIN_STATE_UP:
   4268		return IFLA_VF_LINK_STATE_ENABLE;
   4269	}
   4270	return IFLA_VF_LINK_STATE_AUTO;
   4271}
   4272
   4273static int mlx5_ifla_link2vport(u8 ifla_link)
   4274{
   4275	switch (ifla_link) {
   4276	case IFLA_VF_LINK_STATE_DISABLE:
   4277		return MLX5_VPORT_ADMIN_STATE_DOWN;
   4278	case IFLA_VF_LINK_STATE_ENABLE:
   4279		return MLX5_VPORT_ADMIN_STATE_UP;
   4280	}
   4281	return MLX5_VPORT_ADMIN_STATE_AUTO;
   4282}
   4283
   4284static int mlx5e_set_vf_link_state(struct net_device *dev, int vf,
   4285				   int link_state)
   4286{
   4287	struct mlx5e_priv *priv = netdev_priv(dev);
   4288	struct mlx5_core_dev *mdev = priv->mdev;
   4289
   4290	if (mlx5e_is_uplink_rep(priv))
   4291		return -EOPNOTSUPP;
   4292
   4293	return mlx5_eswitch_set_vport_state(mdev->priv.eswitch, vf + 1,
   4294					    mlx5_ifla_link2vport(link_state));
   4295}
   4296
   4297int mlx5e_get_vf_config(struct net_device *dev,
   4298			int vf, struct ifla_vf_info *ivi)
   4299{
   4300	struct mlx5e_priv *priv = netdev_priv(dev);
   4301	struct mlx5_core_dev *mdev = priv->mdev;
   4302	int err;
   4303
   4304	if (!netif_device_present(dev))
   4305		return -EOPNOTSUPP;
   4306
   4307	err = mlx5_eswitch_get_vport_config(mdev->priv.eswitch, vf + 1, ivi);
   4308	if (err)
   4309		return err;
   4310	ivi->linkstate = mlx5_vport_link2ifla(ivi->linkstate);
   4311	return 0;
   4312}
   4313
   4314int mlx5e_get_vf_stats(struct net_device *dev,
   4315		       int vf, struct ifla_vf_stats *vf_stats)
   4316{
   4317	struct mlx5e_priv *priv = netdev_priv(dev);
   4318	struct mlx5_core_dev *mdev = priv->mdev;
   4319
   4320	return mlx5_eswitch_get_vport_stats(mdev->priv.eswitch, vf + 1,
   4321					    vf_stats);
   4322}
   4323
   4324static bool
   4325mlx5e_has_offload_stats(const struct net_device *dev, int attr_id)
   4326{
   4327	struct mlx5e_priv *priv = netdev_priv(dev);
   4328
   4329	if (!netif_device_present(dev))
   4330		return false;
   4331
   4332	if (!mlx5e_is_uplink_rep(priv))
   4333		return false;
   4334
   4335	return mlx5e_rep_has_offload_stats(dev, attr_id);
   4336}
   4337
   4338static int
   4339mlx5e_get_offload_stats(int attr_id, const struct net_device *dev,
   4340			void *sp)
   4341{
   4342	struct mlx5e_priv *priv = netdev_priv(dev);
   4343
   4344	if (!mlx5e_is_uplink_rep(priv))
   4345		return -EOPNOTSUPP;
   4346
   4347	return mlx5e_rep_get_offload_stats(attr_id, dev, sp);
   4348}
   4349#endif
   4350
   4351static bool mlx5e_tunnel_proto_supported_tx(struct mlx5_core_dev *mdev, u8 proto_type)
   4352{
   4353	switch (proto_type) {
   4354	case IPPROTO_GRE:
   4355		return MLX5_CAP_ETH(mdev, tunnel_stateless_gre);
   4356	case IPPROTO_IPIP:
   4357	case IPPROTO_IPV6:
   4358		return (MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip) ||
   4359			MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip_tx));
   4360	default:
   4361		return false;
   4362	}
   4363}
   4364
   4365static bool mlx5e_gre_tunnel_inner_proto_offload_supported(struct mlx5_core_dev *mdev,
   4366							   struct sk_buff *skb)
   4367{
   4368	switch (skb->inner_protocol) {
   4369	case htons(ETH_P_IP):
   4370	case htons(ETH_P_IPV6):
   4371	case htons(ETH_P_TEB):
   4372		return true;
   4373	case htons(ETH_P_MPLS_UC):
   4374	case htons(ETH_P_MPLS_MC):
   4375		return MLX5_CAP_ETH(mdev, tunnel_stateless_mpls_over_gre);
   4376	}
   4377	return false;
   4378}
   4379
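/* For an encapsulated skb, keep the checksum/GSO features only when the HW can
 * offload this particular tunnel: GRE with a supported inner protocol, IP-in-IP,
 * a VXLAN/GENEVE UDP destination port known to the device, or ESP with IPsec
 * offload. Otherwise CSUM and GSO are stripped at the "out" label.
 */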
   4380static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
   4381						     struct sk_buff *skb,
   4382						     netdev_features_t features)
   4383{
   4384	unsigned int offset = 0;
   4385	struct udphdr *udph;
   4386	u8 proto;
   4387	u16 port;
   4388
   4389	switch (vlan_get_protocol(skb)) {
   4390	case htons(ETH_P_IP):
   4391		proto = ip_hdr(skb)->protocol;
   4392		break;
   4393	case htons(ETH_P_IPV6):
   4394		proto = ipv6_find_hdr(skb, &offset, -1, NULL, NULL);
   4395		break;
   4396	default:
   4397		goto out;
   4398	}
   4399
   4400	switch (proto) {
   4401	case IPPROTO_GRE:
   4402		if (mlx5e_gre_tunnel_inner_proto_offload_supported(priv->mdev, skb))
   4403			return features;
   4404		break;
   4405	case IPPROTO_IPIP:
   4406	case IPPROTO_IPV6:
   4407		if (mlx5e_tunnel_proto_supported_tx(priv->mdev, IPPROTO_IPIP))
   4408			return features;
   4409		break;
   4410	case IPPROTO_UDP:
   4411		udph = udp_hdr(skb);
   4412		port = be16_to_cpu(udph->dest);
   4413
   4414		/* Verify if UDP port is being offloaded by HW */
   4415		if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, port))
   4416			return features;
   4417
   4418#if IS_ENABLED(CONFIG_GENEVE)
   4419		/* Support Geneve offload for default UDP port */
   4420		if (port == GENEVE_UDP_PORT && mlx5_geneve_tx_allowed(priv->mdev))
   4421			return features;
   4422#endif
   4423		break;
   4424#ifdef CONFIG_MLX5_EN_IPSEC
   4425	case IPPROTO_ESP:
   4426		return mlx5e_ipsec_feature_check(skb, features);
   4427#endif
   4428	}
   4429
   4430out:
   4431	/* Disable CSUM and GSO if the udp dport is not offloaded by HW */
   4432	return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
   4433}
   4434
   4435netdev_features_t mlx5e_features_check(struct sk_buff *skb,
   4436				       struct net_device *netdev,
   4437				       netdev_features_t features)
   4438{
   4439	struct mlx5e_priv *priv = netdev_priv(netdev);
   4440
   4441	features = vlan_features_check(skb, features);
   4442	features = vxlan_features_check(skb, features);
   4443
   4444	/* Validate if the tunneled packet is being offloaded by HW */
   4445	if (skb->encapsulation &&
   4446	    (features & NETIF_F_CSUM_MASK || features & NETIF_F_GSO_MASK))
   4447		return mlx5e_tunnel_features_check(priv, skb, features);
   4448
   4449	return features;
   4450}
   4451
   4452static void mlx5e_tx_timeout_work(struct work_struct *work)
   4453{
   4454	struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
   4455					       tx_timeout_work);
   4456	struct net_device *netdev = priv->netdev;
   4457	int i;
   4458
   4459	rtnl_lock();
   4460	mutex_lock(&priv->state_lock);
   4461
   4462	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
   4463		goto unlock;
   4464
   4465	for (i = 0; i < netdev->real_num_tx_queues; i++) {
   4466		struct netdev_queue *dev_queue =
   4467			netdev_get_tx_queue(netdev, i);
   4468		struct mlx5e_txqsq *sq = priv->txq2sq[i];
   4469
   4470		if (!netif_xmit_stopped(dev_queue))
   4471			continue;
   4472
   4473		if (mlx5e_reporter_tx_timeout(sq))
   4474		/* break if we tried to reopen the channels */
   4475			break;
   4476	}
   4477
   4478unlock:
   4479	mutex_unlock(&priv->state_lock);
   4480	rtnl_unlock();
   4481}
   4482
   4483static void mlx5e_tx_timeout(struct net_device *dev, unsigned int txqueue)
   4484{
   4485	struct mlx5e_priv *priv = netdev_priv(dev);
   4486
   4487	netdev_err(dev, "TX timeout detected\n");
   4488	queue_work(priv->wq, &priv->tx_timeout_work);
   4489}
   4490
   4491static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog)
   4492{
   4493	struct net_device *netdev = priv->netdev;
   4494	struct mlx5e_params new_params;
   4495
   4496	if (priv->channels.params.packet_merge.type != MLX5E_PACKET_MERGE_NONE) {
   4497		netdev_warn(netdev, "can't set XDP while HW-GRO/LRO is on, disable them first\n");
   4498		return -EINVAL;
   4499	}
   4500
   4501	new_params = priv->channels.params;
   4502	new_params.xdp_prog = prog;
   4503
   4504	if (!mlx5e_params_validate_xdp(netdev, &new_params))
   4505		return -EINVAL;
   4506
   4507	return 0;
   4508}
   4509
   4510static void mlx5e_rq_replace_xdp_prog(struct mlx5e_rq *rq, struct bpf_prog *prog)
   4511{
   4512	struct bpf_prog *old_prog;
   4513
   4514	old_prog = rcu_replace_pointer(rq->xdp_prog, prog,
   4515				       lockdep_is_held(&rq->priv->state_lock));
   4516	if (old_prog)
   4517		bpf_prog_put(old_prog);
   4518}
   4519
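/* Install or remove an XDP program. A full channel reset is needed only when
 * going from no program to a program or vice versa, since the RQ type may
 * change; when merely swapping programs, each channel RQ (and XSK RQ) takes a
 * reference to the new program in place and drops its old one.
 */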
   4520static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
   4521{
   4522	struct mlx5e_priv *priv = netdev_priv(netdev);
   4523	struct mlx5e_params new_params;
   4524	struct bpf_prog *old_prog;
   4525	int err = 0;
   4526	bool reset;
   4527	int i;
   4528
   4529	mutex_lock(&priv->state_lock);
   4530
   4531	if (prog) {
   4532		err = mlx5e_xdp_allowed(priv, prog);
   4533		if (err)
   4534			goto unlock;
   4535	}
   4536
   4537	/* no need for full reset when exchanging programs */
   4538	reset = (!priv->channels.params.xdp_prog || !prog);
   4539
   4540	new_params = priv->channels.params;
   4541	new_params.xdp_prog = prog;
   4542	if (reset)
   4543		mlx5e_set_rq_type(priv->mdev, &new_params);
   4544	old_prog = priv->channels.params.xdp_prog;
   4545
   4546	err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset);
   4547	if (err)
   4548		goto unlock;
   4549
   4550	if (old_prog)
   4551		bpf_prog_put(old_prog);
   4552
   4553	if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || reset)
   4554		goto unlock;
   4555
   4556	/* When exchanging programs without a reset, update the program
   4557	 * ref counts on behalf of the channels' RQs here.
   4558	 */
   4559	bpf_prog_add(prog, priv->channels.num);
   4560	for (i = 0; i < priv->channels.num; i++) {
   4561		struct mlx5e_channel *c = priv->channels.c[i];
   4562
   4563		mlx5e_rq_replace_xdp_prog(&c->rq, prog);
   4564		if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) {
   4565			bpf_prog_inc(prog);
   4566			mlx5e_rq_replace_xdp_prog(&c->xskrq, prog);
   4567		}
   4568	}
   4569
   4570unlock:
   4571	mutex_unlock(&priv->state_lock);
   4572
   4573	/* Need to fix some features. */
   4574	if (!err)
   4575		netdev_update_features(netdev);
   4576
   4577	return err;
   4578}
   4579
   4580static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp)
   4581{
   4582	switch (xdp->command) {
   4583	case XDP_SETUP_PROG:
   4584		return mlx5e_xdp_set(dev, xdp->prog);
   4585	case XDP_SETUP_XSK_POOL:
   4586		return mlx5e_xsk_setup_pool(dev, xdp->xsk.pool,
   4587					    xdp->xsk.queue_id);
   4588	default:
   4589		return -EINVAL;
   4590	}
   4591}
   4592
   4593#ifdef CONFIG_MLX5_ESWITCH
   4594static int mlx5e_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
   4595				struct net_device *dev, u32 filter_mask,
   4596				int nlflags)
   4597{
   4598	struct mlx5e_priv *priv = netdev_priv(dev);
   4599	struct mlx5_core_dev *mdev = priv->mdev;
   4600	u8 mode, setting;
   4601	int err;
   4602
   4603	err = mlx5_eswitch_get_vepa(mdev->priv.eswitch, &setting);
   4604	if (err)
   4605		return err;
   4606	mode = setting ? BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB;
   4607	return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
   4608				       mode,
   4609				       0, 0, nlflags, filter_mask, NULL);
   4610}
   4611
   4612static int mlx5e_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
   4613				u16 flags, struct netlink_ext_ack *extack)
   4614{
   4615	struct mlx5e_priv *priv = netdev_priv(dev);
   4616	struct mlx5_core_dev *mdev = priv->mdev;
   4617	struct nlattr *attr, *br_spec;
   4618	u16 mode = BRIDGE_MODE_UNDEF;
   4619	u8 setting;
   4620	int rem;
   4621
   4622	br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
   4623	if (!br_spec)
   4624		return -EINVAL;
   4625
   4626	nla_for_each_nested(attr, br_spec, rem) {
   4627		if (nla_type(attr) != IFLA_BRIDGE_MODE)
   4628			continue;
   4629
   4630		if (nla_len(attr) < sizeof(mode))
   4631			return -EINVAL;
   4632
   4633		mode = nla_get_u16(attr);
   4634		if (mode > BRIDGE_MODE_VEPA)
   4635			return -EINVAL;
   4636
   4637		break;
   4638	}
   4639
   4640	if (mode == BRIDGE_MODE_UNDEF)
   4641		return -EINVAL;
   4642
   4643	setting = (mode == BRIDGE_MODE_VEPA) ? 1 : 0;
   4644	return mlx5_eswitch_set_vepa(mdev->priv.eswitch, setting);
   4645}
   4646#endif
   4647
   4648const struct net_device_ops mlx5e_netdev_ops = {
   4649	.ndo_open                = mlx5e_open,
   4650	.ndo_stop                = mlx5e_close,
   4651	.ndo_start_xmit          = mlx5e_xmit,
   4652	.ndo_setup_tc            = mlx5e_setup_tc,
   4653	.ndo_select_queue        = mlx5e_select_queue,
   4654	.ndo_get_stats64         = mlx5e_get_stats,
   4655	.ndo_set_rx_mode         = mlx5e_set_rx_mode,
   4656	.ndo_set_mac_address     = mlx5e_set_mac,
   4657	.ndo_vlan_rx_add_vid     = mlx5e_vlan_rx_add_vid,
   4658	.ndo_vlan_rx_kill_vid    = mlx5e_vlan_rx_kill_vid,
   4659	.ndo_set_features        = mlx5e_set_features,
   4660	.ndo_fix_features        = mlx5e_fix_features,
   4661	.ndo_change_mtu          = mlx5e_change_nic_mtu,
   4662	.ndo_eth_ioctl           = mlx5e_ioctl,
   4663	.ndo_set_tx_maxrate      = mlx5e_set_tx_maxrate,
   4664	.ndo_features_check      = mlx5e_features_check,
   4665	.ndo_tx_timeout          = mlx5e_tx_timeout,
   4666	.ndo_bpf		 = mlx5e_xdp,
   4667	.ndo_xdp_xmit            = mlx5e_xdp_xmit,
   4668	.ndo_xsk_wakeup          = mlx5e_xsk_wakeup,
   4669#ifdef CONFIG_MLX5_EN_ARFS
   4670	.ndo_rx_flow_steer	 = mlx5e_rx_flow_steer,
   4671#endif
   4672#ifdef CONFIG_MLX5_ESWITCH
   4673	.ndo_bridge_setlink      = mlx5e_bridge_setlink,
   4674	.ndo_bridge_getlink      = mlx5e_bridge_getlink,
   4675
   4676	/* SRIOV E-Switch NDOs */
   4677	.ndo_set_vf_mac          = mlx5e_set_vf_mac,
   4678	.ndo_set_vf_vlan         = mlx5e_set_vf_vlan,
   4679	.ndo_set_vf_spoofchk     = mlx5e_set_vf_spoofchk,
   4680	.ndo_set_vf_trust        = mlx5e_set_vf_trust,
   4681	.ndo_set_vf_rate         = mlx5e_set_vf_rate,
   4682	.ndo_get_vf_config       = mlx5e_get_vf_config,
   4683	.ndo_set_vf_link_state   = mlx5e_set_vf_link_state,
   4684	.ndo_get_vf_stats        = mlx5e_get_vf_stats,
   4685	.ndo_has_offload_stats   = mlx5e_has_offload_stats,
   4686	.ndo_get_offload_stats   = mlx5e_get_offload_stats,
   4687#endif
   4688	.ndo_get_devlink_port    = mlx5e_get_devlink_port,
   4689};
   4690
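/* Return the smallest supported LRO timer period that is >= wanted_timeout;
 * if none is large enough, fall back to the largest supported period (the
 * last entry of lro_timer_supported_periods).
 */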
   4691static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout)
   4692{
   4693	int i;
   4694
   4695	/* The supported periods are organized in ascending order */
   4696	for (i = 0; i < MLX5E_LRO_TIMEOUT_ARR_SIZE - 1; i++)
   4697		if (MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]) >= wanted_timeout)
   4698			break;
   4699
   4700	return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]);
   4701}
   4702
   4703void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu)
   4704{
   4705	struct mlx5e_params *params = &priv->channels.params;
   4706	struct mlx5_core_dev *mdev = priv->mdev;
   4707	u8 rx_cq_period_mode;
   4708
   4709	params->sw_mtu = mtu;
   4710	params->hard_mtu = MLX5E_ETH_HARD_MTU;
   4711	params->num_channels = min_t(unsigned int, MLX5E_MAX_NUM_CHANNELS / 2,
   4712				     priv->max_nch);
   4713	mlx5e_params_mqprio_reset(params);
   4714
   4715	/* SQ */
   4716	params->log_sq_size = is_kdump_kernel() ?
   4717		MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE :
   4718		MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
   4719	MLX5E_SET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE, mlx5e_tx_mpwqe_supported(mdev));
   4720
   4721	/* XDP SQ */
   4722	MLX5E_SET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE, mlx5e_tx_mpwqe_supported(mdev));
   4723
   4724	/* set CQE compression */
   4725	params->rx_cqe_compress_def = false;
   4726	if (MLX5_CAP_GEN(mdev, cqe_compression) &&
   4727	    MLX5_CAP_GEN(mdev, vport_group_manager))
   4728		params->rx_cqe_compress_def = slow_pci_heuristic(mdev);
   4729
   4730	MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def);
   4731	MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE, false);
   4732
   4733	/* RQ */
   4734	mlx5e_build_rq_params(mdev, params);
   4735
   4736	/* HW LRO */
   4737	if (MLX5_CAP_ETH(mdev, lro_cap) &&
   4738	    params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
   4739		/* No XSK params: checking the availability of striding RQ in general. */
   4740		if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL))
   4741			params->packet_merge.type = slow_pci_heuristic(mdev) ?
   4742				MLX5E_PACKET_MERGE_NONE : MLX5E_PACKET_MERGE_LRO;
   4743	}
   4744	params->packet_merge.timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT);
   4745
   4746	/* CQ moderation params */
   4747	rx_cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
   4748			MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
   4749			MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
   4750	params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
   4751	params->tx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
   4752	mlx5e_set_rx_cq_mode_params(params, rx_cq_period_mode);
   4753	mlx5e_set_tx_cq_mode_params(params, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
   4754
   4755	/* TX inline */
   4756	mlx5_query_min_inline(mdev, &params->tx_min_inline_mode);
   4757
   4758	params->tunneled_offload_en = mlx5_tunnel_inner_ft_supported(mdev);
   4759
   4760	/* AF_XDP */
   4761	params->xsk = xsk;
   4762
   4763	/* Do not update netdev->features directly in here;
   4764	 * mlx5e_update_features() is called from mlx5e_attach_netdev().
   4765	 * To change netdev->features, modify mlx5e_fix_features() instead.
   4766	 */
   4767}
   4768
   4769static void mlx5e_set_netdev_dev_addr(struct net_device *netdev)
   4770{
   4771	struct mlx5e_priv *priv = netdev_priv(netdev);
   4772	u8 addr[ETH_ALEN];
   4773
   4774	mlx5_query_mac_address(priv->mdev, addr);
   4775	if (is_zero_ether_addr(addr) &&
   4776	    !MLX5_CAP_GEN(priv->mdev, vport_group_manager)) {
   4777		eth_hw_addr_random(netdev);
   4778		mlx5_core_info(priv->mdev, "Assigned random MAC address %pM\n", netdev->dev_addr);
   4779		return;
   4780	}
   4781
   4782	eth_hw_addr_set(netdev, addr);
   4783}
   4784
   4785static int mlx5e_vxlan_set_port(struct net_device *netdev, unsigned int table,
   4786				unsigned int entry, struct udp_tunnel_info *ti)
   4787{
   4788	struct mlx5e_priv *priv = netdev_priv(netdev);
   4789
   4790	return mlx5_vxlan_add_port(priv->mdev->vxlan, ntohs(ti->port));
   4791}
   4792
   4793static int mlx5e_vxlan_unset_port(struct net_device *netdev, unsigned int table,
   4794				  unsigned int entry, struct udp_tunnel_info *ti)
   4795{
   4796	struct mlx5e_priv *priv = netdev_priv(netdev);
   4797
   4798	return mlx5_vxlan_del_port(priv->mdev->vxlan, ntohs(ti->port));
   4799}
   4800
   4801void mlx5e_vxlan_set_netdev_info(struct mlx5e_priv *priv)
   4802{
   4803	if (!mlx5_vxlan_allowed(priv->mdev->vxlan))
   4804		return;
   4805
   4806	priv->nic_info.set_port = mlx5e_vxlan_set_port;
   4807	priv->nic_info.unset_port = mlx5e_vxlan_unset_port;
   4808	priv->nic_info.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP |
   4809				UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN;
   4810	priv->nic_info.tables[0].tunnel_types = UDP_TUNNEL_TYPE_VXLAN;
   4811	/* Don't count the space hard-coded to the IANA port */
   4812	priv->nic_info.tables[0].n_entries =
   4813		mlx5_vxlan_max_udp_ports(priv->mdev) - 1;
   4814
   4815	priv->netdev->udp_tunnel_nic_info = &priv->nic_info;
   4816}
   4817
   4818static bool mlx5e_tunnel_any_tx_proto_supported(struct mlx5_core_dev *mdev)
   4819{
   4820	int tt;
   4821
   4822	for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) {
   4823		if (mlx5e_tunnel_proto_supported_tx(mdev, mlx5_get_proto_by_tunnel_type(tt)))
   4824			return true;
   4825	}
   4826	return (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev));
   4827}
   4828
   4829static void mlx5e_build_nic_netdev(struct net_device *netdev)
   4830{
   4831	struct mlx5e_priv *priv = netdev_priv(netdev);
   4832	struct mlx5_core_dev *mdev = priv->mdev;
   4833	bool fcs_supported;
   4834	bool fcs_enabled;
   4835
   4836	SET_NETDEV_DEV(netdev, mdev->device);
   4837
   4838	netdev->netdev_ops = &mlx5e_netdev_ops;
   4839
   4840	mlx5e_dcbnl_build_netdev(netdev);
   4841
   4842	netdev->watchdog_timeo    = 15 * HZ;
   4843
   4844	netdev->ethtool_ops	  = &mlx5e_ethtool_ops;
   4845
   4846	netdev->vlan_features    |= NETIF_F_SG;
   4847	netdev->vlan_features    |= NETIF_F_HW_CSUM;
   4848	netdev->vlan_features    |= NETIF_F_GRO;
   4849	netdev->vlan_features    |= NETIF_F_TSO;
   4850	netdev->vlan_features    |= NETIF_F_TSO6;
   4851	netdev->vlan_features    |= NETIF_F_RXCSUM;
   4852	netdev->vlan_features    |= NETIF_F_RXHASH;
   4853	netdev->vlan_features    |= NETIF_F_GSO_PARTIAL;
   4854
   4855	netdev->mpls_features    |= NETIF_F_SG;
   4856	netdev->mpls_features    |= NETIF_F_HW_CSUM;
   4857	netdev->mpls_features    |= NETIF_F_TSO;
   4858	netdev->mpls_features    |= NETIF_F_TSO6;
   4859
   4860	netdev->hw_enc_features  |= NETIF_F_HW_VLAN_CTAG_TX;
   4861	netdev->hw_enc_features  |= NETIF_F_HW_VLAN_CTAG_RX;
   4862
   4863	/* Tunneled LRO is not supported in the driver, and the same RQs are
   4864	 * shared between inner and outer TIRs, so the driver can't disable LRO
   4865	 * for inner TIRs while having it enabled for outer TIRs. Due to this,
   4866	 * block LRO altogether if the firmware declares tunneled LRO support.
   4867	 */
   4868	if (!!MLX5_CAP_ETH(mdev, lro_cap) &&
   4869	    !MLX5_CAP_ETH(mdev, tunnel_lro_vxlan) &&
   4870	    !MLX5_CAP_ETH(mdev, tunnel_lro_gre) &&
   4871	    mlx5e_check_fragmented_striding_rq_cap(mdev))
   4872		netdev->vlan_features    |= NETIF_F_LRO;
   4873
   4874	netdev->hw_features       = netdev->vlan_features;
   4875	netdev->hw_features      |= NETIF_F_HW_VLAN_CTAG_TX;
   4876	netdev->hw_features      |= NETIF_F_HW_VLAN_CTAG_RX;
   4877	netdev->hw_features      |= NETIF_F_HW_VLAN_CTAG_FILTER;
   4878	netdev->hw_features      |= NETIF_F_HW_VLAN_STAG_TX;
   4879
   4880	if (mlx5e_tunnel_any_tx_proto_supported(mdev)) {
   4881		netdev->hw_enc_features |= NETIF_F_HW_CSUM;
   4882		netdev->hw_enc_features |= NETIF_F_TSO;
   4883		netdev->hw_enc_features |= NETIF_F_TSO6;
   4884		netdev->hw_enc_features |= NETIF_F_GSO_PARTIAL;
   4885	}
   4886
   4887	if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev)) {
   4888		netdev->hw_features     |= NETIF_F_GSO_UDP_TUNNEL |
   4889					   NETIF_F_GSO_UDP_TUNNEL_CSUM;
   4890		netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL |
   4891					   NETIF_F_GSO_UDP_TUNNEL_CSUM;
   4892		netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM;
   4893		netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL |
   4894					 NETIF_F_GSO_UDP_TUNNEL_CSUM;
   4895	}
   4896
   4897	if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_GRE)) {
   4898		netdev->hw_features     |= NETIF_F_GSO_GRE |
   4899					   NETIF_F_GSO_GRE_CSUM;
   4900		netdev->hw_enc_features |= NETIF_F_GSO_GRE |
   4901					   NETIF_F_GSO_GRE_CSUM;
   4902		netdev->gso_partial_features |= NETIF_F_GSO_GRE |
   4903						NETIF_F_GSO_GRE_CSUM;
   4904	}
   4905
   4906	if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_IPIP)) {
   4907		netdev->hw_features |= NETIF_F_GSO_IPXIP4 |
   4908				       NETIF_F_GSO_IPXIP6;
   4909		netdev->hw_enc_features |= NETIF_F_GSO_IPXIP4 |
   4910					   NETIF_F_GSO_IPXIP6;
   4911		netdev->gso_partial_features |= NETIF_F_GSO_IPXIP4 |
   4912						NETIF_F_GSO_IPXIP6;
   4913	}
   4914
   4915	netdev->gso_partial_features             |= NETIF_F_GSO_UDP_L4;
   4916	netdev->hw_features                      |= NETIF_F_GSO_UDP_L4;
   4917	netdev->features                         |= NETIF_F_GSO_UDP_L4;
   4918
   4919	mlx5_query_port_fcs(mdev, &fcs_supported, &fcs_enabled);
   4920
   4921	if (fcs_supported)
   4922		netdev->hw_features |= NETIF_F_RXALL;
   4923
   4924	if (MLX5_CAP_ETH(mdev, scatter_fcs))
   4925		netdev->hw_features |= NETIF_F_RXFCS;
   4926
   4927	if (mlx5_qos_is_supported(mdev))
   4928		netdev->hw_features |= NETIF_F_HW_TC;
   4929
   4930	netdev->features          = netdev->hw_features;
   4931
   4932	/* Defaults */
   4933	if (fcs_enabled)
   4934		netdev->features  &= ~NETIF_F_RXALL;
   4935	netdev->features  &= ~NETIF_F_LRO;
   4936	netdev->features  &= ~NETIF_F_GRO_HW;
   4937	netdev->features  &= ~NETIF_F_RXFCS;
   4938
   4939#define FT_CAP(f) MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.f)
   4940	if (FT_CAP(flow_modify_en) &&
   4941	    FT_CAP(modify_root) &&
   4942	    FT_CAP(identified_miss_table_mode) &&
   4943	    FT_CAP(flow_table_modify)) {
   4944#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
   4945		netdev->hw_features      |= NETIF_F_HW_TC;
   4946#endif
   4947#ifdef CONFIG_MLX5_EN_ARFS
   4948		netdev->hw_features	 |= NETIF_F_NTUPLE;
   4949#endif
   4950	}
   4951
   4952	netdev->features         |= NETIF_F_HIGHDMA;
   4953	netdev->features         |= NETIF_F_HW_VLAN_STAG_FILTER;
   4954
   4955	netdev->priv_flags       |= IFF_UNICAST_FLT;
   4956
   4957	netif_set_tso_max_size(netdev, GSO_MAX_SIZE);
   4958	mlx5e_set_netdev_dev_addr(netdev);
   4959	mlx5e_ipsec_build_netdev(priv);
   4960	mlx5e_ktls_build_netdev(priv);
   4961}
   4962
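/* Allocate two Q counters: one used by the regular RQs and one dedicated to
 * the drop RQ. On allocation failure the IDs remain 0, which
 * mlx5e_destroy_q_counters() treats as "not allocated".
 */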
   4963void mlx5e_create_q_counters(struct mlx5e_priv *priv)
   4964{
   4965	u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
   4966	u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
   4967	struct mlx5_core_dev *mdev = priv->mdev;
   4968	int err;
   4969
   4970	MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
   4971	err = mlx5_cmd_exec_inout(mdev, alloc_q_counter, in, out);
   4972	if (!err)
   4973		priv->q_counter =
   4974			MLX5_GET(alloc_q_counter_out, out, counter_set_id);
   4975
   4976	err = mlx5_cmd_exec_inout(mdev, alloc_q_counter, in, out);
   4977	if (!err)
   4978		priv->drop_rq_q_counter =
   4979			MLX5_GET(alloc_q_counter_out, out, counter_set_id);
   4980}
   4981
   4982void mlx5e_destroy_q_counters(struct mlx5e_priv *priv)
   4983{
   4984	u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
   4985
   4986	MLX5_SET(dealloc_q_counter_in, in, opcode,
   4987		 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
   4988	if (priv->q_counter) {
   4989		MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
   4990			 priv->q_counter);
   4991		mlx5_cmd_exec_in(priv->mdev, dealloc_q_counter, in);
   4992	}
   4993
   4994	if (priv->drop_rq_q_counter) {
   4995		MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
   4996			 priv->drop_rq_q_counter);
   4997		mlx5_cmd_exec_in(priv->mdev, dealloc_q_counter, in);
   4998	}
   4999}
   5000
   5001static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
   5002			  struct net_device *netdev)
   5003{
   5004	struct mlx5e_priv *priv = netdev_priv(netdev);
   5005	int err;
   5006
   5007	mlx5e_build_nic_params(priv, &priv->xsk, netdev->mtu);
   5008	mlx5e_vxlan_set_netdev_info(priv);
   5009
   5010	mlx5e_timestamp_init(priv);
   5011
   5012	err = mlx5e_fs_init(priv);
   5013	if (err) {
   5014		mlx5_core_err(mdev, "FS initialization failed, %d\n", err);
   5015		return err;
   5016	}
   5017
   5018	err = mlx5e_ipsec_init(priv);
   5019	if (err)
   5020		mlx5_core_err(mdev, "IPSec initialization failed, %d\n", err);
   5021
   5022	err = mlx5e_ktls_init(priv);
   5023	if (err)
   5024		mlx5_core_err(mdev, "TLS initialization failed, %d\n", err);
   5025
   5026	mlx5e_health_create_reporters(priv);
   5027	return 0;
   5028}
   5029
   5030static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
   5031{
   5032	mlx5e_health_destroy_reporters(priv);
   5033	mlx5e_ktls_cleanup(priv);
   5034	mlx5e_ipsec_cleanup(priv);
   5035	mlx5e_fs_cleanup(priv);
   5036}
   5037
   5038static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
   5039{
   5040	struct mlx5_core_dev *mdev = priv->mdev;
   5041	enum mlx5e_rx_res_features features;
   5042	int err;
   5043
   5044	priv->rx_res = mlx5e_rx_res_alloc();
   5045	if (!priv->rx_res)
   5046		return -ENOMEM;
   5047
   5048	mlx5e_create_q_counters(priv);
   5049
   5050	err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
   5051	if (err) {
   5052		mlx5_core_err(mdev, "open drop rq failed, %d\n", err);
   5053		goto err_destroy_q_counters;
   5054	}
   5055
   5056	features = MLX5E_RX_RES_FEATURE_XSK | MLX5E_RX_RES_FEATURE_PTP;
   5057	if (priv->channels.params.tunneled_offload_en)
   5058		features |= MLX5E_RX_RES_FEATURE_INNER_FT;
   5059	err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, features,
   5060				priv->max_nch, priv->drop_rq.rqn,
   5061				&priv->channels.params.packet_merge,
   5062				priv->channels.params.num_channels);
   5063	if (err)
   5064		goto err_close_drop_rq;
   5065
   5066	err = mlx5e_create_flow_steering(priv);
   5067	if (err) {
   5068		mlx5_core_warn(mdev, "create flow steering failed, %d\n", err);
   5069		goto err_destroy_rx_res;
   5070	}
   5071
   5072	err = mlx5e_tc_nic_init(priv);
   5073	if (err)
   5074		goto err_destroy_flow_steering;
   5075
   5076	err = mlx5e_accel_init_rx(priv);
   5077	if (err)
   5078		goto err_tc_nic_cleanup;
   5079
   5080#ifdef CONFIG_MLX5_EN_ARFS
   5081	priv->netdev->rx_cpu_rmap =  mlx5_eq_table_get_rmap(priv->mdev);
   5082#endif
   5083
   5084	return 0;
   5085
   5086err_tc_nic_cleanup:
   5087	mlx5e_tc_nic_cleanup(priv);
   5088err_destroy_flow_steering:
   5089	mlx5e_destroy_flow_steering(priv);
   5090err_destroy_rx_res:
   5091	mlx5e_rx_res_destroy(priv->rx_res);
   5092err_close_drop_rq:
   5093	mlx5e_close_drop_rq(&priv->drop_rq);
   5094err_destroy_q_counters:
   5095	mlx5e_destroy_q_counters(priv);
   5096	mlx5e_rx_res_free(priv->rx_res);
   5097	priv->rx_res = NULL;
   5098	return err;
   5099}
   5100
   5101static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv)
   5102{
   5103	mlx5e_accel_cleanup_rx(priv);
   5104	mlx5e_tc_nic_cleanup(priv);
   5105	mlx5e_destroy_flow_steering(priv);
   5106	mlx5e_rx_res_destroy(priv->rx_res);
   5107	mlx5e_close_drop_rq(&priv->drop_rq);
   5108	mlx5e_destroy_q_counters(priv);
   5109	mlx5e_rx_res_free(priv->rx_res);
   5110	priv->rx_res = NULL;
   5111}
   5112
   5113static int mlx5e_init_nic_tx(struct mlx5e_priv *priv)
   5114{
   5115	int err;
   5116
   5117	err = mlx5e_create_tises(priv);
   5118	if (err) {
   5119		mlx5_core_warn(priv->mdev, "create tises failed, %d\n", err);
   5120		return err;
   5121	}
   5122
   5123	mlx5e_dcbnl_initialize(priv);
   5124	return 0;
   5125}
   5126
   5127static void mlx5e_nic_enable(struct mlx5e_priv *priv)
   5128{
   5129	struct net_device *netdev = priv->netdev;
   5130	struct mlx5_core_dev *mdev = priv->mdev;
   5131
   5132	mlx5e_init_l2_addr(priv);
   5133
   5134	/* Mark the link as currently not needed by the driver */
   5135	if (!netif_running(netdev))
   5136		mlx5e_modify_admin_state(mdev, MLX5_PORT_DOWN);
   5137
   5138	mlx5e_set_netdev_mtu_boundaries(priv);
   5139	mlx5e_set_dev_port_mtu(priv);
   5140
   5141	mlx5_lag_add_netdev(mdev, netdev);
   5142
   5143	mlx5e_enable_async_events(priv);
   5144	mlx5e_enable_blocking_events(priv);
   5145	if (mlx5e_monitor_counter_supported(priv))
   5146		mlx5e_monitor_counter_init(priv);
   5147
   5148	mlx5e_hv_vhca_stats_create(priv);
   5149	if (netdev->reg_state != NETREG_REGISTERED)
   5150		return;
   5151	mlx5e_dcbnl_init_app(priv);
   5152
   5153	mlx5e_nic_set_rx_mode(priv);
   5154
   5155	rtnl_lock();
   5156	if (netif_running(netdev))
   5157		mlx5e_open(netdev);
   5158	udp_tunnel_nic_reset_ntf(priv->netdev);
   5159	netif_device_attach(netdev);
   5160	rtnl_unlock();
   5161}
   5162
   5163static void mlx5e_nic_disable(struct mlx5e_priv *priv)
   5164{
   5165	struct mlx5_core_dev *mdev = priv->mdev;
   5166
   5167	if (priv->netdev->reg_state == NETREG_REGISTERED)
   5168		mlx5e_dcbnl_delete_app(priv);
   5169
   5170	rtnl_lock();
   5171	if (netif_running(priv->netdev))
   5172		mlx5e_close(priv->netdev);
   5173	netif_device_detach(priv->netdev);
   5174	rtnl_unlock();
   5175
   5176	mlx5e_nic_set_rx_mode(priv);
   5177
   5178	mlx5e_hv_vhca_stats_destroy(priv);
   5179	if (mlx5e_monitor_counter_supported(priv))
   5180		mlx5e_monitor_counter_cleanup(priv);
   5181
   5182	mlx5e_disable_blocking_events(priv);
   5183	if (priv->en_trap) {
   5184		mlx5e_deactivate_trap(priv);
   5185		mlx5e_close_trap(priv->en_trap);
   5186		priv->en_trap = NULL;
   5187	}
   5188	mlx5e_disable_async_events(priv);
   5189	mlx5_lag_remove_netdev(mdev, priv->netdev);
   5190	mlx5_vxlan_reset_to_default(mdev->vxlan);
   5191}
   5192
   5193int mlx5e_update_nic_rx(struct mlx5e_priv *priv)
   5194{
   5195	return mlx5e_refresh_tirs(priv, false, false);
   5196}
   5197
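/* The default NIC profile: wires the init/cleanup, RX/TX setup and
 * enable/disable callbacks above into the generic netdev management code
 * below, and advertises PTP RX/TX and HTB QoS as supported features.
 */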
   5198static const struct mlx5e_profile mlx5e_nic_profile = {
   5199	.init		   = mlx5e_nic_init,
   5200	.cleanup	   = mlx5e_nic_cleanup,
   5201	.init_rx	   = mlx5e_init_nic_rx,
   5202	.cleanup_rx	   = mlx5e_cleanup_nic_rx,
   5203	.init_tx	   = mlx5e_init_nic_tx,
   5204	.cleanup_tx	   = mlx5e_cleanup_nic_tx,
   5205	.enable		   = mlx5e_nic_enable,
   5206	.disable	   = mlx5e_nic_disable,
   5207	.update_rx	   = mlx5e_update_nic_rx,
   5208	.update_stats	   = mlx5e_stats_update_ndo_stats,
   5209	.update_carrier	   = mlx5e_update_carrier,
   5210	.rx_handlers       = &mlx5e_rx_handlers_nic,
   5211	.max_tc		   = MLX5E_MAX_NUM_TC,
   5212	.rq_groups	   = MLX5E_NUM_RQ_GROUPS(XSK),
   5213	.stats_grps	   = mlx5e_nic_stats_grps,
   5214	.stats_grps_num	   = mlx5e_nic_stats_grps_num,
   5215	.features          = BIT(MLX5E_PROFILE_FEATURE_PTP_RX) |
   5216		BIT(MLX5E_PROFILE_FEATURE_PTP_TX) |
   5217		BIT(MLX5E_PROFILE_FEATURE_QOS_HTB),
   5218};
   5219
   5220static int mlx5e_profile_max_num_channels(struct mlx5_core_dev *mdev,
   5221					  const struct mlx5e_profile *profile)
   5222{
   5223	int nch;
   5224
   5225	nch = mlx5e_get_max_num_channels(mdev);
   5226
   5227	if (profile->max_nch_limit)
   5228		nch = min_t(int, nch, profile->max_nch_limit(mdev));
   5229	return nch;
   5230}
   5231
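/* The effective channel count is the minimum of three limits: the core
 * (profile) limit, the netdev RX queue budget divided by the number of RQ
 * groups, and the netdev TX queue budget, minus QoS leaf and PTP queues,
 * divided by the number of TCs. Illustrative figures (hypothetical caps):
 * core limit 32; 64 RX queues / 2 RQ groups = 32;
 * (520 TX queues - 256 QoS leaves - 8 PTP) / 8 TCs = 32; max_nch = 32.
 */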
   5232static unsigned int
   5233mlx5e_calc_max_nch(struct mlx5_core_dev *mdev, struct net_device *netdev,
   5234		   const struct mlx5e_profile *profile)
   5235
   5236{
   5237	unsigned int max_nch, tmp;
   5238
   5239	/* core resources */
   5240	max_nch = mlx5e_profile_max_num_channels(mdev, profile);
   5241
   5242	/* netdev rx queues */
   5243	tmp = netdev->num_rx_queues / max_t(u8, profile->rq_groups, 1);
   5244	max_nch = min_t(unsigned int, max_nch, tmp);
   5245
   5246	/* netdev tx queues */
   5247	tmp = netdev->num_tx_queues;
   5248	if (mlx5_qos_is_supported(mdev))
   5249		tmp -= mlx5e_qos_max_leaf_nodes(mdev);
   5250	if (MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn))
   5251		tmp -= profile->max_tc;
   5252	tmp = tmp / profile->max_tc;
   5253	max_nch = min_t(unsigned int, max_nch, tmp);
   5254
   5255	return max_nch;
   5256}
   5257
   5258int mlx5e_get_pf_num_tirs(struct mlx5_core_dev *mdev)
   5259{
    5260	/* The PF uses two sets of indirect TIRs, one per TTC table
    5261	 * (inner + outer steering), plus one direct TIR per channel.
    5262	 */
   5263	return 2 * MLX5E_NUM_INDIR_TIRS
   5264		+ mlx5e_profile_max_num_channels(mdev, &mlx5e_nic_profile);
   5265}
   5266
   5267/* mlx5e generic netdev management API (move to en_common.c) */
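/* mlx5e_priv_init() allocates the software state that scales with the
 * maximum queue/channel counts (workqueue, txq2sq, tx_rates, per-channel
 * stats); mlx5e_priv_cleanup() releases it and zeroes priv.
 */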
   5268int mlx5e_priv_init(struct mlx5e_priv *priv,
   5269		    const struct mlx5e_profile *profile,
   5270		    struct net_device *netdev,
   5271		    struct mlx5_core_dev *mdev)
   5272{
   5273	int nch, num_txqs, node;
   5274	int err;
   5275
   5276	num_txqs = netdev->num_tx_queues;
   5277	nch = mlx5e_calc_max_nch(mdev, netdev, profile);
   5278	node = dev_to_node(mlx5_core_dma_dev(mdev));
   5279
   5280	/* priv init */
   5281	priv->mdev        = mdev;
   5282	priv->netdev      = netdev;
   5283	priv->msglevel    = MLX5E_MSG_LEVEL;
   5284	priv->max_nch     = nch;
   5285	priv->max_opened_tc = 1;
   5286
   5287	if (!alloc_cpumask_var(&priv->scratchpad.cpumask, GFP_KERNEL))
   5288		return -ENOMEM;
   5289
   5290	mutex_init(&priv->state_lock);
   5291
   5292	err = mlx5e_selq_init(&priv->selq, &priv->state_lock);
   5293	if (err)
   5294		goto err_free_cpumask;
   5295
   5296	hash_init(priv->htb.qos_tc2node);
   5297	INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
   5298	INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
   5299	INIT_WORK(&priv->tx_timeout_work, mlx5e_tx_timeout_work);
   5300	INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
   5301
   5302	priv->wq = create_singlethread_workqueue("mlx5e");
   5303	if (!priv->wq)
   5304		goto err_free_selq;
   5305
   5306	priv->txq2sq = kcalloc_node(num_txqs, sizeof(*priv->txq2sq), GFP_KERNEL, node);
   5307	if (!priv->txq2sq)
   5308		goto err_destroy_workqueue;
   5309
   5310	priv->tx_rates = kcalloc_node(num_txqs, sizeof(*priv->tx_rates), GFP_KERNEL, node);
   5311	if (!priv->tx_rates)
   5312		goto err_free_txq2sq;
   5313
   5314	priv->channel_stats =
   5315		kcalloc_node(nch, sizeof(*priv->channel_stats), GFP_KERNEL, node);
   5316	if (!priv->channel_stats)
   5317		goto err_free_tx_rates;
   5318
   5319	return 0;
   5320
   5321err_free_tx_rates:
   5322	kfree(priv->tx_rates);
   5323err_free_txq2sq:
   5324	kfree(priv->txq2sq);
   5325err_destroy_workqueue:
   5326	destroy_workqueue(priv->wq);
   5327err_free_selq:
   5328	mlx5e_selq_cleanup(&priv->selq);
   5329err_free_cpumask:
   5330	free_cpumask_var(priv->scratchpad.cpumask);
   5331	return -ENOMEM;
   5332}
   5333
   5334void mlx5e_priv_cleanup(struct mlx5e_priv *priv)
   5335{
   5336	int i;
   5337
    5338	/* Bail out if a profile change failed and the rollback also failed */
   5339	if (!priv->mdev)
   5340		return;
   5341
   5342	for (i = 0; i < priv->stats_nch; i++)
   5343		kvfree(priv->channel_stats[i]);
   5344	kfree(priv->channel_stats);
   5345	kfree(priv->tx_rates);
   5346	kfree(priv->txq2sq);
   5347	destroy_workqueue(priv->wq);
   5348	mutex_lock(&priv->state_lock);
   5349	mlx5e_selq_cleanup(&priv->selq);
   5350	mutex_unlock(&priv->state_lock);
   5351	free_cpumask_var(priv->scratchpad.cpumask);
   5352
   5353	for (i = 0; i < priv->htb.max_qos_sqs; i++)
   5354		kfree(priv->htb.qos_sq_stats[i]);
   5355	kvfree(priv->htb.qos_sq_stats);
   5356
   5357	if (priv->mqprio_rl) {
   5358		mlx5e_mqprio_rl_cleanup(priv->mqprio_rl);
   5359		mlx5e_mqprio_rl_free(priv->mqprio_rl);
   5360	}
   5361
   5362	memset(priv, 0, sizeof(*priv));
   5363}
   5364
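/* Maximum TX queues advertised to the stack: one per TC per channel, plus
 * (when supported and enabled in the profile) one PTP queue per TC and one
 * queue per QoS (HTB) leaf node. With the hypothetical figures used above:
 * 32 * 8 + 8 + 256 = 520.
 */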
   5365static unsigned int mlx5e_get_max_num_txqs(struct mlx5_core_dev *mdev,
   5366					   const struct mlx5e_profile *profile)
   5367{
   5368	unsigned int nch, ptp_txqs, qos_txqs;
   5369
   5370	nch = mlx5e_profile_max_num_channels(mdev, profile);
   5371
   5372	ptp_txqs = MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn) &&
   5373		mlx5e_profile_feature_cap(profile, PTP_TX) ?
   5374		profile->max_tc : 0;
   5375
   5376	qos_txqs = mlx5_qos_is_supported(mdev) &&
   5377		mlx5e_profile_feature_cap(profile, QOS_HTB) ?
   5378		mlx5e_qos_max_leaf_nodes(mdev) : 0;
   5379
   5380	return nch * profile->max_tc + ptp_txqs + qos_txqs;
   5381}
   5382
   5383static unsigned int mlx5e_get_max_num_rxqs(struct mlx5_core_dev *mdev,
   5384					   const struct mlx5e_profile *profile)
   5385{
   5386	unsigned int nch;
   5387
   5388	nch = mlx5e_profile_max_num_channels(mdev, profile);
   5389
   5390	return nch * profile->rq_groups;
   5391}
   5392
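/* Allocate the netdev with enough TX/RX queue slots for the worst case the
 * profile may ever need; the real queue counts are set later by
 * mlx5e_attach_netdev() via mlx5e_num_channels_changed().
 */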
   5393struct net_device *
   5394mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile)
   5395{
   5396	struct net_device *netdev;
   5397	unsigned int txqs, rxqs;
   5398	int err;
   5399
   5400	txqs = mlx5e_get_max_num_txqs(mdev, profile);
   5401	rxqs = mlx5e_get_max_num_rxqs(mdev, profile);
   5402
   5403	netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), txqs, rxqs);
   5404	if (!netdev) {
   5405		mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n");
   5406		return NULL;
   5407	}
   5408
   5409	err = mlx5e_priv_init(netdev_priv(netdev), profile, netdev, mdev);
   5410	if (err) {
   5411		mlx5_core_err(mdev, "mlx5e_priv_init failed, err=%d\n", err);
   5412		goto err_free_netdev;
   5413	}
   5414
   5415	netif_carrier_off(netdev);
   5416	netif_tx_disable(netdev);
   5417	dev_net_set(netdev, mlx5_core_net(mdev));
   5418
   5419	return netdev;
   5420
   5421err_free_netdev:
   5422	free_netdev(netdev);
   5423
   5424	return NULL;
   5425}
   5426
   5427static void mlx5e_update_features(struct net_device *netdev)
   5428{
   5429	if (netdev->reg_state != NETREG_REGISTERED)
   5430		return; /* features will be updated on netdev registration */
   5431
   5432	rtnl_lock();
   5433	netdev_update_features(netdev);
   5434	rtnl_unlock();
   5435}
   5436
   5437static void mlx5e_reset_channels(struct net_device *netdev)
   5438{
   5439	netdev_reset_tc(netdev);
   5440}
   5441
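/* mlx5e_attach_netdev(): (re)binds the profile to the netdev. It re-derives
 * the channel limits (shrinking the configured channel count and dropping
 * user RXFH/MQPRIO-channel state if needed), updates the real queue counts,
 * XPS and RQT via mlx5e_num_channels_changed(), then runs the profile's
 * init_tx/init_rx/enable callbacks and refreshes the netdev features. On
 * failure the TC mapping is reset and the pending stats work is cancelled.
 */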
   5442int mlx5e_attach_netdev(struct mlx5e_priv *priv)
   5443{
   5444	const bool take_rtnl = priv->netdev->reg_state == NETREG_REGISTERED;
   5445	const struct mlx5e_profile *profile = priv->profile;
   5446	int max_nch;
   5447	int err;
   5448
   5449	clear_bit(MLX5E_STATE_DESTROYING, &priv->state);
   5450
   5451	/* max number of channels may have changed */
   5452	max_nch = mlx5e_calc_max_nch(priv->mdev, priv->netdev, profile);
   5453	if (priv->channels.params.num_channels > max_nch) {
   5454		mlx5_core_warn(priv->mdev, "MLX5E: Reducing number of channels to %d\n", max_nch);
    5455	/* When reducing the number of channels, the RXFH configuration has
    5456	 * to be reset; mlx5e_num_channels_changed() below will rebuild the RQT.
    5457	 */
   5458		priv->netdev->priv_flags &= ~IFF_RXFH_CONFIGURED;
   5459		priv->channels.params.num_channels = max_nch;
   5460		if (priv->channels.params.mqprio.mode == TC_MQPRIO_MODE_CHANNEL) {
   5461			mlx5_core_warn(priv->mdev, "MLX5E: Disabling MQPRIO channel mode\n");
   5462			mlx5e_params_mqprio_reset(&priv->channels.params);
   5463		}
   5464	}
   5465	if (max_nch != priv->max_nch) {
   5466		mlx5_core_warn(priv->mdev,
   5467			       "MLX5E: Updating max number of channels from %u to %u\n",
   5468			       priv->max_nch, max_nch);
   5469		priv->max_nch = max_nch;
   5470	}
   5471
   5472	/* 1. Set the real number of queues in the kernel the first time.
   5473	 * 2. Set our default XPS cpumask.
   5474	 * 3. Build the RQT.
   5475	 *
   5476	 * rtnl_lock is required by netif_set_real_num_*_queues in case the
   5477	 * netdev has been registered by this point (if this function was called
   5478	 * in the reload or resume flow).
   5479	 */
   5480	if (take_rtnl)
   5481		rtnl_lock();
   5482	err = mlx5e_num_channels_changed(priv);
   5483	if (take_rtnl)
   5484		rtnl_unlock();
   5485	if (err)
   5486		goto out;
   5487
   5488	err = profile->init_tx(priv);
   5489	if (err)
   5490		goto out;
   5491
   5492	err = profile->init_rx(priv);
   5493	if (err)
   5494		goto err_cleanup_tx;
   5495
   5496	if (profile->enable)
   5497		profile->enable(priv);
   5498
   5499	mlx5e_update_features(priv->netdev);
   5500
   5501	return 0;
   5502
   5503err_cleanup_tx:
   5504	profile->cleanup_tx(priv);
   5505
   5506out:
   5507	mlx5e_reset_channels(priv->netdev);
   5508	set_bit(MLX5E_STATE_DESTROYING, &priv->state);
   5509	cancel_work_sync(&priv->update_stats_work);
   5510	return err;
   5511}
   5512
   5513void mlx5e_detach_netdev(struct mlx5e_priv *priv)
   5514{
   5515	const struct mlx5e_profile *profile = priv->profile;
   5516
   5517	set_bit(MLX5E_STATE_DESTROYING, &priv->state);
   5518
   5519	if (profile->disable)
   5520		profile->disable(priv);
   5521	flush_workqueue(priv->wq);
   5522
   5523	profile->cleanup_rx(priv);
   5524	profile->cleanup_tx(priv);
   5525	mlx5e_reset_channels(priv->netdev);
   5526	cancel_work_sync(&priv->update_stats_work);
   5527}
   5528
   5529static int
   5530mlx5e_netdev_attach_profile(struct net_device *netdev, struct mlx5_core_dev *mdev,
   5531			    const struct mlx5e_profile *new_profile, void *new_ppriv)
   5532{
   5533	struct mlx5e_priv *priv = netdev_priv(netdev);
   5534	int err;
   5535
   5536	err = mlx5e_priv_init(priv, new_profile, netdev, mdev);
   5537	if (err) {
   5538		mlx5_core_err(mdev, "mlx5e_priv_init failed, err=%d\n", err);
   5539		return err;
   5540	}
   5541	netif_carrier_off(netdev);
   5542	priv->profile = new_profile;
   5543	priv->ppriv = new_ppriv;
   5544	err = new_profile->init(priv->mdev, priv->netdev);
   5545	if (err)
   5546		goto priv_cleanup;
   5547	err = mlx5e_attach_netdev(priv);
   5548	if (err)
   5549		goto profile_cleanup;
   5550	return err;
   5551
   5552profile_cleanup:
   5553	new_profile->cleanup(priv);
   5554priv_cleanup:
   5555	mlx5e_priv_cleanup(priv);
   5556	return err;
   5557}
   5558
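/* Swap the netdev to a new profile: detach and clean up the old profile
 * first, then initialize and attach the new one. If attaching the new
 * profile fails, the original profile is re-attached; a failure of that
 * rollback is reported but not otherwise recovered.
 */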
   5559int mlx5e_netdev_change_profile(struct mlx5e_priv *priv,
   5560				const struct mlx5e_profile *new_profile, void *new_ppriv)
   5561{
   5562	const struct mlx5e_profile *orig_profile = priv->profile;
   5563	struct net_device *netdev = priv->netdev;
   5564	struct mlx5_core_dev *mdev = priv->mdev;
   5565	void *orig_ppriv = priv->ppriv;
   5566	int err, rollback_err;
   5567
   5568	/* cleanup old profile */
   5569	mlx5e_detach_netdev(priv);
   5570	priv->profile->cleanup(priv);
   5571	mlx5e_priv_cleanup(priv);
   5572
   5573	err = mlx5e_netdev_attach_profile(netdev, mdev, new_profile, new_ppriv);
   5574	if (err) { /* roll back to original profile */
   5575		netdev_warn(netdev, "%s: new profile init failed, %d\n", __func__, err);
   5576		goto rollback;
   5577	}
   5578
   5579	return 0;
   5580
   5581rollback:
   5582	rollback_err = mlx5e_netdev_attach_profile(netdev, mdev, orig_profile, orig_ppriv);
   5583	if (rollback_err)
   5584		netdev_err(netdev, "%s: failed to rollback to orig profile, %d\n",
   5585			   __func__, rollback_err);
   5586	return err;
   5587}
   5588
   5589void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv)
   5590{
   5591	mlx5e_netdev_change_profile(priv, &mlx5e_nic_profile, NULL);
   5592}
   5593
   5594void mlx5e_destroy_netdev(struct mlx5e_priv *priv)
   5595{
   5596	struct net_device *netdev = priv->netdev;
   5597
   5598	mlx5e_priv_cleanup(priv);
   5599	free_netdev(netdev);
   5600}
   5601
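/* Auxiliary-device PM hooks: resume recreates the mdev resources and
 * re-attaches the netdev (a no-op if the device is already present), while
 * suspend detaches the netdev and destroys the mdev resources (-ENODEV if it
 * was already detached). mlx5e_probe() and mlx5e_remove() reuse the same
 * pair for initial bring-up and teardown.
 */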
   5602static int mlx5e_resume(struct auxiliary_device *adev)
   5603{
   5604	struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev);
   5605	struct mlx5e_priv *priv = auxiliary_get_drvdata(adev);
   5606	struct net_device *netdev = priv->netdev;
   5607	struct mlx5_core_dev *mdev = edev->mdev;
   5608	int err;
   5609
   5610	if (netif_device_present(netdev))
   5611		return 0;
   5612
   5613	err = mlx5e_create_mdev_resources(mdev);
   5614	if (err)
   5615		return err;
   5616
   5617	err = mlx5e_attach_netdev(priv);
   5618	if (err) {
   5619		mlx5e_destroy_mdev_resources(mdev);
   5620		return err;
   5621	}
   5622
   5623	return 0;
   5624}
   5625
   5626static int mlx5e_suspend(struct auxiliary_device *adev, pm_message_t state)
   5627{
   5628	struct mlx5e_priv *priv = auxiliary_get_drvdata(adev);
   5629	struct net_device *netdev = priv->netdev;
   5630	struct mlx5_core_dev *mdev = priv->mdev;
   5631
   5632	if (!netif_device_present(netdev))
   5633		return -ENODEV;
   5634
   5635	mlx5e_detach_netdev(priv);
   5636	mlx5e_destroy_mdev_resources(mdev);
   5637	return 0;
   5638}
   5639
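/* Probe order for the ".eth" auxiliary device: allocate and build the
 * netdev, register the devlink port, run the NIC profile init, bring the
 * device up via mlx5e_resume(), then register the netdev, set the devlink
 * port type and record the uplink netdev. The error labels unwind these
 * steps in reverse order.
 */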
   5640static int mlx5e_probe(struct auxiliary_device *adev,
   5641		       const struct auxiliary_device_id *id)
   5642{
   5643	struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev);
   5644	const struct mlx5e_profile *profile = &mlx5e_nic_profile;
   5645	struct mlx5_core_dev *mdev = edev->mdev;
   5646	struct net_device *netdev;
   5647	pm_message_t state = {};
   5648	struct mlx5e_priv *priv;
   5649	int err;
   5650
   5651	netdev = mlx5e_create_netdev(mdev, profile);
   5652	if (!netdev) {
   5653		mlx5_core_err(mdev, "mlx5e_create_netdev failed\n");
   5654		return -ENOMEM;
   5655	}
   5656
   5657	mlx5e_build_nic_netdev(netdev);
   5658
   5659	priv = netdev_priv(netdev);
   5660	auxiliary_set_drvdata(adev, priv);
   5661
   5662	priv->profile = profile;
   5663	priv->ppriv = NULL;
   5664
   5665	err = mlx5e_devlink_port_register(priv);
   5666	if (err) {
   5667		mlx5_core_err(mdev, "mlx5e_devlink_port_register failed, %d\n", err);
   5668		goto err_destroy_netdev;
   5669	}
   5670
   5671	err = profile->init(mdev, netdev);
   5672	if (err) {
   5673		mlx5_core_err(mdev, "mlx5e_nic_profile init failed, %d\n", err);
   5674		goto err_devlink_cleanup;
   5675	}
   5676
   5677	err = mlx5e_resume(adev);
   5678	if (err) {
   5679		mlx5_core_err(mdev, "mlx5e_resume failed, %d\n", err);
   5680		goto err_profile_cleanup;
   5681	}
   5682
   5683	err = register_netdev(netdev);
   5684	if (err) {
   5685		mlx5_core_err(mdev, "register_netdev failed, %d\n", err);
   5686		goto err_resume;
   5687	}
   5688
   5689	mlx5e_devlink_port_type_eth_set(priv);
   5690
   5691	mlx5e_dcbnl_init_app(priv);
   5692	mlx5_uplink_netdev_set(mdev, netdev);
   5693	return 0;
   5694
   5695err_resume:
   5696	mlx5e_suspend(adev, state);
   5697err_profile_cleanup:
   5698	profile->cleanup(priv);
   5699err_devlink_cleanup:
   5700	mlx5e_devlink_port_unregister(priv);
   5701err_destroy_netdev:
   5702	mlx5e_destroy_netdev(priv);
   5703	return err;
   5704}
   5705
   5706static void mlx5e_remove(struct auxiliary_device *adev)
   5707{
   5708	struct mlx5e_priv *priv = auxiliary_get_drvdata(adev);
   5709	pm_message_t state = {};
   5710
   5711	mlx5e_dcbnl_delete_app(priv);
   5712	unregister_netdev(priv->netdev);
   5713	mlx5e_suspend(adev, state);
   5714	priv->profile->cleanup(priv);
   5715	mlx5e_devlink_port_unregister(priv);
   5716	mlx5e_destroy_netdev(priv);
   5717}
   5718
   5719static const struct auxiliary_device_id mlx5e_id_table[] = {
   5720	{ .name = MLX5_ADEV_NAME ".eth", },
   5721	{},
   5722};
   5723
   5724MODULE_DEVICE_TABLE(auxiliary, mlx5e_id_table);
   5725
   5726static struct auxiliary_driver mlx5e_driver = {
   5727	.name = "eth",
   5728	.probe = mlx5e_probe,
   5729	.remove = mlx5e_remove,
   5730	.suspend = mlx5e_suspend,
   5731	.resume = mlx5e_resume,
   5732	.id_table = mlx5e_id_table,
   5733};
   5734
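/* Module entry points: mlx5e_init() builds the PTYS-to-ethtool mapping and
 * registers the auxiliary driver and then the representor support,
 * unregistering the driver again if the latter fails; mlx5e_cleanup()
 * reverses the order.
 */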
   5735int mlx5e_init(void)
   5736{
   5737	int ret;
   5738
   5739	mlx5e_build_ptys2ethtool_map();
   5740	ret = auxiliary_driver_register(&mlx5e_driver);
   5741	if (ret)
   5742		return ret;
   5743
   5744	ret = mlx5e_rep_init();
   5745	if (ret)
   5746		auxiliary_driver_unregister(&mlx5e_driver);
   5747	return ret;
   5748}
   5749
   5750void mlx5e_cleanup(void)
   5751{
   5752	mlx5e_rep_cleanup();
   5753	auxiliary_driver_unregister(&mlx5e_driver);
   5754}