cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

mlx5_vnet.c (85269B)


// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies Ltd. */

#include <linux/module.h>
#include <linux/vdpa.h>
#include <linux/vringh.h>
#include <uapi/linux/virtio_net.h>
#include <uapi/linux/virtio_ids.h>
#include <uapi/linux/vdpa.h>
#include <linux/virtio_config.h>
#include <linux/auxiliary_bus.h>
#include <linux/mlx5/cq.h>
#include <linux/mlx5/qp.h>
#include <linux/mlx5/device.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/mlx5_ifc_vdpa.h>
#include <linux/mlx5/mpfs.h>
#include "mlx5_vdpa.h"

MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox VDPA driver");
MODULE_LICENSE("Dual BSD/GPL");

#define to_mlx5_vdpa_ndev(__mvdev)                                             \
	container_of(__mvdev, struct mlx5_vdpa_net, mvdev)
#define to_mvdev(__vdev) container_of((__vdev), struct mlx5_vdpa_dev, vdev)

#define VALID_FEATURES_MASK                                                                        \
	(BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |                                   \
	 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) |   \
	 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |                             \
	 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
	 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) |   \
	 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |      \
	 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) |                                 \
	 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) |                      \
	 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) |  \
	 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) |           \
	 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) |                          \
	 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) |      \
	 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))

#define VALID_STATUS_MASK                                                                          \
	(VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK |        \
	 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)

#define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature)))

#define MLX5V_UNTAGGED 0x1000

struct mlx5_vdpa_net_resources {
	u32 tisn;
	u32 tdn;
	u32 tirn;
	u32 rqtn;
	bool valid;
};

struct mlx5_vdpa_cq_buf {
	struct mlx5_frag_buf_ctrl fbc;
	struct mlx5_frag_buf frag_buf;
	int cqe_size;
	int nent;
};

struct mlx5_vdpa_cq {
	struct mlx5_core_cq mcq;
	struct mlx5_vdpa_cq_buf buf;
	struct mlx5_db db;
	int cqe;
};

struct mlx5_vdpa_umem {
	struct mlx5_frag_buf_ctrl fbc;
	struct mlx5_frag_buf frag_buf;
	int size;
	u32 id;
};

struct mlx5_vdpa_qp {
	struct mlx5_core_qp mqp;
	struct mlx5_frag_buf frag_buf;
	struct mlx5_db db;
	u16 head;
	bool fw;
};

struct mlx5_vq_restore_info {
	u32 num_ent;
	u64 desc_addr;
	u64 device_addr;
	u64 driver_addr;
	u16 avail_index;
	u16 used_index;
	bool ready;
	bool restore;
};

struct mlx5_vdpa_virtqueue {
	bool ready;
	u64 desc_addr;
	u64 device_addr;
	u64 driver_addr;
	u32 num_ent;

	/* Resources for implementing the notification channel from the device
	 * to the driver. fwqp is the firmware end of an RC connection; the
	 * other end is vqqp used by the driver. cq is where completions are
	 * reported.
	 */
	struct mlx5_vdpa_cq cq;
	struct mlx5_vdpa_qp fwqp;
	struct mlx5_vdpa_qp vqqp;

	/* umem resources are required for the virtqueue operation. Their use
	 * is internal and they must be provided by the driver.
	 */
	struct mlx5_vdpa_umem umem1;
	struct mlx5_vdpa_umem umem2;
	struct mlx5_vdpa_umem umem3;

	u32 counter_set_id;
	bool initialized;
	int index;
	u32 virtq_id;
	struct mlx5_vdpa_net *ndev;
	u16 avail_idx;
	u16 used_idx;
	int fw_state;

	/* keep last in the struct */
	struct mlx5_vq_restore_info ri;
};

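/* Virtqueue index layout: data virtqueues come in RX/TX pairs starting at
 * index 0. Without VIRTIO_NET_F_MQ there is a single pair (indices 0 and
 * 1), plus index 2 for the control VQ when VIRTIO_NET_F_CTRL_VQ is
 * negotiated. With MQ, data virtqueues occupy indices 0..max_vqs - 1 and
 * the control VQ index is max_vqs (see ctrl_vq_idx() below).
 */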
static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
{
	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) {
		if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
			return idx < 2;
		else
			return idx < 3;
	}

	return idx <= mvdev->max_idx;
}

#define MLX5V_MACVLAN_SIZE 256

struct mlx5_vdpa_net {
	struct mlx5_vdpa_dev mvdev;
	struct mlx5_vdpa_net_resources res;
	struct virtio_net_config config;
	struct mlx5_vdpa_virtqueue *vqs;
	struct vdpa_callback *event_cbs;

	/* Serialize vq resource creation and destruction. This is required
	 * since the memory map might change and we need to destroy and create
	 * resources while the driver is operational.
	 */
	struct rw_semaphore reslock;
	struct mlx5_flow_table *rxft;
	bool setup;
	u32 cur_num_vqs;
	u32 rqt_size;
	struct notifier_block nb;
	struct vdpa_callback config_cb;
	struct mlx5_vdpa_wq_ent cvq_ent;
	struct hlist_head macvlan_hash[MLX5V_MACVLAN_SIZE];
};

struct macvlan_node {
	struct hlist_node hlist;
	struct mlx5_flow_handle *ucast_rule;
	struct mlx5_flow_handle *mcast_rule;
	u64 macvlan;
};

static void free_resources(struct mlx5_vdpa_net *ndev);
static void init_mvqs(struct mlx5_vdpa_net *ndev);
static int setup_driver(struct mlx5_vdpa_dev *mvdev);
static void teardown_driver(struct mlx5_vdpa_net *ndev);

static bool mlx5_vdpa_debug;

#define MLX5_CVQ_MAX_ENT 16

#define MLX5_LOG_VIO_FLAG(_feature)                                                                \
	do {                                                                                       \
		if (features & BIT_ULL(_feature))                                                  \
			mlx5_vdpa_info(mvdev, "%s\n", #_feature);                                  \
	} while (0)

#define MLX5_LOG_VIO_STAT(_status)                                                                 \
	do {                                                                                       \
		if (status & (_status))                                                            \
			mlx5_vdpa_info(mvdev, "%s\n", #_status);                                   \
	} while (0)

/* TODO: cross-endian support */
static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
{
	return virtio_legacy_is_little_endian() ||
		(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
}

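/* Conversion helpers for 16-bit fields such as the members of struct
 * virtio_net_config. Legacy virtio uses guest-native endianness while
 * VERSION_1 mandates little-endian, so a byte swap is only needed for a
 * legacy device on a big-endian host.
 */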
static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
{
	return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
}

static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
{
	return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
}

static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
{
	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
		return 2;

	return mvdev->max_vqs;
}

static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
{
	return idx == ctrl_vq_idx(mvdev);
}

static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
{
	if (status & ~VALID_STATUS_MASK)
		mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
			       status & ~VALID_STATUS_MASK);

	if (!mlx5_vdpa_debug)
		return;

	mlx5_vdpa_info(mvdev, "driver status %s", set ? "set" : "get");
	if (set && !status) {
		mlx5_vdpa_info(mvdev, "driver resets the device\n");
		return;
	}

	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
}

static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
{
	if (features & ~VALID_FEATURES_MASK)
		mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
			       features & ~VALID_FEATURES_MASK);

	if (!mlx5_vdpa_debug)
		return;

	mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
	if (!features)
		mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");

	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
}

static int create_tis(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
	void *tisc;
	int err;

	tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
	MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
	err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
	if (err)
		mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);

	return err;
}

static void destroy_tis(struct mlx5_vdpa_net *ndev)
{
	mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
}

#define MLX5_VDPA_CQE_SIZE 64
#define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)

static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
{
	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
	u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
	u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
	int err;

	err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
				       ndev->mvdev.mdev->priv.numa_node);
	if (err)
		return err;

	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);

	buf->cqe_size = MLX5_VDPA_CQE_SIZE;
	buf->nent = nent;

	return 0;
}

static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
{
	struct mlx5_frag_buf *frag_buf = &umem->frag_buf;

	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
					ndev->mvdev.mdev->priv.numa_node);
}

static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
{
	mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
}

static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
{
	return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
}

static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
{
	struct mlx5_cqe64 *cqe64;
	void *cqe;
	int i;

	for (i = 0; i < buf->nent; i++) {
		cqe = get_cqe(vcq, i);
		cqe64 = cqe;
		cqe64->op_own = MLX5_CQE_INVALID << 4;
	}
}

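/* A CQE may be consumed by software only if its opcode is not
 * MLX5_CQE_INVALID and its ownership bit matches the current polarity of
 * the consumer index, which flips on every wrap-around of the CQ.
 * get_sw_cqe() returns the CQE at position n if software owns it, or NULL
 * otherwise.
 */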
static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
{
	struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));

	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
		return cqe64;

	return NULL;
}

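/* Post n receive WQEs by advancing the queue head and publishing the new
 * value in the doorbell record, which the device reads to learn how many
 * entries are available; e.g. qp_create() posts the whole ring (num_ent
 * entries) right after the driver-side QP is created.
 */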
static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
{
	vqp->head += n;
	vqp->db.db[0] = cpu_to_be32(vqp->head);
}

static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
		       struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
{
	struct mlx5_vdpa_qp *vqp;
	__be64 *pas;
	void *qpc;

	vqp = fw ? &mvq->fwqp : &mvq->vqqp;
	MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
	if (vqp->fw) {
		/* The firmware QP is allocated by the driver on the firmware's
		 * behalf, so we can skip part of the params; they will be
		 * chosen by the firmware.
		 */
		qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
		MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
		MLX5_SET(qpc, qpc, no_sq, 1);
		return;
	}

	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
	MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
	MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET(qpc, qpc, no_sq, 1);
	MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
	MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
	pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
	mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
}

static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
{
	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
					num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
					ndev->mvdev.mdev->priv.numa_node);
}

static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
{
	mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
}

static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
		     struct mlx5_vdpa_qp *vqp)
{
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
	void *qpc;
	void *in;
	int err;

	if (!vqp->fw) {
		vqp = &mvq->vqqp;
		err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
		if (err)
			return err;

		err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
		if (err)
			goto err_db;
		inlen += vqp->frag_buf.npages * sizeof(__be64);
	}

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_kzalloc;
	}

	qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
	if (!vqp->fw)
		MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
	kfree(in);
	if (err)
		goto err_kzalloc;

	vqp->mqp.uid = ndev->mvdev.res.uid;
	vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);

	if (!vqp->fw)
		rx_post(vqp, mvq->num_ent);

	return 0;

err_kzalloc:
	if (!vqp->fw)
		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
err_db:
	if (!vqp->fw)
		rq_buf_free(ndev, vqp);

	return err;
}

static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
{
	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};

	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
	MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
	MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
	if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
		mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
	if (!vqp->fw) {
		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
		rq_buf_free(ndev, vqp);
	}
}

static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
{
	return get_sw_cqe(cq, cq->mcq.cons_index);
}

static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
{
	struct mlx5_cqe64 *cqe64;

	cqe64 = next_cqe_sw(vcq);
	if (!cqe64)
		return -EAGAIN;

	vcq->mcq.cons_index++;
	return 0;
}

static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
{
	struct mlx5_vdpa_net *ndev = mvq->ndev;
	struct vdpa_callback *event_cb;

	event_cb = &ndev->event_cbs[mvq->index];
	mlx5_cq_set_ci(&mvq->cq.mcq);

	/* make sure the CQ consumer update is visible to the hardware before
	 * updating the RX doorbell record.
	 */
	dma_wmb();
	rx_post(&mvq->vqqp, num);
	if (event_cb->callback)
		event_cb->callback(event_cb->private);
}

static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
{
	struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
	struct mlx5_vdpa_net *ndev = mvq->ndev;
	void __iomem *uar_page = ndev->mvdev.res.uar->map;
	int num = 0;

	while (!mlx5_vdpa_poll_one(&mvq->cq)) {
		num++;
		if (num > mvq->num_ent / 2) {
			/* If completions keep coming while we poll, we want to
			 * let the hardware know that we consumed them by
			 * updating the doorbell record. We also let the vdpa
			 * core know about this so it passes it on to the
			 * virtio driver in the guest.
			 */
			mlx5_vdpa_handle_completions(mvq, num);
			num = 0;
		}
	}

	if (num)
		mlx5_vdpa_handle_completions(mvq, num);

	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
}

static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
{
	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	void __iomem *uar_page = ndev->mvdev.res.uar->map;
	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
	struct mlx5_vdpa_cq *vcq = &mvq->cq;
	__be64 *pas;
	int inlen;
	void *cqc;
	void *in;
	int err;
	int eqn;

	err = mlx5_db_alloc(mdev, &vcq->db);
	if (err)
		return err;

	vcq->mcq.set_ci_db = vcq->db.db;
	vcq->mcq.arm_db = vcq->db.db + 1;
	vcq->mcq.cqe_sz = 64;

	err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
	if (err)
		goto err_db;

	cq_frag_buf_init(vcq, &vcq->buf);

	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
		MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_vzalloc;
	}

	MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
	mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);

	/* Use vector 0 by default. Consider adding code to choose the least
	 * used vector.
	 */
	err = mlx5_vector2eqn(mdev, 0, &eqn);
	if (err)
		goto err_vec;

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
	MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
	MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);

	err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
	if (err)
		goto err_vec;

	vcq->mcq.comp = mlx5_vdpa_cq_comp;
	vcq->cqe = num_ent;
	vcq->mcq.set_ci_db = vcq->db.db;
	vcq->mcq.arm_db = vcq->db.db + 1;
	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
	kfree(in);
	return 0;

err_vec:
	kfree(in);
err_vzalloc:
	cq_frag_buf_free(ndev, &vcq->buf);
err_db:
	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
	return err;
}

static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
{
	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	struct mlx5_vdpa_cq *vcq = &mvq->cq;

	if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
		mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
		return;
	}
	cq_frag_buf_free(ndev, &vcq->buf);
	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
}

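/* The required size of each umem is a linear function of the virtqueue
 * size, with the coefficients for umem <num> coming from the firmware
 * capabilities:
 *
 *	size = umem_<num>_buffer_param_a * num_ent + umem_<num>_buffer_param_b
 *
 * For hypothetical params a = 128 and b = 4096, a 256-entry virtqueue
 * would need 128 * 256 + 4096 = 36864 bytes.
 */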
static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
			  struct mlx5_vdpa_umem **umemp)
{
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	int p_a;
	int p_b;

	switch (num) {
	case 1:
		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_a);
		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_b);
		*umemp = &mvq->umem1;
		break;
	case 2:
		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_a);
		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_b);
		*umemp = &mvq->umem2;
		break;
	case 3:
		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_a);
		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_b);
		*umemp = &mvq->umem3;
		break;
	}
	(*umemp)->size = p_a * mvq->num_ent + p_b;
}

static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
{
	mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
}

static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
{
	int inlen;
	u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
	void *um;
	void *in;
	int err;
	__be64 *pas;
	struct mlx5_vdpa_umem *umem;

	set_umem_size(ndev, mvq, num, &umem);
	err = umem_frag_buf_alloc(ndev, umem, umem->size);
	if (err)
		return err;

	inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_in;
	}

	MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
	MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
	um = MLX5_ADDR_OF(create_umem_in, in, umem);
	MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);

	pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
	mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);

	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
	if (err) {
		mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
		goto err_cmd;
	}

	kfree(in);
	umem->id = MLX5_GET(create_umem_out, out, umem_id);

	return 0;

err_cmd:
	kfree(in);
err_in:
	umem_frag_buf_free(ndev, umem);
	return err;
}

static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
{
	u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
	u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
	struct mlx5_vdpa_umem *umem;

	switch (num) {
	case 1:
		umem = &mvq->umem1;
		break;
	case 2:
		umem = &mvq->umem2;
		break;
	case 3:
		umem = &mvq->umem3;
		break;
	}

	MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
	MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
		return;

	umem_frag_buf_free(ndev, umem);
}

static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int num;
	int err;

	for (num = 1; num <= 3; num++) {
		err = create_umem(ndev, mvq, num);
		if (err)
			goto err_umem;
	}
	return 0;

err_umem:
	for (num--; num > 0; num--)
		umem_destroy(ndev, mvq, num);

	return err;
}

static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int num;

	for (num = 3; num > 0; num--)
		umem_destroy(ndev, mvq, num);
}

static int get_queue_type(struct mlx5_vdpa_net *ndev)
{
	u32 type_mask;

	type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);

	/* prefer split queue */
	if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
		return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;

	WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));

	return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
}

static bool vq_is_tx(u16 idx)
{
	return idx % 2;
}

static u16 get_features_12_3(u64 features)
{
	return (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << 9) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << 8) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << 7) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_CSUM)) << 6);
}

static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
{
	return MLX5_CAP_GEN_64(mvdev->mdev, general_obj_types) &
	       BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
}

static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
	u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
	void *obj_context;
	void *cmd_hdr;
	void *vq_ctx;
	void *in;
	int err;

	err = umems_create(ndev, mvq);
	if (err)
		return err;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_alloc;
	}

	cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);

	obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
	MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
	MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
		 get_features_12_3(ndev->mvdev.actual_features));
	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
	MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));

	if (vq_is_tx(mvq->index))
		MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);

	MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
	MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
	MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
	MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
	MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
		 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
	MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
	MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
	MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
	MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey);
	MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
	MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
	MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
	MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
	MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
	MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
	MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
	if (counters_supported(&ndev->mvdev))
		MLX5_SET(virtio_q, vq_ctx, counter_set_id, mvq->counter_set_id);

	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
	if (err)
		goto err_cmd;

	kfree(in);
	mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);

	return 0;

err_cmd:
	kfree(in);
err_alloc:
	umems_destroy(ndev, mvq);
	return err;
}

static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
	u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};

	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
		 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
		mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
		return;
	}
	umems_destroy(ndev, mvq);
}

static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
{
	return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
}

static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
{
	return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
}

static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
			int *outlen, u32 qpn, u32 rqpn)
{
	void *qpc;
	void *pp;

	switch (cmd) {
	case MLX5_CMD_OP_2RST_QP:
		*inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
		*outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
		*in = kzalloc(*inlen, GFP_KERNEL);
		*out = kzalloc(*outlen, GFP_KERNEL);
		if (!*in || !*out)
			goto outerr;

		MLX5_SET(qp_2rst_in, *in, opcode, cmd);
		MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
		MLX5_SET(qp_2rst_in, *in, qpn, qpn);
		break;
	case MLX5_CMD_OP_RST2INIT_QP:
		*inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
		*outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
		*in = kzalloc(*inlen, GFP_KERNEL);
		*out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL);
		if (!*in || !*out)
			goto outerr;

		MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
		MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
		MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
		MLX5_SET(qpc, qpc, rwe, 1);
		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
		MLX5_SET(ads, pp, vhca_port_num, 1);
		break;
	case MLX5_CMD_OP_INIT2RTR_QP:
		*inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
		*outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
		*in = kzalloc(*inlen, GFP_KERNEL);
		*out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL);
		if (!*in || !*out)
			goto outerr;

		MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
		MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
		MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
		qpc = MLX5_ADDR_OF(init2rtr_qp_in, *in, qpc);
		MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
		MLX5_SET(qpc, qpc, log_msg_max, 30);
		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
		MLX5_SET(ads, pp, fl, 1);
		break;
	case MLX5_CMD_OP_RTR2RTS_QP:
		*inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
		*outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
		*in = kzalloc(*inlen, GFP_KERNEL);
		*out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL);
		if (!*in || !*out)
			goto outerr;

		MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
		MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
		MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
		qpc = MLX5_ADDR_OF(rtr2rts_qp_in, *in, qpc);
		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
		MLX5_SET(ads, pp, ack_timeout, 14);
		MLX5_SET(qpc, qpc, retry_count, 7);
		MLX5_SET(qpc, qpc, rnr_retry, 7);
		break;
	default:
		goto outerr_nullify;
	}

	return;

outerr:
	kfree(*in);
	kfree(*out);
outerr_nullify:
	*in = NULL;
	*out = NULL;
}

static void free_inout(void *in, void *out)
{
	kfree(in);
	kfree(out);
}

/* Two QPs are used by each virtqueue. One is used by the driver and one by
 * the firmware. The fw argument indicates whether the QP being modified is
 * the one used by the firmware.
 */
static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
{
	int outlen;
	int inlen;
	void *out;
	void *in;
	int err;

	alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
	if (!in || !out)
		return -ENOMEM;

	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
	free_inout(in, out);
	return err;
}

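/* Walk both QPs of the notification channel through the standard RC state
 * machine, RST -> INIT -> RTR, with each side pointing at the other as its
 * remote QP. Only the firmware QP, which does the sending, is moved on to
 * RTS; the driver-side QP only receives, so RTR is sufficient for it.
 */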
static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int err;

	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
	if (err)
		return err;

	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
	if (err)
		return err;

	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
	if (err)
		return err;

	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
	if (err)
		return err;

	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
	if (err)
		return err;

	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
	if (err)
		return err;

	return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
}

struct mlx5_virtq_attr {
	u8 state;
	u16 available_index;
	u16 used_index;
};

static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
			   struct mlx5_virtq_attr *attr)
{
	int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out);
	u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {};
	void *out;
	void *obj_context;
	void *cmd_hdr;
	int err;

	out = kzalloc(outlen, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr);

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen);
	if (err)
		goto err_cmd;

	obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context);
	memset(attr, 0, sizeof(*attr));
	attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
	attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
	attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
	kfree(out);
	return 0;

err_cmd:
	kfree(out);
	return err;
}

static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
{
	int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
	u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {};
	void *obj_context;
	void *cmd_hdr;
	void *in;
	int err;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr);

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);

	obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context);
	MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select,
		   MLX5_VIRTQ_MODIFY_MASK_STATE);
	MLX5_SET(virtio_net_q_object, obj_context, state, state);
	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
	kfree(in);
	if (!err)
		mvq->fw_state = state;

	return err;
}

static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {};
	u32 out[MLX5_ST_SZ_DW(create_virtio_q_counters_out)] = {};
	void *cmd_hdr;
	int err;

	if (!counters_supported(&ndev->mvdev))
		return 0;

	cmd_hdr = MLX5_ADDR_OF(create_virtio_q_counters_in, in, hdr);

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);

	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
	if (err)
		return err;

	mvq->counter_set_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);

	return 0;
}

static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	u32 in[MLX5_ST_SZ_DW(destroy_virtio_q_counters_in)] = {};
	u32 out[MLX5_ST_SZ_DW(destroy_virtio_q_counters_out)] = {};

	if (!counters_supported(&ndev->mvdev))
		return;

	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_id, mvq->counter_set_id);
	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.uid, ndev->mvdev.res.uid);
	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
		mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id);
}

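/* Bring up all resources backing one virtqueue: the completion queue, the
 * firmware/driver QP pair, the RC connection between the two QPs, an
 * optional counter set, and finally the virtio_net_q firmware object,
 * which is moved to the RDY state if the queue was already marked ready.
 */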
static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	u16 idx = mvq->index;
	int err;

	if (!mvq->num_ent)
		return 0;

	if (mvq->initialized)
		return 0;

	err = cq_create(ndev, idx, mvq->num_ent);
	if (err)
		return err;

	err = qp_create(ndev, mvq, &mvq->fwqp);
	if (err)
		goto err_fwqp;

	err = qp_create(ndev, mvq, &mvq->vqqp);
	if (err)
		goto err_vqqp;

	err = connect_qps(ndev, mvq);
	if (err)
		goto err_counter;

	err = counter_set_alloc(ndev, mvq);
	if (err)
		goto err_counter;

	err = create_virtqueue(ndev, mvq);
	if (err)
		goto err_connect;

	if (mvq->ready) {
		err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
		if (err) {
			mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
				       idx, err);
			goto err_modify;
		}
	}

	mvq->initialized = true;
	return 0;

err_modify:
	destroy_virtqueue(ndev, mvq);
err_connect:
	counter_set_dealloc(ndev, mvq);
err_counter:
	qp_destroy(ndev, &mvq->vqqp);
err_vqqp:
	qp_destroy(ndev, &mvq->fwqp);
err_fwqp:
	cq_destroy(ndev, idx);
	return err;
}

static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	struct mlx5_virtq_attr attr;

	if (!mvq->initialized)
		return;

	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
		return;

	if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
		mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n");

	if (query_virtqueue(ndev, mvq, &attr)) {
		mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
		return;
	}
	mvq->avail_idx = attr.available_index;
	mvq->used_idx = attr.used_index;
}

static void suspend_vqs(struct mlx5_vdpa_net *ndev)
{
	int i;

	for (i = 0; i < ndev->mvdev.max_vqs; i++)
		suspend_vq(ndev, &ndev->vqs[i]);
}

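/* Tear down a virtqueue in the reverse order of setup_vq(), after first
 * suspending it so that the hardware available/used indices can be saved
 * for a later restore.
 */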
static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	if (!mvq->initialized)
		return;

	suspend_vq(ndev, mvq);
	destroy_virtqueue(ndev, mvq);
	counter_set_dealloc(ndev, mvq);
	qp_destroy(ndev, &mvq->vqqp);
	qp_destroy(ndev, &mvq->fwqp);
	cq_destroy(ndev, mvq->index);
	mvq->initialized = false;
}

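/* Create the RQ table used for RSS. Data virtqueues come in RX/TX pairs,
 * so the even indices (j += 2) select the receive queues: e.g. with
 * cur_num_vqs == 4 the entries cycle through virtqueues 0 and 2. The table
 * holds rqt_size entries and wraps modulo the number of active data
 * queues.
 */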
static int create_rqt(struct mlx5_vdpa_net *ndev)
{
	__be32 *list;
	void *rqtc;
	int inlen;
	void *in;
	int i, j;
	int err;

	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + ndev->rqt_size * MLX5_ST_SZ_BYTES(rq_num);
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);

	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
	MLX5_SET(rqtc, rqtc, rqt_max_size, ndev->rqt_size);
	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
	for (i = 0, j = 0; i < ndev->rqt_size; i++, j += 2)
		list[i] = cpu_to_be32(ndev->vqs[j % ndev->cur_num_vqs].virtq_id);

	MLX5_SET(rqtc, rqtc, rqt_actual_size, ndev->rqt_size);
	err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
	kfree(in);
	if (err)
		return err;

	return 0;
}

#define MLX5_MODIFY_RQT_NUM_RQS ((u64)1)

static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
{
	__be32 *list;
	void *rqtc;
	int inlen;
	void *in;
	int i, j;
	int err;

	inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + ndev->rqt_size * MLX5_ST_SZ_BYTES(rq_num);
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid);
	MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS);
	rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);

	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
	for (i = 0, j = 0; i < ndev->rqt_size; i++, j += 2)
		list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id);

	MLX5_SET(rqtc, rqtc, rqt_actual_size, ndev->rqt_size);
	err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
	kfree(in);
	if (err)
		return err;

	return 0;
}

static void destroy_rqt(struct mlx5_vdpa_net *ndev)
{
	mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
}

static int create_tir(struct mlx5_vdpa_net *ndev)
{
#define HASH_IP_L4PORTS                                                                            \
	(MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT |  \
	 MLX5_HASH_FIELD_SEL_L4_DPORT)
	static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
						   0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
						   0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
						   0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
						   0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
	void *rss_key;
	void *outer;
	void *tirc;
	void *in;
	int err;

	in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);

	MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
	MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
	rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
	memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));

	outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
	MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
	MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
	MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);

	MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
	MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);

	err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
	kfree(in);
	return err;
}

static void destroy_tir(struct mlx5_vdpa_net *ndev)
{
	mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
}

#define MAX_STEERING_ENT 0x8000
#define MAX_STEERING_GROUPS 2

static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac,
					u16 vid, bool tagged,
					struct mlx5_flow_handle **ucast,
					struct mlx5_flow_handle **mcast)
{
	struct mlx5_flow_destination dest = {};
	struct mlx5_flow_act flow_act = {};
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_spec *spec;
	void *headers_c;
	void *headers_v;
	u8 *dmac_c;
	u8 *dmac_v;
	int err;

	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
	if (!spec)
		return -ENOMEM;

	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
	headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
	headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
	dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16);
	dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16);
	memset(dmac_c, 0xff, ETH_ALEN);
	ether_addr_copy(dmac_v, mac);
	MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
	if (tagged) {
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);
		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid);
		MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, vid);
	}
	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
	dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
	dest.tir_num = ndev->res.tirn;
	rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, &dest, 1);
	if (IS_ERR(rule)) {
		kvfree(spec);
		return PTR_ERR(rule);
	}

	*ucast = rule;

	memset(dmac_c, 0, ETH_ALEN);
	memset(dmac_v, 0, ETH_ALEN);
	dmac_c[0] = 1;
	dmac_v[0] = 1;
	rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, &dest, 1);
	kvfree(spec);
	if (IS_ERR(rule)) {
		err = PTR_ERR(rule);
		goto err_mcast;
	}

	*mcast = rule;
	return 0;

err_mcast:
	mlx5_del_flow_rules(*ucast);
	return err;
}

static void mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev,
					 struct mlx5_flow_handle *ucast,
					 struct mlx5_flow_handle *mcast)
{
	mlx5_del_flow_rules(ucast);
	mlx5_del_flow_rules(mcast);
}

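/* Pack a MAC/VLAN pair into a single 64-bit hash key: the VLAN id (or
 * MLX5V_UNTAGGED for untagged traffic) occupies bits 63..48 and the MAC
 * address bits 47..0. For example, untagged 00:11:22:33:44:55 yields the
 * key 0x1000001122334455.
 */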
static u64 search_val(u8 *mac, u16 vlan, bool tagged)
{
	u64 val;

	if (!tagged)
		vlan = MLX5V_UNTAGGED;

	val = (u64)vlan << 48 |
	      (u64)mac[0] << 40 |
	      (u64)mac[1] << 32 |
	      (u64)mac[2] << 24 |
	      (u64)mac[3] << 16 |
	      (u64)mac[4] << 8 |
	      (u64)mac[5];

	return val;
}

static struct macvlan_node *mac_vlan_lookup(struct mlx5_vdpa_net *ndev, u64 value)
{
	struct macvlan_node *pos;
	u32 idx;

	idx = hash_64(value, 8); /* 8 == ilog2(MLX5V_MACVLAN_SIZE) */
	hlist_for_each_entry(pos, &ndev->macvlan_hash[idx], hlist) {
		if (pos->macvlan == value)
			return pos;
	}
	return NULL;
}

static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged)
{
	struct macvlan_node *ptr;
	u64 val;
	u32 idx;
	int err;

	val = search_val(mac, vlan, tagged);
	if (mac_vlan_lookup(ndev, val))
		return -EEXIST;

	ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
	if (!ptr)
		return -ENOMEM;

	err = mlx5_vdpa_add_mac_vlan_rules(ndev, mac, vlan, tagged,
					   &ptr->ucast_rule, &ptr->mcast_rule);
   1535	if (err)
   1536		goto err_add;
   1537
   1538	ptr->macvlan = val;
   1539	idx = hash_64(val, 8);
   1540	hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]);
   1541	return 0;
   1542
   1543err_add:
   1544	kfree(ptr);
   1545	return err;
   1546}
   1547
   1548static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged)
   1549{
   1550	struct macvlan_node *ptr;
   1551
   1552	ptr = mac_vlan_lookup(ndev, search_val(mac, vlan, tagged));
   1553	if (!ptr)
   1554		return;
   1555
   1556	hlist_del(&ptr->hlist);
   1557	mlx5_vdpa_del_mac_vlan_rules(ndev, ptr->ucast_rule, ptr->mcast_rule);
   1558	kfree(ptr);
   1559}
   1560
   1561static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev)
   1562{
   1563	struct macvlan_node *pos;
   1564	struct hlist_node *n;
   1565	int i;
   1566
   1567	for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) {
   1568		hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) {
   1569			hlist_del(&pos->hlist);
   1570			mlx5_vdpa_del_mac_vlan_rules(ndev, pos->ucast_rule, pos->mcast_rule);
   1571			kfree(pos);
   1572		}
   1573	}
   1574}
   1575
   1576static int setup_steering(struct mlx5_vdpa_net *ndev)
   1577{
   1578	struct mlx5_flow_table_attr ft_attr = {};
   1579	struct mlx5_flow_namespace *ns;
   1580	int err;
   1581
   1582	ft_attr.max_fte = MAX_STEERING_ENT;
   1583	ft_attr.autogroup.max_num_groups = MAX_STEERING_GROUPS;
   1584
   1585	ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
   1586	if (!ns) {
   1587		mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n");
   1588		return -EOPNOTSUPP;
   1589	}
   1590
   1591	ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
   1592	if (IS_ERR(ndev->rxft)) {
   1593		mlx5_vdpa_warn(&ndev->mvdev, "failed to create flow table\n");
   1594		return PTR_ERR(ndev->rxft);
   1595	}
   1596
   1597	err = mac_vlan_add(ndev, ndev->config.mac, 0, false);
   1598	if (err)
   1599		goto err_add;
   1600
   1601	return 0;
   1602
   1603err_add:
   1604	mlx5_destroy_flow_table(ndev->rxft);
   1605	return err;
   1606}
   1607
   1608static void teardown_steering(struct mlx5_vdpa_net *ndev)
   1609{
   1610	clear_mac_vlan_table(ndev);
   1611	mlx5_destroy_flow_table(ndev->rxft);
   1612}
   1613
   1614static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
   1615{
   1616	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
   1617	struct mlx5_control_vq *cvq = &mvdev->cvq;
   1618	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
   1619	struct mlx5_core_dev *pfmdev;
   1620	size_t read;
   1621	u8 mac[ETH_ALEN], mac_back[ETH_ALEN];
   1622
   1623	pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
   1624	switch (cmd) {
   1625	case VIRTIO_NET_CTRL_MAC_ADDR_SET:
   1626		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
   1627		if (read != ETH_ALEN)
   1628			break;
   1629
    1630		if (!memcmp(ndev->config.mac, mac, ETH_ALEN)) {
   1631			status = VIRTIO_NET_OK;
   1632			break;
   1633		}
   1634
   1635		if (is_zero_ether_addr(mac))
   1636			break;
   1637
   1638		if (!is_zero_ether_addr(ndev->config.mac)) {
   1639			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
   1640				mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
   1641					       ndev->config.mac);
   1642				break;
   1643			}
   1644		}
   1645
   1646		if (mlx5_mpfs_add_mac(pfmdev, mac)) {
   1647			mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
   1648				       mac);
   1649			break;
   1650		}
   1651
    1652		/* Back up the original MAC address so that it can be restored if
    1653		 * adding the forward rules fails.
    1654		 */
   1655		memcpy(mac_back, ndev->config.mac, ETH_ALEN);
   1656
   1657		memcpy(ndev->config.mac, mac, ETH_ALEN);
   1658
    1659		/* The flow table entry must be recreated so that packets are forwarded to the new MAC
    1660		 */
   1661		mac_vlan_del(ndev, ndev->config.mac, 0, false);
   1662
   1663		if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) {
    1664			mlx5_vdpa_warn(mvdev, "failed to insert forward rules, trying to restore\n");
   1665
    1666			/* This path is rarely taken, but double-check to be safe */
   1667			if (is_zero_ether_addr(mac_back)) {
   1668				mlx5_vdpa_warn(mvdev, "restore mac failed: Original MAC is zero\n");
   1669				break;
   1670			}
   1671
    1672			/* Try to restore the original MAC address in the MPFS table, and try to
    1673			 * restore the forward rule entry.
    1674			 */
   1675			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
   1676				mlx5_vdpa_warn(mvdev, "restore mac failed: delete MAC %pM from MPFS table failed\n",
   1677					       ndev->config.mac);
   1678			}
   1679
   1680			if (mlx5_mpfs_add_mac(pfmdev, mac_back)) {
   1681				mlx5_vdpa_warn(mvdev, "restore mac failed: insert old MAC %pM into MPFS table failed\n",
   1682					       mac_back);
   1683			}
   1684
   1685			memcpy(ndev->config.mac, mac_back, ETH_ALEN);
   1686
   1687			if (mac_vlan_add(ndev, ndev->config.mac, 0, false))
   1688				mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n");
   1689
   1690			break;
   1691		}
   1692
   1693		status = VIRTIO_NET_OK;
   1694		break;
   1695
   1696	default:
   1697		break;
   1698	}
   1699
   1700	return status;
   1701}
   1702
   1703static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
   1704{
   1705	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
   1706	int cur_qps = ndev->cur_num_vqs / 2;
   1707	int err;
   1708	int i;
   1709
   1710	if (cur_qps > newqps) {
   1711		err = modify_rqt(ndev, 2 * newqps);
   1712		if (err)
   1713			return err;
   1714
   1715		for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--)
   1716			teardown_vq(ndev, &ndev->vqs[i]);
   1717
   1718		ndev->cur_num_vqs = 2 * newqps;
   1719	} else {
   1720		ndev->cur_num_vqs = 2 * newqps;
   1721		for (i = cur_qps * 2; i < 2 * newqps; i++) {
   1722			err = setup_vq(ndev, &ndev->vqs[i]);
   1723			if (err)
   1724				goto clean_added;
   1725		}
   1726		err = modify_rqt(ndev, 2 * newqps);
   1727		if (err)
   1728			goto clean_added;
   1729	}
   1730	return 0;
   1731
   1732clean_added:
   1733	for (--i; i >= 2 * cur_qps; --i)
   1734		teardown_vq(ndev, &ndev->vqs[i]);
   1735
   1736	ndev->cur_num_vqs = 2 * cur_qps;
   1737
   1738	return err;
   1739}
   1740
   1741static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
   1742{
   1743	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
   1744	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
   1745	struct mlx5_control_vq *cvq = &mvdev->cvq;
   1746	struct virtio_net_ctrl_mq mq;
   1747	size_t read;
   1748	u16 newqps;
   1749
   1750	switch (cmd) {
   1751	case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
   1752		/* This mq feature check aligns with pre-existing userspace
   1753		 * implementation.
   1754		 *
   1755		 * Without it, an untrusted driver could fake a multiqueue config
    1756		 * request down to a non-mq device, which may cause the kernel to
    1757		 * panic due to uninitialized resources for the extra vqs. Even with
    1758		 * a well-behaving guest driver, changing the number of vqs on a
    1759		 * non-mq device is not expected to be allowed.
   1760		 */
   1761		if (!MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ))
   1762			break;
   1763
   1764		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq));
   1765		if (read != sizeof(mq))
   1766			break;
   1767
   1768		newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
   1769		if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
   1770		    newqps > ndev->rqt_size)
   1771			break;
   1772
   1773		if (ndev->cur_num_vqs == 2 * newqps) {
   1774			status = VIRTIO_NET_OK;
   1775			break;
   1776		}
   1777
   1778		if (!change_num_qps(mvdev, newqps))
   1779			status = VIRTIO_NET_OK;
   1780
   1781		break;
   1782	default:
   1783		break;
   1784	}
   1785
   1786	return status;
   1787}
   1788
   1789static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd)
   1790{
   1791	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
   1792	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
   1793	struct mlx5_control_vq *cvq = &mvdev->cvq;
   1794	__virtio16 vlan;
   1795	size_t read;
   1796	u16 id;
   1797
   1798	switch (cmd) {
   1799	case VIRTIO_NET_CTRL_VLAN_ADD:
   1800		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
   1801		if (read != sizeof(vlan))
   1802			break;
   1803
   1804		id = mlx5vdpa16_to_cpu(mvdev, vlan);
   1805		if (mac_vlan_add(ndev, ndev->config.mac, id, true))
   1806			break;
   1807
   1808		status = VIRTIO_NET_OK;
   1809		break;
   1810	case VIRTIO_NET_CTRL_VLAN_DEL:
   1811		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
   1812		if (read != sizeof(vlan))
   1813			break;
   1814
   1815		id = mlx5vdpa16_to_cpu(mvdev, vlan);
   1816		mac_vlan_del(ndev, ndev->config.mac, id, true);
   1817		status = VIRTIO_NET_OK;
   1818		break;
   1819	default:
   1820		break;
   1821	}
   1822
   1823	return status;
   1824}
   1825
   1826static void mlx5_cvq_kick_handler(struct work_struct *work)
   1827{
   1828	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
   1829	struct virtio_net_ctrl_hdr ctrl;
   1830	struct mlx5_vdpa_wq_ent *wqent;
   1831	struct mlx5_vdpa_dev *mvdev;
   1832	struct mlx5_control_vq *cvq;
   1833	struct mlx5_vdpa_net *ndev;
   1834	size_t read, write;
   1835	int err;
   1836
   1837	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
   1838	mvdev = wqent->mvdev;
   1839	ndev = to_mlx5_vdpa_ndev(mvdev);
   1840	cvq = &mvdev->cvq;
   1841
   1842	down_write(&ndev->reslock);
   1843
   1844	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
   1845		goto out;
   1846
   1847	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
   1848		goto out;
   1849
   1850	if (!cvq->ready)
   1851		goto out;
   1852
   1853	while (true) {
   1854		err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head,
   1855					   GFP_ATOMIC);
   1856		if (err <= 0)
   1857			break;
   1858
   1859		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl));
   1860		if (read != sizeof(ctrl))
   1861			break;
   1862
   1863		cvq->received_desc++;
   1864		switch (ctrl.class) {
   1865		case VIRTIO_NET_CTRL_MAC:
   1866			status = handle_ctrl_mac(mvdev, ctrl.cmd);
   1867			break;
   1868		case VIRTIO_NET_CTRL_MQ:
   1869			status = handle_ctrl_mq(mvdev, ctrl.cmd);
   1870			break;
   1871		case VIRTIO_NET_CTRL_VLAN:
   1872			status = handle_ctrl_vlan(mvdev, ctrl.cmd);
   1873			break;
   1874		default:
   1875			break;
   1876		}
   1877
   1878		/* Make sure data is written before advancing index */
   1879		smp_wmb();
   1880
   1881		write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status));
   1882		vringh_complete_iotlb(&cvq->vring, cvq->head, write);
   1883		vringh_kiov_cleanup(&cvq->riov);
   1884		vringh_kiov_cleanup(&cvq->wiov);
   1885
   1886		if (vringh_need_notify_iotlb(&cvq->vring))
   1887			vringh_notify(&cvq->vring);
   1888
   1889		cvq->completed_desc++;
   1890		queue_work(mvdev->wq, &wqent->work);
   1891		break;
   1892	}
   1893
   1894out:
   1895	up_write(&ndev->reslock);
   1896}
   1897
   1898static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
   1899{
   1900	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
   1901	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
   1902	struct mlx5_vdpa_virtqueue *mvq;
   1903
   1904	if (!is_index_valid(mvdev, idx))
   1905		return;
   1906
   1907	if (unlikely(is_ctrl_vq_idx(mvdev, idx))) {
   1908		if (!mvdev->wq || !mvdev->cvq.ready)
   1909			return;
   1910
   1911		queue_work(mvdev->wq, &ndev->cvq_ent.work);
   1912		return;
   1913	}
   1914
   1915	mvq = &ndev->vqs[idx];
   1916	if (unlikely(!mvq->ready))
   1917		return;
   1918
   1919	iowrite16(idx, ndev->mvdev.res.kick_addr);
   1920}
   1921
   1922static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area,
   1923				    u64 driver_area, u64 device_area)
   1924{
   1925	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
   1926	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
   1927	struct mlx5_vdpa_virtqueue *mvq;
   1928
   1929	if (!is_index_valid(mvdev, idx))
   1930		return -EINVAL;
   1931
   1932	if (is_ctrl_vq_idx(mvdev, idx)) {
   1933		mvdev->cvq.desc_addr = desc_area;
   1934		mvdev->cvq.device_addr = device_area;
   1935		mvdev->cvq.driver_addr = driver_area;
   1936		return 0;
   1937	}
   1938
   1939	mvq = &ndev->vqs[idx];
   1940	mvq->desc_addr = desc_area;
   1941	mvq->device_addr = device_area;
   1942	mvq->driver_addr = driver_area;
   1943	return 0;
   1944}
   1945
   1946static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
   1947{
   1948	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
   1949	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
   1950	struct mlx5_vdpa_virtqueue *mvq;
   1951
   1952	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
   1953		return;
   1954
   1955	mvq = &ndev->vqs[idx];
   1956	mvq->num_ent = num;
   1957}
   1958
   1959static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
   1960{
   1961	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
   1962	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
   1963
   1964	ndev->event_cbs[idx] = *cb;
   1965	if (is_ctrl_vq_idx(mvdev, idx))
   1966		mvdev->cvq.event_cb = *cb;
   1967}
   1968
   1969static void mlx5_cvq_notify(struct vringh *vring)
   1970{
   1971	struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring);
   1972
   1973	if (!cvq->event_cb.callback)
   1974		return;
   1975
   1976	cvq->event_cb.callback(cvq->event_cb.private);
   1977}
   1978
   1979static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
   1980{
   1981	struct mlx5_control_vq *cvq = &mvdev->cvq;
   1982
   1983	cvq->ready = ready;
   1984	if (!ready)
   1985		return;
   1986
   1987	cvq->vring.notify = mlx5_cvq_notify;
   1988}
   1989
   1990static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
   1991{
   1992	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
   1993	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
   1994	struct mlx5_vdpa_virtqueue *mvq;
   1995
   1996	if (!mvdev->actual_features)
   1997		return;
   1998
   1999	if (!is_index_valid(mvdev, idx))
   2000		return;
   2001
   2002	if (is_ctrl_vq_idx(mvdev, idx)) {
   2003		set_cvq_ready(mvdev, ready);
   2004		return;
   2005	}
   2006
   2007	mvq = &ndev->vqs[idx];
   2008	if (!ready)
   2009		suspend_vq(ndev, mvq);
   2010
   2011	mvq->ready = ready;
   2012}
   2013
   2014static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
   2015{
   2016	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
   2017	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
   2018
   2019	if (!is_index_valid(mvdev, idx))
   2020		return false;
   2021
   2022	if (is_ctrl_vq_idx(mvdev, idx))
   2023		return mvdev->cvq.ready;
   2024
   2025	return ndev->vqs[idx].ready;
   2026}
   2027
   2028static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
   2029				  const struct vdpa_vq_state *state)
   2030{
   2031	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
   2032	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
   2033	struct mlx5_vdpa_virtqueue *mvq;
   2034
   2035	if (!is_index_valid(mvdev, idx))
   2036		return -EINVAL;
   2037
   2038	if (is_ctrl_vq_idx(mvdev, idx)) {
   2039		mvdev->cvq.vring.last_avail_idx = state->split.avail_index;
   2040		return 0;
   2041	}
   2042
   2043	mvq = &ndev->vqs[idx];
   2044	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
   2045		mlx5_vdpa_warn(mvdev, "can't modify available index\n");
   2046		return -EINVAL;
   2047	}
   2048
   2049	mvq->used_idx = state->split.avail_index;
   2050	mvq->avail_idx = state->split.avail_index;
   2051	return 0;
   2052}
   2053
   2054static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
   2055{
   2056	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
   2057	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
   2058	struct mlx5_vdpa_virtqueue *mvq;
   2059	struct mlx5_virtq_attr attr;
   2060	int err;
   2061
   2062	if (!is_index_valid(mvdev, idx))
   2063		return -EINVAL;
   2064
   2065	if (is_ctrl_vq_idx(mvdev, idx)) {
   2066		state->split.avail_index = mvdev->cvq.vring.last_avail_idx;
   2067		return 0;
   2068	}
   2069
   2070	mvq = &ndev->vqs[idx];
   2071	/* If the virtq object was destroyed, use the value saved at
   2072	 * the last minute of suspend_vq. This caters for userspace
   2073	 * that cares about emulating the index after vq is stopped.
   2074	 */
   2075	if (!mvq->initialized) {
   2076		/* Firmware returns a wrong value for the available index.
   2077		 * Since both values should be identical, we take the value of
   2078		 * used_idx which is reported correctly.
   2079		 */
   2080		state->split.avail_index = mvq->used_idx;
   2081		return 0;
   2082	}
   2083
   2084	err = query_virtqueue(ndev, mvq, &attr);
   2085	if (err) {
   2086		mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n");
   2087		return err;
   2088	}
   2089	state->split.avail_index = attr.used_index;
   2090	return 0;
   2091}
   2092
   2093static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
   2094{
   2095	return PAGE_SIZE;
   2096}
   2097
   2098static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdpa, u16 idx)
   2099{
   2100	return 0;
   2101}
   2102
   2103enum { MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9,
   2104	MLX5_VIRTIO_NET_F_CSUM = 1 << 10,
   2105	MLX5_VIRTIO_NET_F_HOST_TSO6 = 1 << 11,
   2106	MLX5_VIRTIO_NET_F_HOST_TSO4 = 1 << 12,
   2107};
   2108
    2109	static u64 mlx_to_virtio_features(u16 dev_features)
   2110{
   2111	u64 result = 0;
   2112
   2113	if (dev_features & MLX5_VIRTIO_NET_F_GUEST_CSUM)
   2114		result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
   2115	if (dev_features & MLX5_VIRTIO_NET_F_CSUM)
   2116		result |= BIT_ULL(VIRTIO_NET_F_CSUM);
   2117	if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO6)
   2118		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
   2119	if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO4)
   2120		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);
   2121
   2122	return result;
   2123}
   2124
   2125static u64 get_supported_features(struct mlx5_core_dev *mdev)
   2126{
   2127	u64 mlx_vdpa_features = 0;
   2128	u16 dev_features;
   2129
   2130	dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mdev, device_features_bits_mask);
    2131	mlx_vdpa_features |= mlx_to_virtio_features(dev_features);
   2132	if (MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_version_1_0))
   2133		mlx_vdpa_features |= BIT_ULL(VIRTIO_F_VERSION_1);
   2134	mlx_vdpa_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
   2135	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
   2136	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
   2137	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ);
   2138	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
   2139	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU);
   2140	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN);
   2141
   2142	return mlx_vdpa_features;
   2143}
   2144
   2145static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev)
   2146{
   2147	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
   2148	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
   2149
   2150	print_features(mvdev, ndev->mvdev.mlx_features, false);
   2151	return ndev->mvdev.mlx_features;
   2152}
   2153
   2154static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features)
   2155{
   2156	/* Minimum features to expect */
   2157	if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
   2158		return -EOPNOTSUPP;
   2159
    2160	/* Double check the feature combination sent down by the driver.
    2161	 * Fail feature sets that lack a required dependency.
   2162	 *
   2163	 * Per VIRTIO v1.1 specification, section 5.1.3.1 Feature bit
   2164	 * requirements: "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ".
   2165	 * By failing the invalid features sent down by untrusted drivers,
    2166	 * we ensure the assumptions made by is_index_valid() and
    2167	 * is_ctrl_vq_idx() are not violated.
   2168	 */
   2169	if ((features & (BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) ==
   2170            BIT_ULL(VIRTIO_NET_F_MQ))
   2171		return -EINVAL;
   2172
   2173	return 0;
   2174}
   2175
   2176static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
   2177{
   2178	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
   2179	int err;
   2180	int i;
   2181
   2182	for (i = 0; i < mvdev->max_vqs; i++) {
   2183		err = setup_vq(ndev, &ndev->vqs[i]);
   2184		if (err)
   2185			goto err_vq;
   2186	}
   2187
   2188	return 0;
   2189
   2190err_vq:
   2191	for (--i; i >= 0; i--)
   2192		teardown_vq(ndev, &ndev->vqs[i]);
   2193
   2194	return err;
   2195}
   2196
   2197static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
   2198{
   2199	struct mlx5_vdpa_virtqueue *mvq;
   2200	int i;
   2201
   2202	for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
   2203		mvq = &ndev->vqs[i];
   2204		if (!mvq->initialized)
   2205			continue;
   2206
   2207		teardown_vq(ndev, mvq);
   2208	}
   2209}
   2210
   2211static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
   2212{
   2213	if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) {
   2214		if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) {
   2215			/* MQ supported. CVQ index is right above the last data virtqueue's */
   2216			mvdev->max_idx = mvdev->max_vqs;
   2217		} else {
    2218			/* Only CVQ supported. Data virtqueues occupy indices 0 and 1.
   2219			 * CVQ gets index 2
   2220			 */
   2221			mvdev->max_idx = 2;
   2222		}
   2223	} else {
   2224		/* Two data virtqueues only: one for rx and one for tx */
   2225		mvdev->max_idx = 1;
   2226	}
   2227}
   2228
   2229static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
   2230{
   2231	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
   2232	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
   2233	int err;
   2234
   2235	print_features(mvdev, features, true);
   2236
   2237	err = verify_driver_features(mvdev, features);
   2238	if (err)
   2239		return err;
   2240
   2241	ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
   2242	if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ))
   2243		ndev->rqt_size = mlx5vdpa16_to_cpu(mvdev, ndev->config.max_virtqueue_pairs);
   2244	else
   2245		ndev->rqt_size = 1;
   2246
   2247	ndev->cur_num_vqs = 2 * ndev->rqt_size;
   2248
   2249	update_cvq_info(mvdev);
   2250	return err;
   2251}
   2252
   2253static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
   2254{
   2255	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
   2256	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
   2257
   2258	ndev->config_cb = *cb;
   2259}
   2260
   2261#define MLX5_VDPA_MAX_VQ_ENTRIES 256
   2262static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev)
   2263{
   2264	return MLX5_VDPA_MAX_VQ_ENTRIES;
   2265}
   2266
   2267static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev)
   2268{
   2269	return VIRTIO_ID_NET;
   2270}
   2271
   2272static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev)
   2273{
   2274	return PCI_VENDOR_ID_MELLANOX;
   2275}
   2276
   2277static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
   2278{
   2279	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
   2280	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
   2281
   2282	print_status(mvdev, ndev->mvdev.status, false);
   2283	return ndev->mvdev.status;
   2284}
   2285
   2286static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
   2287{
   2288	struct mlx5_vq_restore_info *ri = &mvq->ri;
   2289	struct mlx5_virtq_attr attr = {};
   2290	int err;
   2291
   2292	if (mvq->initialized) {
   2293		err = query_virtqueue(ndev, mvq, &attr);
   2294		if (err)
   2295			return err;
   2296	}
   2297
   2298	ri->avail_index = attr.available_index;
   2299	ri->used_index = attr.used_index;
   2300	ri->ready = mvq->ready;
   2301	ri->num_ent = mvq->num_ent;
   2302	ri->desc_addr = mvq->desc_addr;
   2303	ri->device_addr = mvq->device_addr;
   2304	ri->driver_addr = mvq->driver_addr;
   2305	ri->restore = true;
   2306	return 0;
   2307}
   2308
   2309static int save_channels_info(struct mlx5_vdpa_net *ndev)
   2310{
   2311	int i;
   2312
   2313	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
   2314		memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri));
   2315		save_channel_info(ndev, &ndev->vqs[i]);
   2316	}
   2317	return 0;
   2318}
   2319
   2320static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev)
   2321{
   2322	int i;
   2323
   2324	for (i = 0; i < ndev->mvdev.max_vqs; i++)
   2325		memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
   2326}
   2327
   2328static void restore_channels_info(struct mlx5_vdpa_net *ndev)
   2329{
   2330	struct mlx5_vdpa_virtqueue *mvq;
   2331	struct mlx5_vq_restore_info *ri;
   2332	int i;
   2333
   2334	mlx5_clear_vqs(ndev);
   2335	init_mvqs(ndev);
   2336	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
   2337		mvq = &ndev->vqs[i];
   2338		ri = &mvq->ri;
   2339		if (!ri->restore)
   2340			continue;
   2341
   2342		mvq->avail_idx = ri->avail_index;
   2343		mvq->used_idx = ri->used_index;
   2344		mvq->ready = ri->ready;
   2345		mvq->num_ent = ri->num_ent;
   2346		mvq->desc_addr = ri->desc_addr;
   2347		mvq->device_addr = ri->device_addr;
   2348		mvq->driver_addr = ri->driver_addr;
   2349	}
   2350}
   2351
   2352static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
   2353{
   2354	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
   2355	int err;
   2356
   2357	suspend_vqs(ndev);
   2358	err = save_channels_info(ndev);
   2359	if (err)
   2360		goto err_mr;
   2361
   2362	teardown_driver(ndev);
   2363	mlx5_vdpa_destroy_mr(mvdev);
   2364	err = mlx5_vdpa_create_mr(mvdev, iotlb);
   2365	if (err)
   2366		goto err_mr;
   2367
   2368	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
   2369		goto err_mr;
   2370
   2371	restore_channels_info(ndev);
   2372	err = setup_driver(mvdev);
   2373	if (err)
   2374		goto err_setup;
   2375
   2376	return 0;
   2377
   2378err_setup:
   2379	mlx5_vdpa_destroy_mr(mvdev);
   2380err_mr:
   2381	return err;
   2382}
   2383
   2384/* reslock must be held for this function */
   2385static int setup_driver(struct mlx5_vdpa_dev *mvdev)
   2386{
   2387	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
   2388	int err;
   2389
   2390	WARN_ON(!rwsem_is_locked(&ndev->reslock));
   2391
   2392	if (ndev->setup) {
   2393		mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n");
   2394		err = 0;
   2395		goto out;
   2396	}
   2397	err = setup_virtqueues(mvdev);
   2398	if (err) {
   2399		mlx5_vdpa_warn(mvdev, "setup_virtqueues\n");
   2400		goto out;
   2401	}
   2402
   2403	err = create_rqt(ndev);
   2404	if (err) {
   2405		mlx5_vdpa_warn(mvdev, "create_rqt\n");
   2406		goto err_rqt;
   2407	}
   2408
   2409	err = create_tir(ndev);
   2410	if (err) {
   2411		mlx5_vdpa_warn(mvdev, "create_tir\n");
   2412		goto err_tir;
   2413	}
   2414
   2415	err = setup_steering(ndev);
   2416	if (err) {
   2417		mlx5_vdpa_warn(mvdev, "setup_steering\n");
   2418		goto err_fwd;
   2419	}
   2420	ndev->setup = true;
   2421
   2422	return 0;
   2423
   2424err_fwd:
   2425	destroy_tir(ndev);
   2426err_tir:
   2427	destroy_rqt(ndev);
   2428err_rqt:
   2429	teardown_virtqueues(ndev);
   2430out:
   2431	return err;
   2432}
   2433
   2434/* reslock must be held for this function */
   2435static void teardown_driver(struct mlx5_vdpa_net *ndev)
   2436{
   2438	WARN_ON(!rwsem_is_locked(&ndev->reslock));
   2439
   2440	if (!ndev->setup)
   2441		return;
   2442
   2443	teardown_steering(ndev);
   2444	destroy_tir(ndev);
   2445	destroy_rqt(ndev);
   2446	teardown_virtqueues(ndev);
   2447	ndev->setup = false;
   2448}
   2449
   2450static void clear_vqs_ready(struct mlx5_vdpa_net *ndev)
   2451{
   2452	int i;
   2453
   2454	for (i = 0; i < ndev->mvdev.max_vqs; i++)
   2455		ndev->vqs[i].ready = false;
   2456
   2457	ndev->mvdev.cvq.ready = false;
   2458}
   2459
   2460static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
   2461{
   2462	struct mlx5_control_vq *cvq = &mvdev->cvq;
   2463	int err = 0;
   2464
   2465	if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
   2466		err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
   2467					MLX5_CVQ_MAX_ENT, false,
   2468					(struct vring_desc *)(uintptr_t)cvq->desc_addr,
   2469					(struct vring_avail *)(uintptr_t)cvq->driver_addr,
   2470					(struct vring_used *)(uintptr_t)cvq->device_addr);
   2471
   2472	return err;
   2473}
   2474
   2475static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
   2476{
   2477	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
   2478	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
   2479	int err;
   2480
   2481	print_status(mvdev, status, true);
   2482
   2483	down_write(&ndev->reslock);
   2484
   2485	if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
   2486		if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
   2487			err = setup_cvq_vring(mvdev);
   2488			if (err) {
   2489				mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n");
   2490				goto err_setup;
   2491			}
   2492			err = setup_driver(mvdev);
   2493			if (err) {
   2494				mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
   2495				goto err_setup;
   2496			}
   2497		} else {
   2498			mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
   2499			goto err_clear;
   2500		}
   2501	}
   2502
   2503	ndev->mvdev.status = status;
   2504	up_write(&ndev->reslock);
   2505	return;
   2506
   2507err_setup:
   2508	mlx5_vdpa_destroy_mr(&ndev->mvdev);
   2509	ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
   2510err_clear:
   2511	up_write(&ndev->reslock);
   2512}
   2513
   2514static int mlx5_vdpa_reset(struct vdpa_device *vdev)
   2515{
   2516	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
   2517	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
   2518
   2519	print_status(mvdev, 0, true);
   2520	mlx5_vdpa_info(mvdev, "performing device reset\n");
   2521
   2522	down_write(&ndev->reslock);
   2523	teardown_driver(ndev);
   2524	clear_vqs_ready(ndev);
   2525	mlx5_vdpa_destroy_mr(&ndev->mvdev);
   2526	ndev->mvdev.status = 0;
   2527	ndev->cur_num_vqs = 0;
   2528	ndev->mvdev.cvq.received_desc = 0;
   2529	ndev->mvdev.cvq.completed_desc = 0;
   2530	memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
   2531	ndev->mvdev.actual_features = 0;
   2532	++mvdev->generation;
   2533	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
   2534		if (mlx5_vdpa_create_mr(mvdev, NULL))
   2535			mlx5_vdpa_warn(mvdev, "create MR failed\n");
   2536	}
   2537	up_write(&ndev->reslock);
   2538
   2539	return 0;
   2540}
   2541
   2542static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
   2543{
   2544	return sizeof(struct virtio_net_config);
   2545}
   2546
   2547static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
   2548				 unsigned int len)
   2549{
   2550	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
   2551	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
   2552
   2553	if (offset + len <= sizeof(struct virtio_net_config))
   2554		memcpy(buf, (u8 *)&ndev->config + offset, len);
   2555}
   2556
   2557static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf,
   2558				 unsigned int len)
   2559{
   2560	/* not supported */
   2561}
   2562
   2563static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
   2564{
   2565	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
   2566
   2567	return mvdev->generation;
   2568}
   2569
   2570static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
   2571			     struct vhost_iotlb *iotlb)
   2572{
   2573	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
   2574	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
   2575	bool change_map;
   2576	int err;
   2577
   2578	down_write(&ndev->reslock);
   2579
   2580	err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map);
   2581	if (err) {
   2582		mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err);
   2583		goto err;
   2584	}
   2585
   2586	if (change_map)
   2587		err = mlx5_vdpa_change_map(mvdev, iotlb);
   2588
   2589err:
   2590	up_write(&ndev->reslock);
   2591	return err;
   2592}
   2593
   2594static void mlx5_vdpa_free(struct vdpa_device *vdev)
   2595{
   2596	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
   2597	struct mlx5_core_dev *pfmdev;
   2598	struct mlx5_vdpa_net *ndev;
   2599
   2600	ndev = to_mlx5_vdpa_ndev(mvdev);
   2601
   2602	free_resources(ndev);
   2603	mlx5_vdpa_destroy_mr(mvdev);
   2604	if (!is_zero_ether_addr(ndev->config.mac)) {
   2605		pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
   2606		mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
   2607	}
   2608	mlx5_vdpa_free_resources(&ndev->mvdev);
   2609	kfree(ndev->event_cbs);
   2610	kfree(ndev->vqs);
   2611}
   2612
   2613static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
   2614{
   2615	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
   2616	struct vdpa_notification_area ret = {};
   2617	struct mlx5_vdpa_net *ndev;
   2618	phys_addr_t addr;
   2619
   2620	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
   2621		return ret;
   2622
    2623	/* If the SF BAR size is smaller than PAGE_SIZE, do not use direct
    2624	 * notification to avoid the risk of mapping pages that contain the BAR
    2625	 * of more than one SF.
    2626	 */
   2627	if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT)
   2628		return ret;
   2629
   2630	ndev = to_mlx5_vdpa_ndev(mvdev);
   2631	addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr;
   2632	ret.addr = addr;
   2633	ret.size = PAGE_SIZE;
   2634	return ret;
   2635}
   2636
   2637static int mlx5_get_vq_irq(struct vdpa_device *vdv, u16 idx)
   2638{
   2639	return -EOPNOTSUPP;
   2640}
   2641
   2642static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev)
   2643{
   2644	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
   2645
   2646	return mvdev->actual_features;
   2647}
   2648
   2649static int counter_set_query(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
   2650			     u64 *received_desc, u64 *completed_desc)
   2651{
   2652	u32 in[MLX5_ST_SZ_DW(query_virtio_q_counters_in)] = {};
   2653	u32 out[MLX5_ST_SZ_DW(query_virtio_q_counters_out)] = {};
   2654	void *cmd_hdr;
   2655	void *ctx;
   2656	int err;
   2657
   2658	if (!counters_supported(&ndev->mvdev))
   2659		return -EOPNOTSUPP;
   2660
   2661	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
   2662		return -EAGAIN;
   2663
   2664	cmd_hdr = MLX5_ADDR_OF(query_virtio_q_counters_in, in, hdr);
   2665
   2666	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
   2667	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
   2668	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
   2669	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->counter_set_id);
   2670
   2671	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
   2672	if (err)
   2673		return err;
   2674
   2675	ctx = MLX5_ADDR_OF(query_virtio_q_counters_out, out, counters);
   2676	*received_desc = MLX5_GET64(virtio_q_counters, ctx, received_desc);
   2677	*completed_desc = MLX5_GET64(virtio_q_counters, ctx, completed_desc);
   2678	return 0;
   2679}
   2680
   2681static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
   2682					 struct sk_buff *msg,
   2683					 struct netlink_ext_ack *extack)
   2684{
   2685	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
   2686	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
   2687	struct mlx5_vdpa_virtqueue *mvq;
   2688	struct mlx5_control_vq *cvq;
   2689	u64 received_desc;
   2690	u64 completed_desc;
   2691	int err = 0;
   2692
   2693	down_read(&ndev->reslock);
   2694	if (!is_index_valid(mvdev, idx)) {
   2695		NL_SET_ERR_MSG_MOD(extack, "virtqueue index is not valid");
   2696		err = -EINVAL;
   2697		goto out_err;
   2698	}
   2699
   2700	if (idx == ctrl_vq_idx(mvdev)) {
   2701		cvq = &mvdev->cvq;
   2702		received_desc = cvq->received_desc;
   2703		completed_desc = cvq->completed_desc;
   2704		goto out;
   2705	}
   2706
   2707	mvq = &ndev->vqs[idx];
   2708	err = counter_set_query(ndev, mvq, &received_desc, &completed_desc);
   2709	if (err) {
   2710		NL_SET_ERR_MSG_MOD(extack, "failed to query hardware");
   2711		goto out_err;
   2712	}
   2713
   2714out:
   2715	err = -EMSGSIZE;
   2716	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "received_desc"))
   2717		goto out_err;
   2718
   2719	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, received_desc,
   2720			      VDPA_ATTR_PAD))
   2721		goto out_err;
   2722
   2723	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "completed_desc"))
   2724		goto out_err;
   2725
   2726	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, completed_desc,
   2727			      VDPA_ATTR_PAD))
   2728		goto out_err;
   2729
   2730	err = 0;
   2731out_err:
   2732	up_read(&ndev->reslock);
   2733	return err;
   2734}
   2735
   2736static const struct vdpa_config_ops mlx5_vdpa_ops = {
   2737	.set_vq_address = mlx5_vdpa_set_vq_address,
   2738	.set_vq_num = mlx5_vdpa_set_vq_num,
   2739	.kick_vq = mlx5_vdpa_kick_vq,
   2740	.set_vq_cb = mlx5_vdpa_set_vq_cb,
   2741	.set_vq_ready = mlx5_vdpa_set_vq_ready,
   2742	.get_vq_ready = mlx5_vdpa_get_vq_ready,
   2743	.set_vq_state = mlx5_vdpa_set_vq_state,
   2744	.get_vq_state = mlx5_vdpa_get_vq_state,
   2745	.get_vendor_vq_stats = mlx5_vdpa_get_vendor_vq_stats,
   2746	.get_vq_notification = mlx5_get_vq_notification,
   2747	.get_vq_irq = mlx5_get_vq_irq,
   2748	.get_vq_align = mlx5_vdpa_get_vq_align,
   2749	.get_vq_group = mlx5_vdpa_get_vq_group,
   2750	.get_device_features = mlx5_vdpa_get_device_features,
   2751	.set_driver_features = mlx5_vdpa_set_driver_features,
   2752	.get_driver_features = mlx5_vdpa_get_driver_features,
   2753	.set_config_cb = mlx5_vdpa_set_config_cb,
   2754	.get_vq_num_max = mlx5_vdpa_get_vq_num_max,
   2755	.get_device_id = mlx5_vdpa_get_device_id,
   2756	.get_vendor_id = mlx5_vdpa_get_vendor_id,
   2757	.get_status = mlx5_vdpa_get_status,
   2758	.set_status = mlx5_vdpa_set_status,
   2759	.reset = mlx5_vdpa_reset,
   2760	.get_config_size = mlx5_vdpa_get_config_size,
   2761	.get_config = mlx5_vdpa_get_config,
   2762	.set_config = mlx5_vdpa_set_config,
   2763	.get_generation = mlx5_vdpa_get_generation,
   2764	.set_map = mlx5_vdpa_set_map,
   2765	.free = mlx5_vdpa_free,
   2766};
   2767
   2768static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
   2769{
   2770	u16 hw_mtu;
   2771	int err;
   2772
   2773	err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
   2774	if (err)
   2775		return err;
   2776
   2777	*mtu = hw_mtu - MLX5V_ETH_HARD_MTU;
   2778	return 0;
   2779}
   2780
   2781static int alloc_resources(struct mlx5_vdpa_net *ndev)
   2782{
   2783	struct mlx5_vdpa_net_resources *res = &ndev->res;
   2784	int err;
   2785
   2786	if (res->valid) {
   2787		mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n");
   2788		return -EEXIST;
   2789	}
   2790
   2791	err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn);
   2792	if (err)
   2793		return err;
   2794
   2795	err = create_tis(ndev);
   2796	if (err)
   2797		goto err_tis;
   2798
   2799	res->valid = true;
   2800
   2801	return 0;
   2802
   2803err_tis:
   2804	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
   2805	return err;
   2806}
   2807
   2808static void free_resources(struct mlx5_vdpa_net *ndev)
   2809{
   2810	struct mlx5_vdpa_net_resources *res = &ndev->res;
   2811
   2812	if (!res->valid)
   2813		return;
   2814
   2815	destroy_tis(ndev);
   2816	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
   2817	res->valid = false;
   2818}
   2819
   2820static void init_mvqs(struct mlx5_vdpa_net *ndev)
   2821{
   2822	struct mlx5_vdpa_virtqueue *mvq;
   2823	int i;
   2824
   2825	for (i = 0; i < ndev->mvdev.max_vqs; ++i) {
   2826		mvq = &ndev->vqs[i];
   2827		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
   2828		mvq->index = i;
   2829		mvq->ndev = ndev;
   2830		mvq->fwqp.fw = true;
   2831	}
   2838}
   2839
   2840struct mlx5_vdpa_mgmtdev {
   2841	struct vdpa_mgmt_dev mgtdev;
   2842	struct mlx5_adev *madev;
   2843	struct mlx5_vdpa_net *ndev;
   2844};
   2845
   2846static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
   2847{
   2848	u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
   2849	u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
   2850	int err;
   2851
   2852	MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
   2853	MLX5_SET(query_vport_state_in, in, op_mod, opmod);
   2854	MLX5_SET(query_vport_state_in, in, vport_number, vport);
   2855	if (vport)
   2856		MLX5_SET(query_vport_state_in, in, other_vport, 1);
   2857
   2858	err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
   2859	if (err)
   2860		return 0;
   2861
   2862	return MLX5_GET(query_vport_state_out, out, state);
   2863}
   2864
   2865static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
   2866{
   2867	if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
   2868	    VPORT_STATE_UP)
   2869		return true;
   2870
   2871	return false;
   2872}
   2873
   2874static void update_carrier(struct work_struct *work)
   2875{
   2876	struct mlx5_vdpa_wq_ent *wqent;
   2877	struct mlx5_vdpa_dev *mvdev;
   2878	struct mlx5_vdpa_net *ndev;
   2879
   2880	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
   2881	mvdev = wqent->mvdev;
   2882	ndev = to_mlx5_vdpa_ndev(mvdev);
   2883	if (get_link_state(mvdev))
   2884		ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
   2885	else
   2886		ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
   2887
   2888	if (ndev->config_cb.callback)
   2889		ndev->config_cb.callback(ndev->config_cb.private);
   2890
   2891	kfree(wqent);
   2892}
   2893
   2894static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
   2895{
   2896	struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
   2897	struct mlx5_eqe *eqe = param;
   2898	int ret = NOTIFY_DONE;
   2899	struct mlx5_vdpa_wq_ent *wqent;
   2900
   2901	if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
   2902		switch (eqe->sub_type) {
   2903		case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
   2904		case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
   2905			wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
   2906			if (!wqent)
   2907				return NOTIFY_DONE;
   2908
   2909			wqent->mvdev = &ndev->mvdev;
   2910			INIT_WORK(&wqent->work, update_carrier);
   2911			queue_work(ndev->mvdev.wq, &wqent->work);
   2912			ret = NOTIFY_OK;
   2913			break;
   2914		default:
   2915			return NOTIFY_DONE;
   2916		}
   2917		return ret;
   2918	}
   2919	return ret;
   2920}
   2921
   2922static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu)
   2923{
   2924	int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
   2925	void *in;
   2926	int err;
   2927
   2928	in = kvzalloc(inlen, GFP_KERNEL);
   2929	if (!in)
   2930		return -ENOMEM;
   2931
   2932	MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1);
   2933	MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu,
   2934		 mtu + MLX5V_ETH_HARD_MTU);
   2935	MLX5_SET(modify_nic_vport_context_in, in, opcode,
   2936		 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
   2937
   2938	err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
   2939
   2940	kvfree(in);
   2941	return err;
   2942}
   2943
   2944static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
   2945			     const struct vdpa_dev_set_config *add_config)
   2946{
   2947	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
   2948	struct virtio_net_config *config;
   2949	struct mlx5_core_dev *pfmdev;
   2950	struct mlx5_vdpa_dev *mvdev;
   2951	struct mlx5_vdpa_net *ndev;
   2952	struct mlx5_core_dev *mdev;
   2953	u32 max_vqs;
   2954	u16 mtu;
   2955	int err;
   2956
   2957	if (mgtdev->ndev)
   2958		return -ENOSPC;
   2959
   2960	mdev = mgtdev->madev->mdev;
   2961	if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) &
   2962	    MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) {
   2963		dev_warn(mdev->device, "missing support for split virtqueues\n");
   2964		return -EOPNOTSUPP;
   2965	}
   2966
   2967	max_vqs = min_t(int, MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues),
   2968			1 << MLX5_CAP_GEN(mdev, log_max_rqt_size));
   2969	if (max_vqs < 2) {
   2970		dev_warn(mdev->device,
   2971			 "%d virtqueues are supported. At least 2 are required\n",
   2972			 max_vqs);
   2973		return -EAGAIN;
   2974	}
   2975
   2976	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
   2977		if (add_config->net.max_vq_pairs > max_vqs / 2)
   2978			return -EINVAL;
   2979		max_vqs = min_t(u32, max_vqs, 2 * add_config->net.max_vq_pairs);
   2980	} else {
   2981		max_vqs = 2;
   2982	}
   2983
   2984	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
   2985				 1, 1, name, false);
   2986	if (IS_ERR(ndev))
   2987		return PTR_ERR(ndev);
   2988
   2989	ndev->mvdev.mlx_features = mgtdev->mgtdev.supported_features;
   2990	ndev->mvdev.max_vqs = max_vqs;
   2991	mvdev = &ndev->mvdev;
   2992	mvdev->mdev = mdev;
   2993
   2994	ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
   2995	ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL);
   2996	if (!ndev->vqs || !ndev->event_cbs) {
   2997		err = -ENOMEM;
   2998		goto err_alloc;
   2999	}
   3000
   3001	init_mvqs(ndev);
   3002	init_rwsem(&ndev->reslock);
   3003	config = &ndev->config;
   3004
   3005	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU)) {
   3006		err = config_func_mtu(mdev, add_config->net.mtu);
   3007		if (err)
   3008			goto err_alloc;
   3009	}
   3010
   3011	err = query_mtu(mdev, &mtu);
   3012	if (err)
   3013		goto err_alloc;
   3014
   3015	ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu);
   3016
   3017	if (get_link_state(mvdev))
   3018		ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
   3019	else
   3020		ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
   3021
   3022	if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
   3023		memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN);
   3024	} else {
   3025		err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
   3026		if (err)
   3027			goto err_alloc;
   3028	}
   3029
   3030	if (!is_zero_ether_addr(config->mac)) {
   3031		pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
   3032		err = mlx5_mpfs_add_mac(pfmdev, config->mac);
   3033		if (err)
   3034			goto err_alloc;
   3035
   3036		ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MAC);
   3037	}
   3038
   3039	config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2);
   3040	mvdev->vdev.dma_dev = &mdev->pdev->dev;
   3041	err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
   3042	if (err)
   3043		goto err_mpfs;
   3044
   3045	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
   3046		err = mlx5_vdpa_create_mr(mvdev, NULL);
   3047		if (err)
   3048			goto err_res;
   3049	}
   3050
   3051	err = alloc_resources(ndev);
   3052	if (err)
   3053		goto err_mr;
   3054
   3055	ndev->cvq_ent.mvdev = mvdev;
   3056	INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler);
   3057	mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq");
   3058	if (!mvdev->wq) {
   3059		err = -ENOMEM;
   3060		goto err_res2;
   3061	}
   3062
   3063	ndev->nb.notifier_call = event_handler;
   3064	mlx5_notifier_register(mdev, &ndev->nb);
   3065	mvdev->vdev.mdev = &mgtdev->mgtdev;
   3066	err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1);
   3067	if (err)
   3068		goto err_reg;
   3069
   3070	mgtdev->ndev = ndev;
   3071	return 0;
   3072
   3073err_reg:
   3074	destroy_workqueue(mvdev->wq);
   3075err_res2:
   3076	free_resources(ndev);
   3077err_mr:
   3078	mlx5_vdpa_destroy_mr(mvdev);
   3079err_res:
   3080	mlx5_vdpa_free_resources(&ndev->mvdev);
   3081err_mpfs:
   3082	if (!is_zero_ether_addr(config->mac))
   3083		mlx5_mpfs_del_mac(pfmdev, config->mac);
   3084err_alloc:
   3085	put_device(&mvdev->vdev.dev);
   3086	return err;
   3087}
   3088
   3089static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
   3090{
   3091	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
   3092	struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
   3093	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
   3094	struct workqueue_struct *wq;
   3095
   3096	mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
   3097	wq = mvdev->wq;
   3098	mvdev->wq = NULL;
   3099	destroy_workqueue(wq);
   3100	_vdpa_unregister_device(dev);
   3101	mgtdev->ndev = NULL;
   3102}
   3103
   3104static const struct vdpa_mgmtdev_ops mdev_ops = {
   3105	.dev_add = mlx5_vdpa_dev_add,
   3106	.dev_del = mlx5_vdpa_dev_del,
   3107};
   3108
   3109static struct virtio_device_id id_table[] = {
   3110	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
   3111	{ 0 },
   3112};
   3113
   3114static int mlx5v_probe(struct auxiliary_device *adev,
   3115		       const struct auxiliary_device_id *id)
   3117{
   3118	struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
   3119	struct mlx5_core_dev *mdev = madev->mdev;
   3120	struct mlx5_vdpa_mgmtdev *mgtdev;
   3121	int err;
   3122
   3123	mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL);
   3124	if (!mgtdev)
   3125		return -ENOMEM;
   3126
   3127	mgtdev->mgtdev.ops = &mdev_ops;
   3128	mgtdev->mgtdev.device = mdev->device;
   3129	mgtdev->mgtdev.id_table = id_table;
   3130	mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
   3131					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP) |
   3132					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU);
   3133	mgtdev->mgtdev.max_supported_vqs =
   3134		MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1;
   3135	mgtdev->mgtdev.supported_features = get_supported_features(mdev);
   3136	mgtdev->madev = madev;
   3137
   3138	err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
   3139	if (err)
   3140		goto reg_err;
   3141
   3142	auxiliary_set_drvdata(adev, mgtdev);
   3143
   3144	return 0;
   3145
   3146reg_err:
   3147	kfree(mgtdev);
   3148	return err;
   3149}
   3150
   3151static void mlx5v_remove(struct auxiliary_device *adev)
   3152{
   3153	struct mlx5_vdpa_mgmtdev *mgtdev;
   3154
   3155	mgtdev = auxiliary_get_drvdata(adev);
   3156	vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
   3157	kfree(mgtdev);
   3158}
   3159
   3160static const struct auxiliary_device_id mlx5v_id_table[] = {
   3161	{ .name = MLX5_ADEV_NAME ".vnet", },
   3162	{},
   3163};
   3164
   3165MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);
   3166
   3167static struct auxiliary_driver mlx5v_driver = {
   3168	.name = "vnet",
   3169	.probe = mlx5v_probe,
   3170	.remove = mlx5v_remove,
   3171	.id_table = mlx5v_id_table,
   3172};
   3173
   3174module_auxiliary_driver(mlx5v_driver);