cachepc-qemu

Fork of AMDESE/qemu with changes for cachepc side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu
Log | Files | Refs | Submodules | LICENSE | sfeed.txt

virtio-net.c (118812B)


      1/*
      2 * Virtio Network Device
      3 *
      4 * Copyright IBM, Corp. 2007
      5 *
      6 * Authors:
      7 *  Anthony Liguori   <aliguori@us.ibm.com>
      8 *
      9 * This work is licensed under the terms of the GNU GPL, version 2.  See
     10 * the COPYING file in the top-level directory.
     11 *
     12 */
     13
     14#include "qemu/osdep.h"
     15#include "qemu/atomic.h"
     16#include "qemu/iov.h"
     17#include "qemu/main-loop.h"
     18#include "qemu/module.h"
     19#include "hw/virtio/virtio.h"
     20#include "net/net.h"
     21#include "net/checksum.h"
     22#include "net/tap.h"
     23#include "qemu/error-report.h"
     24#include "qemu/timer.h"
     25#include "qemu/option.h"
     26#include "qemu/option_int.h"
     27#include "qemu/config-file.h"
     28#include "qapi/qmp/qdict.h"
     29#include "hw/virtio/virtio-net.h"
     30#include "net/vhost_net.h"
     31#include "net/announce.h"
     32#include "hw/virtio/virtio-bus.h"
     33#include "qapi/error.h"
     34#include "qapi/qapi-events-net.h"
     35#include "hw/qdev-properties.h"
     36#include "qapi/qapi-types-migration.h"
     37#include "qapi/qapi-events-migration.h"
     38#include "hw/virtio/virtio-access.h"
     39#include "migration/misc.h"
     40#include "standard-headers/linux/ethtool.h"
     41#include "sysemu/sysemu.h"
     42#include "trace.h"
     43#include "monitor/qdev.h"
     44#include "hw/pci/pci.h"
     45#include "net_rx_pkt.h"
     46#include "hw/virtio/vhost.h"
     47
/* Migration stream version for virtio-net device state. */
#define VIRTIO_NET_VM_VERSION    11

/* Capacity of the unicast+multicast MAC filter table. */
#define MAC_TABLE_ENTRIES    64
#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queues; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */

/* TCP header fields consulted by the receive-segment-coalescing code. */
#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in ip header without option */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/*
 * Purge coalesced packets timer interval.  This value affects performance
 * a lot and should be tuned carefully: '300000' (300us) is the recommended
 * value to pass the WHQL test, while '50000' can gain 2x netperf throughput
 * with tso/gso/gro 'off'.
 */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

/* All hash types advertised when RSS / hash reporting is enabled. */
#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
     91
/*
 * Maps each feature bit to the end offset within struct virtio_net_config
 * that the guest may access when that feature is offered; used to compute
 * the size of the exposed config space.
 */
static const VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
     .end = endof(struct virtio_net_config, supported_hash_types)},
    {}
};
    107
    108static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
    109{
    110    VirtIONet *n = qemu_get_nic_opaque(nc);
    111
    112    return &n->vqs[nc->queue_index];
    113}
    114
    115static int vq2q(int queue_index)
    116{
    117    return queue_index / 2;
    118}
    119
    120/* TODO
    121 * - we could suppress RX interrupt if we were so inclined.
    122 */
    123
/*
 * Config-space read handler: build a virtio_net_config image and copy the
 * first n->config_size bytes into @config.  For a vhost-vdpa peer the
 * backend's own config is then fetched and overrides the local values.
 */
static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;
    NetClientState *nc = qemu_get_queue(n->nic);
    static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };

    int ret = 0;
    memset(&netcfg, 0 , sizeof(struct virtio_net_config));
    /* Multi-byte fields are stored guest-endian via the virtio_st*_p helpers. */
    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
    /* Advertise a real indirection table only when RSS is offered. */
    virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
                 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
                 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
    virtio_stl_p(vdev, &netcfg.supported_hash_types,
                 VIRTIO_NET_RSS_SUPPORTED_HASHES);
    memcpy(config, &netcfg, n->config_size);

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
                                   n->config_size);
        if (ret != -1) {
            /*
             * Some NIC/kernel combinations present 0 as the mac address.  As
             * that is not a legal address, try to proceed with the
             * address from the QEMU command line in the hope that the
             * address has been configured correctly elsewhere - just not
             * reported by the device.
             */
            if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
                info_report("Zero hardware mac address detected. Ignoring.");
                memcpy(netcfg.mac, n->mac, ETH_ALEN);
            }
            /* Second copy so the vdpa-provided values win over the local ones. */
            memcpy(config, &netcfg, n->config_size);
        }
    }
}
    170
    171static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
    172{
    173    VirtIONet *n = VIRTIO_NET(vdev);
    174    struct virtio_net_config netcfg = {};
    175    NetClientState *nc = qemu_get_queue(n->nic);
    176
    177    memcpy(&netcfg, config, n->config_size);
    178
    179    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
    180        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
    181        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
    182        memcpy(n->mac, netcfg.mac, ETH_ALEN);
    183        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    184    }
    185
    186    /*
    187     * Is this VDPA? No peer means not VDPA: there's no way to
    188     * disconnect/reconnect a VDPA peer.
    189     */
    190    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
    191        vhost_net_set_config(get_vhost_net(nc->peer),
    192                             (uint8_t *)&netcfg, 0, n->config_size,
    193                             VHOST_SET_CONFIG_TYPE_MASTER);
    194      }
    195}
    196
    197static bool virtio_net_started(VirtIONet *n, uint8_t status)
    198{
    199    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    200    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
    201        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
    202}
    203
    204static void virtio_net_announce_notify(VirtIONet *net)
    205{
    206    VirtIODevice *vdev = VIRTIO_DEVICE(net);
    207    trace_virtio_net_announce_notify();
    208
    209    net->status |= VIRTIO_NET_S_ANNOUNCE;
    210    virtio_notify_config(vdev);
    211}
    212
    213static void virtio_net_announce_timer(void *opaque)
    214{
    215    VirtIONet *n = opaque;
    216    trace_virtio_net_announce_timer(n->announce_timer.round);
    217
    218    n->announce_timer.round--;
    219    virtio_net_announce_notify(n);
    220}
    221
    222static void virtio_net_announce(NetClientState *nc)
    223{
    224    VirtIONet *n = qemu_get_nic_opaque(nc);
    225    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    226
    227    /*
    228     * Make sure the virtio migration announcement timer isn't running
    229     * If it is, let it trigger announcement so that we do not cause
    230     * confusion.
    231     */
    232    if (n->announce_timer.round) {
    233        return;
    234    }
    235
    236    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
    237        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
    238            virtio_net_announce_notify(n);
    239    }
    240}
    241
/*
 * Start or stop the peer's vhost backend so that its running state tracks
 * virtio_net_started() combined with the peer link state.  No-op when the
 * peer has no vhost backend or the state already matches.
 */
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    /* Desired state equals current state: nothing to do. */
    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        /* vhost cannot byte-swap vnet headers; stay in userspace then. */
        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0;  i < queues; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        /* Mark started before vhost_net_start(); cleared again on failure. */
        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queues);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queues);
        n->vhost_started = 0;
    }
}
    299
    300static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
    301                                          NetClientState *peer,
    302                                          bool enable)
    303{
    304    if (virtio_is_big_endian(vdev)) {
    305        return qemu_set_vnet_be(peer, enable);
    306    } else {
    307        return qemu_set_vnet_le(peer, enable);
    308    }
    309}
    310
    311static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
    312                                       int queues, bool enable)
    313{
    314    int i;
    315
    316    for (i = 0; i < queues; i++) {
    317        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
    318            enable) {
    319            while (--i >= 0) {
    320                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
    321            }
    322
    323            return true;
    324        }
    325    }
    326
    327    return false;
    328}
    329
    330static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
    331{
    332    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    333    int queues = n->multiqueue ? n->max_queues : 1;
    334
    335    if (virtio_net_started(n, status)) {
    336        /* Before using the device, we tell the network backend about the
    337         * endianness to use when parsing vnet headers. If the backend
    338         * can't do it, we fallback onto fixing the headers in the core
    339         * virtio-net code.
    340         */
    341        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
    342                                                            queues, true);
    343    } else if (virtio_net_started(n, vdev->status)) {
    344        /* After using the device, we need to reset the network backend to
    345         * the default (guest native endianness), otherwise the guest may
    346         * lose network connectivity if it is rebooted into a different
    347         * endianness.
    348         */
    349        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
    350    }
    351}
    352
    353static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
    354{
    355    unsigned int dropped = virtqueue_drop_all(vq);
    356    if (dropped) {
    357        virtio_notify(vdev, vq);
    358    }
    359}
    360
/*
 * VirtIODevice status hook: propagate driver/link state changes to the
 * backend (endianness, vhost) and to each queue's TX flush mechanism
 * (timer or bottom half).
 */
static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        /* Queues past curr_queues (or any but queue 0 without MQ) are off. */
        if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        /* "started" means emulated here in QEMU, i.e. not handed to vhost. */
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        /* Remaining work only applies to queues with a pending TX flush. */
        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            /* Re-arm whichever TX flush mechanism this queue uses. */
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                               qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            /* Queue is stopping: cancel the pending flush. */
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* if tx is waiting we are likely have some packets in tx queue
                 * and disabled notification */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}
    417
    418static void virtio_net_set_link_status(NetClientState *nc)
    419{
    420    VirtIONet *n = qemu_get_nic_opaque(nc);
    421    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    422    uint16_t old_status = n->status;
    423
    424    if (nc->link_down)
    425        n->status &= ~VIRTIO_NET_S_LINK_UP;
    426    else
    427        n->status |= VIRTIO_NET_S_LINK_UP;
    428
    429    if (n->status != old_status)
    430        virtio_notify_config(vdev);
    431
    432    virtio_net_set_status(vdev, vdev->status);
    433}
    434
    435static void rxfilter_notify(NetClientState *nc)
    436{
    437    VirtIONet *n = qemu_get_nic_opaque(nc);
    438
    439    if (nc->rxfilter_notify_enabled) {
    440        char *path = object_get_canonical_path(OBJECT(n->qdev));
    441        qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
    442                                              n->netclient_name, path);
    443        g_free(path);
    444
    445        /* disable event notification to avoid events flooding */
    446        nc->rxfilter_notify_enabled = 0;
    447    }
    448}
    449
    450static intList *get_vlan_table(VirtIONet *n)
    451{
    452    intList *list;
    453    int i, j;
    454
    455    list = NULL;
    456    for (i = 0; i < MAX_VLAN >> 5; i++) {
    457        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
    458            if (n->vlans[i] & (1U << j)) {
    459                QAPI_LIST_PREPEND(list, (i << 5) + j);
    460            }
    461        }
    462    }
    463
    464    return list;
    465}
    466
    467static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
    468{
    469    VirtIONet *n = qemu_get_nic_opaque(nc);
    470    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    471    RxFilterInfo *info;
    472    strList *str_list;
    473    int i;
    474
    475    info = g_malloc0(sizeof(*info));
    476    info->name = g_strdup(nc->name);
    477    info->promiscuous = n->promisc;
    478
    479    if (n->nouni) {
    480        info->unicast = RX_STATE_NONE;
    481    } else if (n->alluni) {
    482        info->unicast = RX_STATE_ALL;
    483    } else {
    484        info->unicast = RX_STATE_NORMAL;
    485    }
    486
    487    if (n->nomulti) {
    488        info->multicast = RX_STATE_NONE;
    489    } else if (n->allmulti) {
    490        info->multicast = RX_STATE_ALL;
    491    } else {
    492        info->multicast = RX_STATE_NORMAL;
    493    }
    494
    495    info->broadcast_allowed = n->nobcast;
    496    info->multicast_overflow = n->mac_table.multi_overflow;
    497    info->unicast_overflow = n->mac_table.uni_overflow;
    498
    499    info->main_mac = qemu_mac_strdup_printf(n->mac);
    500
    501    str_list = NULL;
    502    for (i = 0; i < n->mac_table.first_multi; i++) {
    503        QAPI_LIST_PREPEND(str_list,
    504                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    505    }
    506    info->unicast_table = str_list;
    507
    508    str_list = NULL;
    509    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
    510        QAPI_LIST_PREPEND(str_list,
    511                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    512    }
    513    info->multicast_table = str_list;
    514    info->vlan_table = get_vlan_table(n);
    515
    516    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
    517        info->vlan = RX_STATE_ALL;
    518    } else if (!info->vlan_table) {
    519        info->vlan = RX_STATE_NONE;
    520    } else {
    521        info->vlan = RX_STATE_NORMAL;
    522    }
    523
    524    /* enable event notification after query */
    525    nc->rxfilter_notify_enabled = 1;
    526
    527    return info;
    528}
    529
/*
 * Device reset: restore RX filtering, MAC/VLAN tables, announce state and
 * queue count to power-on defaults and drop any in-flight TX packets.
 */
static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queues = 1;
    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    /* n->vlans is a MAX_VLAN-bit bitmap, hence MAX_VLAN >> 3 bytes. */
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0;  i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (nc->peer) {
            qemu_flush_or_purge_queued_packets(nc->peer, true);
            assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
        }
    }
}
    568
    569static void peer_test_vnet_hdr(VirtIONet *n)
    570{
    571    NetClientState *nc = qemu_get_queue(n->nic);
    572    if (!nc->peer) {
    573        return;
    574    }
    575
    576    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
    577}
    578
/* Returns the value cached by peer_test_vnet_hdr(). */
static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}
    583
    584static int peer_has_ufo(VirtIONet *n)
    585{
    586    if (!peer_has_vnet_hdr(n))
    587        return 0;
    588
    589    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
    590
    591    return n->has_ufo;
    592}
    593
    594static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
    595                                       int version_1, int hash_report)
    596{
    597    int i;
    598    NetClientState *nc;
    599
    600    n->mergeable_rx_bufs = mergeable_rx_bufs;
    601
    602    if (version_1) {
    603        n->guest_hdr_len = hash_report ?
    604            sizeof(struct virtio_net_hdr_v1_hash) :
    605            sizeof(struct virtio_net_hdr_mrg_rxbuf);
    606        n->rss_data.populate_hash = !!hash_report;
    607    } else {
    608        n->guest_hdr_len = n->mergeable_rx_bufs ?
    609            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
    610            sizeof(struct virtio_net_hdr);
    611    }
    612
    613    for (i = 0; i < n->max_queues; i++) {
    614        nc = qemu_get_subqueue(n->nic, i);
    615
    616        if (peer_has_vnet_hdr(n) &&
    617            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
    618            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
    619            n->host_hdr_len = n->guest_hdr_len;
    620        }
    621    }
    622}
    623
    624static int virtio_net_max_tx_queue_size(VirtIONet *n)
    625{
    626    NetClientState *peer = n->nic_conf.peers.ncs[0];
    627
    628    /*
    629     * Backends other than vhost-user don't support max queue size.
    630     */
    631    if (!peer) {
    632        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    633    }
    634
    635    if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
    636        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    637    }
    638
    639    return VIRTQUEUE_MAX_SIZE;
    640}
    641
    642static int peer_attach(VirtIONet *n, int index)
    643{
    644    NetClientState *nc = qemu_get_subqueue(n->nic, index);
    645
    646    if (!nc->peer) {
    647        return 0;
    648    }
    649
    650    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
    651        vhost_set_vring_enable(nc->peer, 1);
    652    }
    653
    654    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
    655        return 0;
    656    }
    657
    658    if (n->max_queues == 1) {
    659        return 0;
    660    }
    661
    662    return tap_enable(nc->peer);
    663}
    664
    665static int peer_detach(VirtIONet *n, int index)
    666{
    667    NetClientState *nc = qemu_get_subqueue(n->nic, index);
    668
    669    if (!nc->peer) {
    670        return 0;
    671    }
    672
    673    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
    674        vhost_set_vring_enable(nc->peer, 0);
    675    }
    676
    677    if (nc->peer->info->type !=  NET_CLIENT_DRIVER_TAP) {
    678        return 0;
    679    }
    680
    681    return tap_disable(nc->peer);
    682}
    683
    684static void virtio_net_set_queues(VirtIONet *n)
    685{
    686    int i;
    687    int r;
    688
    689    if (n->nic->peer_deleted) {
    690        return;
    691    }
    692
    693    for (i = 0; i < n->max_queues; i++) {
    694        if (i < n->curr_queues) {
    695            r = peer_attach(n, i);
    696            assert(!r);
    697        } else {
    698            r = peer_detach(n, i);
    699            assert(!r);
    700        }
    701    }
    702}
    703
    704static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
    705
/*
 * Feature negotiation hook: extend @features with everything this device
 * supports, then strip anything the peer backend cannot provide.
 */
static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* Firstly sync all virtio-net possible supported features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    /* Without a vnet-header-capable peer, no offload feature can work. */
    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    /* Non-vhost peers need no further filtering. */
    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    /* RSS is not offered on vhost unless the eBPF steering program loaded. */
    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
    }
    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    /* Optionally keep offering MTU even when the vhost backend lacks it. */
    if (n->mtu_bypass_backend &&
            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    return features;
}
    753
    754static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
    755{
    756    uint64_t features = 0;
    757
    758    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
    759     * but also these: */
    760    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    761    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    762    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    763    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    764    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
    765
    766    return features;
    767}
    768
    769static void virtio_net_apply_guest_offloads(VirtIONet *n)
    770{
    771    qemu_set_offload(qemu_get_queue(n->nic)->peer,
    772            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
    773            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
    774            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
    775            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
    776            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
    777}
    778
    779static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
    780{
    781    static const uint64_t guest_offloads_mask =
    782        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
    783        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
    784        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
    785        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
    786        (1ULL << VIRTIO_NET_F_GUEST_UFO);
    787
    788    return guest_offloads_mask & features;
    789}
    790
/* Guest offload bits from the features the guest actually acked. */
static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}
    796
/* Context for the qemu_opts_foreach() scan in failover_set_primary(). */
typedef struct {
    VirtIONet *n;   /* failover virtio-net device being matched */
    char *id;       /* out: id of the matching primary device option */
} FailoverId;
    801
    802/**
    803 * Set the id of the failover primary device
    804 *
    805 * @opaque: FailoverId to setup
    806 * @opts: opts for device we are handling
    807 * @errp: returns an error if this function fails
    808 */
    809static int failover_set_primary(void *opaque, QemuOpts *opts, Error **errp)
    810{
    811    FailoverId *fid = opaque;
    812    const char *standby_id = qemu_opt_get(opts, "failover_pair_id");
    813
    814    if (g_strcmp0(standby_id, fid->n->netclient_name) == 0) {
    815        fid->id = g_strdup(opts->id);
    816        return 1;
    817    }
    818
    819    return 0;
    820}
    821
/**
 * Find the primary device id for this failover virtio-net
 *
 * @n: VirtIONet device
 */
    828static char *failover_find_primary_device_id(VirtIONet *n)
    829{
    830    Error *err = NULL;
    831    FailoverId fid;
    832
    833    fid.n = n;
    834    if (!qemu_opts_foreach(qemu_find_opts("device"),
    835                           failover_set_primary, &fid, &err)) {
    836        return NULL;
    837    }
    838    return fid.id;
    839}
    840
/**
 * Find the primary device for this failover virtio-net
 *
 * @n: VirtIONet device
 */
    847static DeviceState *failover_find_primary_device(VirtIONet *n)
    848{
    849    char *id = failover_find_primary_device_id(n);
    850
    851    if (!id) {
    852        return NULL;
    853    }
    854
    855    return qdev_find_recursive(sysbus_get_default(), id);
    856}
    857
    858static void failover_add_primary(VirtIONet *n, Error **errp)
    859{
    860    Error *err = NULL;
    861    QemuOpts *opts;
    862    char *id;
    863    DeviceState *dev = failover_find_primary_device(n);
    864
    865    if (dev) {
    866        return;
    867    }
    868
    869    id = failover_find_primary_device_id(n);
    870    if (!id) {
    871        error_setg(errp, "Primary device not found");
    872        error_append_hint(errp, "Virtio-net failover will not work. Make "
    873                          "sure primary device has parameter"
    874                          " failover_pair_id=%s\n", n->netclient_name);
    875        return;
    876    }
    877    opts = qemu_opts_find(qemu_find_opts("device"), id);
    878    g_assert(opts); /* cannot be NULL because id was found using opts list */
    879    dev = qdev_device_add(opts, &err);
    880    if (err) {
    881        qemu_opts_del(opts);
    882    } else {
    883        object_unref(OBJECT(dev));
    884    }
    885    error_propagate(errp, err);
    886}
    887
/*
 * Feature negotiation completed: latch the guest's accepted feature
 * bits into device state and reconfigure everything that depends on
 * them (queue layout, header format, offloads, vhost, VLAN filter,
 * failover).
 */
static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    Error *err = NULL;
    int i;

    /* Drop the MTU feature when the device may bypass the backend MTU
     * restriction and the backend itself did not offer VIRTIO_NET_F_MTU. */
    if (n->mtu_bypass_backend &&
            !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    /* Either RSS or MQ implies multiple queue pairs. */
    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    /* Header layout depends on mergeable buffers / version 1 / hash report. */
    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1),
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_HASH_REPORT));

    /* RSC (receive coalescing) also requires the matching guest TSO bit. */
    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
    n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    /* Propagate the accepted feature set to every vhost backend. */
    for (i = 0;  i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);
    }

    /* Without CTRL_VLAN the guest cannot manage the filter, so pass all
     * VLANs; with it, start with every VLAN filtered until configured. */
    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0, MAX_VLAN >> 3);
    } else {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }

    /* Failover negotiated: unhide and plug the primary device. */
    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
        qapi_event_send_failover_negotiated(n->netclient_name);
        qatomic_set(&n->failover_primary_hidden, false);
        failover_add_primary(n, &err);
        if (err) {
            warn_report_err(err);
        }
    }
}
    947
    948static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
    949                                     struct iovec *iov, unsigned int iov_cnt)
    950{
    951    uint8_t on;
    952    size_t s;
    953    NetClientState *nc = qemu_get_queue(n->nic);
    954
    955    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    956    if (s != sizeof(on)) {
    957        return VIRTIO_NET_ERR;
    958    }
    959
    960    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
    961        n->promisc = on;
    962    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
    963        n->allmulti = on;
    964    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
    965        n->alluni = on;
    966    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
    967        n->nomulti = on;
    968    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
    969        n->nouni = on;
    970    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
    971        n->nobcast = on;
    972    } else {
    973        return VIRTIO_NET_ERR;
    974    }
    975
    976    rxfilter_notify(nc);
    977
    978    return VIRTIO_NET_OK;
    979}
    980
    981static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
    982                                     struct iovec *iov, unsigned int iov_cnt)
    983{
    984    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    985    uint64_t offloads;
    986    size_t s;
    987
    988    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
    989        return VIRTIO_NET_ERR;
    990    }
    991
    992    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    993    if (s != sizeof(offloads)) {
    994        return VIRTIO_NET_ERR;
    995    }
    996
    997    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
    998        uint64_t supported_offloads;
    999
   1000        offloads = virtio_ldq_p(vdev, &offloads);
   1001
   1002        if (!n->has_vnet_hdr) {
   1003            return VIRTIO_NET_ERR;
   1004        }
   1005
   1006        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
   1007            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
   1008        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
   1009            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
   1010        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);
   1011
   1012        supported_offloads = virtio_net_supported_guest_offloads(n);
   1013        if (offloads & ~supported_offloads) {
   1014            return VIRTIO_NET_ERR;
   1015        }
   1016
   1017        n->curr_guest_offloads = offloads;
   1018        virtio_net_apply_guest_offloads(n);
   1019
   1020        return VIRTIO_NET_OK;
   1021    } else {
   1022        return VIRTIO_NET_ERR;
   1023    }
   1024}
   1025
/*
 * Handle VIRTIO_NET_CTRL_MAC commands: either set the primary MAC
 * address (ADDR_SET) or replace the whole unicast + multicast filter
 * table (TABLE_SET).  The TABLE_SET payload is two back-to-back
 * virtio_net_ctrl_mac structures: unicast entries, then multicast.
 */
static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        /* Payload must be exactly one MAC address. */
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    /* Build the new table in a scratch buffer first so the live table
     * is untouched if the command turns out to be malformed. */
    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    /* First table in the payload: unicast entries. */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        /* Too many unicast entries to store: fall back to pass-all. */
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    /* Second table in the payload: multicast entries. */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    /* The multicast table must consume the rest of the payload exactly. */
    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        /* Too many multicast entries to store: fall back to pass-all. */
        multi_overflow = 1;
    }

    /* Everything validated: commit the new table atomically. */
    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}
   1121
   1122static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
   1123                                        struct iovec *iov, unsigned int iov_cnt)
   1124{
   1125    VirtIODevice *vdev = VIRTIO_DEVICE(n);
   1126    uint16_t vid;
   1127    size_t s;
   1128    NetClientState *nc = qemu_get_queue(n->nic);
   1129
   1130    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
   1131    vid = virtio_lduw_p(vdev, &vid);
   1132    if (s != sizeof(vid)) {
   1133        return VIRTIO_NET_ERR;
   1134    }
   1135
   1136    if (vid >= MAX_VLAN)
   1137        return VIRTIO_NET_ERR;
   1138
   1139    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
   1140        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
   1141    else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
   1142        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
   1143    else
   1144        return VIRTIO_NET_ERR;
   1145
   1146    rxfilter_notify(nc);
   1147
   1148    return VIRTIO_NET_OK;
   1149}
   1150
   1151static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
   1152                                      struct iovec *iov, unsigned int iov_cnt)
   1153{
   1154    trace_virtio_net_handle_announce(n->announce_timer.round);
   1155    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
   1156        n->status & VIRTIO_NET_S_ANNOUNCE) {
   1157        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
   1158        if (n->announce_timer.round) {
   1159            qemu_announce_timer_step(&n->announce_timer);
   1160        }
   1161        return VIRTIO_NET_OK;
   1162    } else {
   1163        return VIRTIO_NET_ERR;
   1164    }
   1165}
   1166
   1167static void virtio_net_detach_epbf_rss(VirtIONet *n);
   1168
   1169static void virtio_net_disable_rss(VirtIONet *n)
   1170{
   1171    if (n->rss_data.enabled) {
   1172        trace_virtio_net_rss_disable();
   1173    }
   1174    n->rss_data.enabled = false;
   1175
   1176    virtio_net_detach_epbf_rss(n);
   1177}
   1178
   1179static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
   1180{
   1181    NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
   1182    if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
   1183        return false;
   1184    }
   1185
   1186    return nc->info->set_steering_ebpf(nc, prog_fd);
   1187}
   1188
/* Copy the device's RSS state into the eBPF program's config layout. */
static void rss_data_to_rss_config(struct VirtioNetRssData *data,
                                   struct EBPFRSSConfig *config)
{
    config->redirect = data->redirect;
    config->populate_hash = data->populate_hash;
    config->hash_types = data->hash_types;
    config->indirections_len = data->indirections_len;
    config->default_queue = data->default_queue;
}
   1198
   1199static bool virtio_net_attach_epbf_rss(VirtIONet *n)
   1200{
   1201    struct EBPFRSSConfig config = {};
   1202
   1203    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
   1204        return false;
   1205    }
   1206
   1207    rss_data_to_rss_config(&n->rss_data, &config);
   1208
   1209    if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
   1210                          n->rss_data.indirections_table, n->rss_data.key)) {
   1211        return false;
   1212    }
   1213
   1214    if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
   1215        return false;
   1216    }
   1217
   1218    return true;
   1219}
   1220
/* Detach any steering eBPF program by installing fd -1 on the backend. */
static void virtio_net_detach_epbf_rss(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
}
   1225
   1226static bool virtio_net_load_ebpf(VirtIONet *n)
   1227{
   1228    if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
   1229        /* backend does't support steering ebpf */
   1230        return false;
   1231    }
   1232
   1233    return ebpf_rss_load(&n->ebpf_rss);
   1234}
   1235
/* Detach the steering program from the backend and release it. */
static void virtio_net_unload_ebpf(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
    ebpf_rss_unload(&n->ebpf_rss);
}
   1241
   1242static uint16_t virtio_net_handle_rss(VirtIONet *n,
   1243                                      struct iovec *iov,
   1244                                      unsigned int iov_cnt,
   1245                                      bool do_rss)
   1246{
   1247    VirtIODevice *vdev = VIRTIO_DEVICE(n);
   1248    struct virtio_net_rss_config cfg;
   1249    size_t s, offset = 0, size_get;
   1250    uint16_t queues, i;
   1251    struct {
   1252        uint16_t us;
   1253        uint8_t b;
   1254    } QEMU_PACKED temp;
   1255    const char *err_msg = "";
   1256    uint32_t err_value = 0;
   1257
   1258    if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
   1259        err_msg = "RSS is not negotiated";
   1260        goto error;
   1261    }
   1262    if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
   1263        err_msg = "Hash report is not negotiated";
   1264        goto error;
   1265    }
   1266    size_get = offsetof(struct virtio_net_rss_config, indirection_table);
   1267    s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
   1268    if (s != size_get) {
   1269        err_msg = "Short command buffer";
   1270        err_value = (uint32_t)s;
   1271        goto error;
   1272    }
   1273    n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
   1274    n->rss_data.indirections_len =
   1275        virtio_lduw_p(vdev, &cfg.indirection_table_mask);
   1276    n->rss_data.indirections_len++;
   1277    if (!do_rss) {
   1278        n->rss_data.indirections_len = 1;
   1279    }
   1280    if (!is_power_of_2(n->rss_data.indirections_len)) {
   1281        err_msg = "Invalid size of indirection table";
   1282        err_value = n->rss_data.indirections_len;
   1283        goto error;
   1284    }
   1285    if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
   1286        err_msg = "Too large indirection table";
   1287        err_value = n->rss_data.indirections_len;
   1288        goto error;
   1289    }
   1290    n->rss_data.default_queue = do_rss ?
   1291        virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
   1292    if (n->rss_data.default_queue >= n->max_queues) {
   1293        err_msg = "Invalid default queue";
   1294        err_value = n->rss_data.default_queue;
   1295        goto error;
   1296    }
   1297    offset += size_get;
   1298    size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
   1299    g_free(n->rss_data.indirections_table);
   1300    n->rss_data.indirections_table = g_malloc(size_get);
   1301    if (!n->rss_data.indirections_table) {
   1302        err_msg = "Can't allocate indirections table";
   1303        err_value = n->rss_data.indirections_len;
   1304        goto error;
   1305    }
   1306    s = iov_to_buf(iov, iov_cnt, offset,
   1307                   n->rss_data.indirections_table, size_get);
   1308    if (s != size_get) {
   1309        err_msg = "Short indirection table buffer";
   1310        err_value = (uint32_t)s;
   1311        goto error;
   1312    }
   1313    for (i = 0; i < n->rss_data.indirections_len; ++i) {
   1314        uint16_t val = n->rss_data.indirections_table[i];
   1315        n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
   1316    }
   1317    offset += size_get;
   1318    size_get = sizeof(temp);
   1319    s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
   1320    if (s != size_get) {
   1321        err_msg = "Can't get queues";
   1322        err_value = (uint32_t)s;
   1323        goto error;
   1324    }
   1325    queues = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queues;
   1326    if (queues == 0 || queues > n->max_queues) {
   1327        err_msg = "Invalid number of queues";
   1328        err_value = queues;
   1329        goto error;
   1330    }
   1331    if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
   1332        err_msg = "Invalid key size";
   1333        err_value = temp.b;
   1334        goto error;
   1335    }
   1336    if (!temp.b && n->rss_data.hash_types) {
   1337        err_msg = "No key provided";
   1338        err_value = 0;
   1339        goto error;
   1340    }
   1341    if (!temp.b && !n->rss_data.hash_types) {
   1342        virtio_net_disable_rss(n);
   1343        return queues;
   1344    }
   1345    offset += size_get;
   1346    size_get = temp.b;
   1347    s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
   1348    if (s != size_get) {
   1349        err_msg = "Can get key buffer";
   1350        err_value = (uint32_t)s;
   1351        goto error;
   1352    }
   1353    n->rss_data.enabled = true;
   1354
   1355    if (!n->rss_data.populate_hash) {
   1356        if (!virtio_net_attach_epbf_rss(n)) {
   1357            /* EBPF must be loaded for vhost */
   1358            if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
   1359                warn_report("Can't load eBPF RSS for vhost");
   1360                goto error;
   1361            }
   1362            /* fallback to software RSS */
   1363            warn_report("Can't load eBPF RSS - fallback to software RSS");
   1364            n->rss_data.enabled_software_rss = true;
   1365        }
   1366    } else {
   1367        /* use software RSS for hash populating */
   1368        /* and detach eBPF if was loaded before */
   1369        virtio_net_detach_epbf_rss(n);
   1370        n->rss_data.enabled_software_rss = true;
   1371    }
   1372
   1373    trace_virtio_net_rss_enable(n->rss_data.hash_types,
   1374                                n->rss_data.indirections_len,
   1375                                temp.b);
   1376    return queues;
   1377error:
   1378    trace_virtio_net_rss_error(err_msg, err_value);
   1379    virtio_net_disable_rss(n);
   1380    return 0;
   1381}
   1382
/*
 * Handle VIRTIO_NET_CTRL_MQ commands: hash-report configuration, RSS
 * configuration, or a plain virtqueue-pairs count.  Returns the
 * VIRTIO_NET_OK/VIRTIO_NET_ERR ack for the control virtqueue.
 */
static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t queues;

    /* Any MQ command resets RSS; the RSS commands re-enable it below. */
    virtio_net_disable_rss(n);
    if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
        queues = virtio_net_handle_rss(n, iov, iov_cnt, false);
        return queues ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
    }
    if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
        queues = virtio_net_handle_rss(n, iov, iov_cnt, true);
    } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        struct virtio_net_ctrl_mq mq;
        size_t s;
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
        if (s != sizeof(mq)) {
            return VIRTIO_NET_ERR;
        }
        queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    } else {
        return VIRTIO_NET_ERR;
    }

    /* Reject out-of-spec or unsupported queue counts. */
    if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queues > n->max_queues ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queues = queues;
    /* stop the backend before changing the number of queues to avoid handling a
     * disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queues(n);

    return VIRTIO_NET_OK;
}
   1427
/*
 * Control virtqueue handler: pop each request, dispatch on the command
 * class, and push back a one-byte VIRTIO_NET_OK/VIRTIO_NET_ERR status.
 */
static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    VirtQueueElement *elem;
    size_t s;
    struct iovec *iov, *iov2;
    unsigned int iov_cnt;

    for (;;) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }
        /* A request needs room for the header (out) and the status (in);
         * anything smaller is a fatal guest error. */
        if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
            iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
            virtio_error(vdev, "virtio-net ctrl missing headers");
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }

        /* Work on a copy of the sg list: the handlers advance iov with
         * iov_discard_front(), but the original (iov2) must be freed. */
        iov_cnt = elem->out_num;
        iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
        if (s != sizeof(ctrl)) {
            status = VIRTIO_NET_ERR;
        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
            status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
            status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
            status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
        }

        /* Write the ack byte back into the guest's status buffer. */
        s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
        assert(s == sizeof(status));

        virtqueue_push(vq, elem, sizeof(status));
        virtio_notify(vdev, vq);
        g_free(iov2);
        g_free(elem);
    }
}
   1480
   1481/* RX */
   1482
   1483static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
   1484{
   1485    VirtIONet *n = VIRTIO_NET(vdev);
   1486    int queue_index = vq2q(virtio_get_queue_index(vq));
   1487
   1488    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
   1489}
   1490
   1491static bool virtio_net_can_receive(NetClientState *nc)
   1492{
   1493    VirtIONet *n = qemu_get_nic_opaque(nc);
   1494    VirtIODevice *vdev = VIRTIO_DEVICE(n);
   1495    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
   1496
   1497    if (!vdev->vm_running) {
   1498        return false;
   1499    }
   1500
   1501    if (nc->queue_index >= n->curr_queues) {
   1502        return false;
   1503    }
   1504
   1505    if (!virtio_queue_ready(q->rx_vq) ||
   1506        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
   1507        return false;
   1508    }
   1509
   1510    return true;
   1511}
   1512
/*
 * Return 1 if the RX virtqueue can take a packet of @bufsize bytes,
 * 0 otherwise.  Also drives guest notifications: enabled while we are
 * starved for buffers, disabled once buffers are available.  The
 * check-enable-recheck sequence below is deliberate and must not be
 * simplified.
 */
static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    /* Buffers available: suppress further guest notifications. */
    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}
   1535
/* Byte-swap the multi-byte virtio-net header fields between host and
 * guest byte order (no-op when they already match). */
static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}
   1543
   1544/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
   1545 * it never finds out that the packets don't have valid checksums.  This
   1546 * causes dhclient to get upset.  Fedora's carried a patch for ages to
   1547 * fix this with Xen but it hasn't appeared in an upstream release of
   1548 * dhclient yet.
   1549 *
   1550 * To avoid breaking existing guests, we catch udp packets and add
   1551 * checksums.  This is terrible but it's better than hacking the guest
   1552 * kernels.
   1553 *
   1554 * N.B. if we introduce a zero-copy API, this operation is no longer free so
   1555 * we should provide a mechanism to disable it to avoid polluting the host
   1556 * cache.
   1557 */
   1558static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
   1559                                        uint8_t *buf, size_t size)
   1560{
   1561    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
   1562        (size > 27 && size < 1500) && /* normal sized MTU */
   1563        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
   1564        (buf[23] == 17) && /* ip.protocol == UDP */
   1565        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
   1566        net_checksum_calculate(buf, size, CSUM_UDP);
   1567        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
   1568    }
   1569}
   1570
   1571static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
   1572                           const void *buf, size_t size)
   1573{
   1574    if (n->has_vnet_hdr) {
   1575        /* FIXME this cast is evil */
   1576        void *wbuf = (void *)buf;
   1577        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
   1578                                    size - n->host_hdr_len);
   1579
   1580        if (n->needs_vnet_hdr_swap) {
   1581            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
   1582        }
   1583        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
   1584    } else {
   1585        struct virtio_net_hdr hdr = {
   1586            .flags = 0,
   1587            .gso_type = VIRTIO_NET_HDR_GSO_NONE
   1588        };
   1589        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
   1590    }
   1591}
   1592
   1593static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
   1594{
   1595    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
   1596    static const uint8_t vlan[] = {0x81, 0x00};
   1597    uint8_t *ptr = (uint8_t *)buf;
   1598    int i;
   1599
   1600    if (n->promisc)
   1601        return 1;
   1602
   1603    ptr += n->host_hdr_len;
   1604
   1605    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
   1606        int vid = lduw_be_p(ptr + 14) & 0xfff;
   1607        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
   1608            return 0;
   1609    }
   1610
   1611    if (ptr[0] & 1) { // multicast
   1612        if (!memcmp(ptr, bcast, sizeof(bcast))) {
   1613            return !n->nobcast;
   1614        } else if (n->nomulti) {
   1615            return 0;
   1616        } else if (n->allmulti || n->mac_table.multi_overflow) {
   1617            return 1;
   1618        }
   1619
   1620        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
   1621            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
   1622                return 1;
   1623            }
   1624        }
   1625    } else { // unicast
   1626        if (n->nouni) {
   1627            return 0;
   1628        } else if (n->alluni || n->mac_table.uni_overflow) {
   1629            return 1;
   1630        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
   1631            return 1;
   1632        }
   1633
   1634        for (i = 0; i < n->mac_table.first_multi; i++) {
   1635            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
   1636                return 1;
   1637            }
   1638        }
   1639    }
   1640
   1641    return 0;
   1642}
   1643
   1644static uint8_t virtio_net_get_hash_type(bool isip4,
   1645                                        bool isip6,
   1646                                        bool isudp,
   1647                                        bool istcp,
   1648                                        uint32_t types)
   1649{
   1650    if (isip4) {
   1651        if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
   1652            return NetPktRssIpV4Tcp;
   1653        }
   1654        if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
   1655            return NetPktRssIpV4Udp;
   1656        }
   1657        if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
   1658            return NetPktRssIpV4;
   1659        }
   1660    } else if (isip6) {
   1661        uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
   1662                        VIRTIO_NET_RSS_HASH_TYPE_TCPv6;
   1663
   1664        if (istcp && (types & mask)) {
   1665            return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
   1666                NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
   1667        }
   1668        mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
   1669        if (isudp && (types & mask)) {
   1670            return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
   1671                NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;
   1672        }
   1673        mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
   1674        if (types & mask) {
   1675            return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
   1676                NetPktRssIpV6Ex : NetPktRssIpV6;
   1677        }
   1678    }
   1679    return 0xff;
   1680}
   1681
/*
 * Store the computed hash value and report type into the
 * virtio_net_hdr_v1_hash header at the start of @buf.
 *
 * NOTE(review): the fields are written in host byte order with no
 * virtio_stl/stw conversion — presumably fine on little-endian hosts,
 * but confirm the expected endianness for big-endian hosts.
 */
static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
                                   uint32_t hash)
{
    struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
    hdr->hash_value = hash;
    hdr->hash_report = report;
}
   1689
/*
 * Apply software RSS to an incoming packet.
 *
 * Parses the packet's protocols, picks a hash type from the enabled
 * hash-type mask, computes the RSS hash and, when populate_hash is on,
 * stores hash and report type in the packet's virtio header.  When
 * redirection is enabled the hash indexes the indirection table to pick
 * the destination queue.
 *
 * Returns the queue index the packet should be moved to, or -1 when it
 * should be processed on the current queue.
 */
static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    unsigned int index = nc->queue_index, new_index = index;
    struct NetRxPkt *pkt = n->rx_pkt;
    uint8_t net_hash_type;
    uint32_t hash;
    bool isip4, isip6, isudp, istcp;
    /* Maps NetPktRss* hash types to VIRTIO_NET_HASH_REPORT_* values. */
    static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
        VIRTIO_NET_HASH_REPORT_IPv4,
        VIRTIO_NET_HASH_REPORT_TCPv4,
        VIRTIO_NET_HASH_REPORT_TCPv6,
        VIRTIO_NET_HASH_REPORT_IPv6,
        VIRTIO_NET_HASH_REPORT_IPv6_EX,
        VIRTIO_NET_HASH_REPORT_TCPv6_EX,
        VIRTIO_NET_HASH_REPORT_UDPv4,
        VIRTIO_NET_HASH_REPORT_UDPv6,
        VIRTIO_NET_HASH_REPORT_UDPv6_EX
    };

    net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
                             size - n->host_hdr_len);
    net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
    /* Fragments carry no usable L4 header: hash on L3 only. */
    if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
        istcp = isudp = false;
    }
    if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
        istcp = isudp = false;
    }
    net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
                                             n->rss_data.hash_types);
    if (net_hash_type > NetPktRssIpV6UdpEx) {
        /* No enabled hash type matched this packet. */
        if (n->rss_data.populate_hash) {
            virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
        }
        return n->rss_data.redirect ? n->rss_data.default_queue : -1;
    }

    hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);

    if (n->rss_data.populate_hash) {
        virtio_set_packet_hash(buf, reports[net_hash_type], hash);
    }

    if (n->rss_data.redirect) {
        /* NOTE(review): assumes indirections_len is a power of two —
         * presumably enforced by the control-path setup; confirm. */
        new_index = hash & (n->rss_data.indirections_len - 1);
        new_index = n->rss_data.indirections_table[new_index];
    }

    return (index == new_index) ? -1 : new_index;
}
   1742
/*
 * Core receive path: copy one packet from @buf into the rx virtqueue of
 * the subqueue @nc belongs to.  Must be called with the RCU read lock
 * held (see virtio_net_do_receive()).
 *
 * @no_rss: when true, skip software RSS redirection; set on the nested
 * call after a redirect so a packet is redirected at most once.
 *
 * Returns @size when the packet was delivered or deliberately dropped,
 * 0 when the queue lacks buffers (caller may retry later), -1 on error.
 */
static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
                                      size_t size, bool no_rss)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
    size_t lens[VIRTQUEUE_MAX_SIZE];
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset, j;
    ssize_t err;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    /* Software RSS may select another subqueue; recurse once with
     * no_rss=true so the redirected call cannot redirect again. */
    if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
        int index = virtio_net_process_rss(nc, buf, size);
        if (index >= 0) {
            NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
            return virtio_net_receive_rcu(nc2, buf, size, true);
        }
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    /* Dropped by MAC/VLAN filtering: report the packet as consumed. */
    if (!receive_filter(n, buf, size))
        return size;

    offset = i = 0;

    /* Fill descriptor chains until the whole packet is copied; with
     * mergeable rx buffers a packet may span several elements. */
    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        total = 0;

        if (i == VIRTQUEUE_MAX_SIZE) {
            virtio_error(vdev, "virtio-net unexpected long buffer chain");
            err = size;
            goto err;
        }

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            err = -1;
            goto err;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            err = -1;
            goto err;
        }

        sg = elem->in_sg;
        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                /* Remember where num_buffers lives in guest memory so it
                 * can be patched once the element count is known. */
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem->in_num, buf, size);
            if (n->rss_data.populate_hash) {
                /* Copy buf[sizeof(mhdr)..host_hdr_len) into the guest
                 * buffer at the same offset (the hash fields written by
                 * virtio_set_packet_hash() live in buf already). */
                offset = sizeof(mhdr);
                iov_from_buf(sg, elem->in_num, offset,
                             buf + offset, n->host_hdr_len - sizeof(mhdr));
            }
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            err = size;
            goto err;
        }

        elems[i] = elem;
        lens[i] = total;
        i++;
    }

    if (mhdr_cnt) {
        /* Patch num_buffers now that the element count is final. */
        virtio_stw_p(vdev, &mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    for (j = 0; j < i; j++) {
        /* signal other side */
        virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
        g_free(elems[j]);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;

err:
    for (j = 0; j < i; j++) {
        g_free(elems[j]);
    }

    return err;
}
   1884
/*
 * Receive wrapper: holds the RCU read lock for the duration of the copy
 * into the guest's receive queue.
 */
static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    RCU_READ_LOCK_GUARD();

    return virtio_net_receive_rcu(nc, buf, size, false);
}
   1892
/*
 * Locate the IPv4 and TCP headers inside a buffer that starts with the
 * guest virtio-net header, recording pointers and lengths in @unit.
 *
 * Note: htons() is used for network-to-host conversion throughout; it is
 * equivalent to ntohs() for 16-bit values.
 */
static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    uint16_t ip_hdrlen;
    struct ip_header *ip;

    ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
                              + sizeof(struct eth_header));
    unit->ip = (void *)ip;
    /* IHL is in 32-bit words: multiply by 4 to get bytes. */
    ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
    unit->ip_plen = &ip->ip_len;
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
    /* TCP data offset: top 4 bits, in 32-bit words (>> 12, then * 4). */
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
    /* TCP payload = IPv4 total length minus IP and TCP header lengths. */
    unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
}
   1909
/*
 * Locate the IPv6 and TCP headers inside a buffer that starts with the
 * guest virtio-net header, recording pointers and lengths in @unit.
 */
static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    struct ip6_header *ip6;

    ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
                                 + sizeof(struct eth_header));
    unit->ip = ip6;
    unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    /* No extension headers are handled here: TCP follows immediately. */
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
                                        + sizeof(struct ip6_header));
    /* TCP data offset: top 4 bits, in 32-bit words (>> 12, then * 4). */
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;

    /* Unlike IPv4, the IPv6 payload length field already excludes the
       IP header, so only the TCP header length is subtracted. */
    unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
}
   1928
   1929static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
   1930                                       VirtioNetRscSeg *seg)
   1931{
   1932    int ret;
   1933    struct virtio_net_hdr_v1 *h;
   1934
   1935    h = (struct virtio_net_hdr_v1 *)seg->buf;
   1936    h->flags = 0;
   1937    h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
   1938
   1939    if (seg->is_coalesced) {
   1940        h->rsc.segments = seg->packets;
   1941        h->rsc.dup_acks = seg->dup_ack;
   1942        h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
   1943        if (chain->proto == ETH_P_IP) {
   1944            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
   1945        } else {
   1946            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
   1947        }
   1948    }
   1949
   1950    ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
   1951    QTAILQ_REMOVE(&chain->buffers, seg, next);
   1952    g_free(seg->buf);
   1953    g_free(seg);
   1954
   1955    return ret;
   1956}
   1957
/*
 * Drain-timer callback: deliver every cached segment of a chain to the
 * guest, then re-arm the timer if any segment is still queued.
 */
static void virtio_net_rsc_purge(void *opq)
{
    VirtioNetRscSeg *seg, *rn;
    VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            /* Delivery failed; count it and keep draining. */
            chain->stat.purge_failed++;
            continue;
        }
    }

    chain->stat.timer++;
    if (!QTAILQ_EMPTY(&chain->buffers)) {
        timer_mod(chain->drain_timer,
              qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
    }
}
   1976
   1977static void virtio_net_rsc_cleanup(VirtIONet *n)
   1978{
   1979    VirtioNetRscChain *chain, *rn_chain;
   1980    VirtioNetRscSeg *seg, *rn_seg;
   1981
   1982    QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
   1983        QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
   1984            QTAILQ_REMOVE(&chain->buffers, seg, next);
   1985            g_free(seg->buf);
   1986            g_free(seg);
   1987        }
   1988
   1989        timer_free(chain->drain_timer);
   1990        QTAILQ_REMOVE(&n->rsc_chains, chain, next);
   1991        g_free(chain);
   1992    }
   1993}
   1994
/*
 * Copy @buf into a newly allocated segment and append it to the chain's
 * buffer list as a candidate for coalescing with later packets.
 */
static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
                                     NetClientState *nc,
                                     const uint8_t *buf, size_t size)
{
    uint16_t hdr_len;
    VirtioNetRscSeg *seg;

    hdr_len = chain->n->guest_hdr_len;
    seg = g_malloc(sizeof(VirtioNetRscSeg));
    /* Over-allocate so later payloads can be appended in place; sizing
     * with the IPv6 header also covers the smaller IPv4 case. */
    seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
        + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
    memcpy(seg->buf, buf, size);
    seg->size = size;
    seg->packets = 1;
    seg->dup_ack = 0;
    seg->is_coalesced = 0;
    seg->nc = nc;

    QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
    chain->stat.cache++;

    /* Record header pointers into the copied buffer, not the original. */
    switch (chain->proto) {
    case ETH_P_IP:
        virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
        break;
    case ETH_P_IPV6:
        virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
        break;
    default:
        g_assert_not_reached();
    }
}
   2027
   2028static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
   2029                                         VirtioNetRscSeg *seg,
   2030                                         const uint8_t *buf,
   2031                                         struct tcp_header *n_tcp,
   2032                                         struct tcp_header *o_tcp)
   2033{
   2034    uint32_t nack, oack;
   2035    uint16_t nwin, owin;
   2036
   2037    nack = htonl(n_tcp->th_ack);
   2038    nwin = htons(n_tcp->th_win);
   2039    oack = htonl(o_tcp->th_ack);
   2040    owin = htons(o_tcp->th_win);
   2041
   2042    if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
   2043        chain->stat.ack_out_of_win++;
   2044        return RSC_FINAL;
   2045    } else if (nack == oack) {
   2046        /* duplicated ack or window probe */
   2047        if (nwin == owin) {
   2048            /* duplicated ack, add dup ack count due to whql test up to 1 */
   2049            chain->stat.dup_ack++;
   2050            return RSC_FINAL;
   2051        } else {
   2052            /* Coalesce window update */
   2053            o_tcp->th_win = n_tcp->th_win;
   2054            chain->stat.win_update++;
   2055            return RSC_COALESCE;
   2056        }
   2057    } else {
   2058        /* pure ack, go to 'C', finalize*/
   2059        chain->stat.pure_ack++;
   2060        return RSC_FINAL;
   2061    }
   2062}
   2063
/*
 * Try to merge the new packet described by @n_unit into the cached
 * segment @seg (both already known to belong to the same flow).
 *
 * Returns RSC_COALESCE when the new data or a window update was merged,
 * RSC_FINAL when the cached segment must be flushed first (out of
 * window, out of order, duplicate/pure ack, or oversize result).
 */
static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
                                            VirtioNetRscSeg *seg,
                                            const uint8_t *buf,
                                            VirtioNetRscUnit *n_unit)
{
    void *data;
    uint16_t o_ip_len;
    uint32_t nseq, oseq;
    VirtioNetRscUnit *o_unit;

    o_unit = &seg->unit;
    o_ip_len = htons(*o_unit->ip_plen);
    nseq = htonl(n_unit->tcp->th_seq);
    oseq = htonl(o_unit->tcp->th_seq);

    /* out of order or retransmitted. */
    if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.data_out_of_win++;
        return RSC_FINAL;
    }

    /* Start of the new packet's TCP payload. */
    data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
    if (nseq == oseq) {
        if ((o_unit->payload == 0) && n_unit->payload) {
            /* From no payload to payload, normal case, not a dup ack or etc */
            chain->stat.data_after_pure_ack++;
            goto coalesce;
        } else {
            return virtio_net_rsc_handle_ack(chain, seg, buf,
                                             n_unit->tcp, o_unit->tcp);
        }
    } else if ((nseq - oseq) != o_unit->payload) {
        /* Not a consistent packet, out of order */
        chain->stat.data_out_of_order++;
        return RSC_FINAL;
    } else {
coalesce:
        if ((o_ip_len + n_unit->payload) > chain->max_payload) {
            chain->stat.over_size++;
            return RSC_FINAL;
        }

        /* Here comes the right data, the payload length in v4/v6 is different,
           so use the field value to update and record the new data len */
        o_unit->payload += n_unit->payload; /* update new data len */

        /* update field in ip header */
        *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);

        /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced
           for windows guest, while this may change the behavior for linux
           guest (only if it uses RSC feature). */
        o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;

        o_unit->tcp->th_ack = n_unit->tcp->th_ack;
        o_unit->tcp->th_win = n_unit->tcp->th_win;

        /* Append the new payload to the cached, over-allocated buffer. */
        memmove(seg->buf + seg->size, data, n_unit->payload);
        seg->size += n_unit->payload;
        seg->packets++;
        chain->stat.coalesced++;
        return RSC_COALESCE;
    }
}
   2128
   2129static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
   2130                                        VirtioNetRscSeg *seg,
   2131                                        const uint8_t *buf, size_t size,
   2132                                        VirtioNetRscUnit *unit)
   2133{
   2134    struct ip_header *ip1, *ip2;
   2135
   2136    ip1 = (struct ip_header *)(unit->ip);
   2137    ip2 = (struct ip_header *)(seg->unit.ip);
   2138    if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
   2139        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
   2140        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
   2141        chain->stat.no_match++;
   2142        return RSC_NO_MATCH;
   2143    }
   2144
   2145    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
   2146}
   2147
   2148static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
   2149                                        VirtioNetRscSeg *seg,
   2150                                        const uint8_t *buf, size_t size,
   2151                                        VirtioNetRscUnit *unit)
   2152{
   2153    struct ip6_header *ip1, *ip2;
   2154
   2155    ip1 = (struct ip6_header *)(unit->ip);
   2156    ip2 = (struct ip6_header *)(seg->unit.ip);
   2157    if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
   2158        || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
   2159        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
   2160        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
   2161            chain->stat.no_match++;
   2162            return RSC_NO_MATCH;
   2163    }
   2164
   2165    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
   2166}
   2167
/* Packets with 'SYN' should bypass, other flag should be sent after drain
 * to prevent out of order */
static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
                                         struct tcp_header *tcp)
{
    uint16_t tcp_hdr;
    uint16_t tcp_flag;

    tcp_flag = htons(tcp->th_offset_flags);
    /* Data offset: top 4 bits, in 32-bit words (>> 12, then * 4). */
    tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
    tcp_flag &= VIRTIO_NET_TCP_FLAG;
    if (tcp_flag & TH_SYN) {
        chain->stat.tcp_syn++;
        return RSC_BYPASS;
    }

    /* Control flags force a flush of the flow before delivery. */
    if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
        chain->stat.tcp_ctrl_drain++;
        return RSC_FINAL;
    }

    /* Segments carrying TCP options are never coalesced. */
    if (tcp_hdr > sizeof(struct tcp_header)) {
        chain->stat.tcp_all_opt++;
        return RSC_FINAL;
    }

    return RSC_CANDIDATE;
}
   2196
/*
 * Coalescing entry for a candidate TCP segment: cache it when the chain
 * is empty, otherwise try to merge it into an existing flow's segment;
 * segments of unrelated flows are left untouched.
 *
 * Returns the number of bytes handled, or the result of delivering the
 * packet directly when the cached segment had to be flushed.
 */
static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
                                         NetClientState *nc,
                                         const uint8_t *buf, size_t size,
                                         VirtioNetRscUnit *unit)
{
    int ret;
    VirtioNetRscSeg *seg, *nseg;

    if (QTAILQ_EMPTY(&chain->buffers)) {
        chain->stat.empty_cache++;
        virtio_net_rsc_cache_buf(chain, nc, buf, size);
        /* Arm the drain timer so a cached segment cannot be held back
         * indefinitely. */
        timer_mod(chain->drain_timer,
              qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
        return size;
    }

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        if (chain->proto == ETH_P_IP) {
            ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
        } else {
            ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
        }

        if (ret == RSC_FINAL) {
            /* Same flow but not mergeable: flush the cached segment
             * first, then deliver the new packet to preserve ordering. */
            if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
                /* Send failed */
                chain->stat.final_failed++;
                return 0;
            }

            /* Send current packet */
            return virtio_net_do_receive(nc, buf, size);
        } else if (ret == RSC_NO_MATCH) {
            continue;
        } else {
            /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
            seg->is_coalesced = 1;
            return size;
        }
    }

    chain->stat.no_match_cache++;
    virtio_net_rsc_cache_buf(chain, nc, buf, size);
    return size;
}
   2242
/*
 * Flush any cached segment belonging to the same flow as @buf, then
 * deliver @buf itself — keeps a connection's segments in order.
 *
 * @ip_start/@ip_size locate the IP address pair inside the buffer;
 * @tcp_port is the offset of the 4-byte source+destination port pair.
 */
static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
                                        NetClientState *nc,
                                        const uint8_t *buf, size_t size,
                                        uint16_t ip_start, uint16_t ip_size,
                                        uint16_t tcp_port)
{
    VirtioNetRscSeg *seg, *nseg;
    uint32_t ppair1, ppair2;

    /* NOTE(review): reads a uint32_t through a cast at an offset that is
     * not necessarily 4-byte aligned — presumably tolerated on the hosts
     * QEMU targets; confirm. */
    ppair1 = *(uint32_t *)(buf + tcp_port);
    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        ppair2 = *(uint32_t *)(seg->buf + tcp_port);
        if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
            || (ppair1 != ppair2)) {
            continue;
        }
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.drain_failed++;
        }

        break;
    }

    return virtio_net_do_receive(nc, buf, size);
}
   2269
/*
 * Decide whether an IPv4 packet is eligible for coalescing.
 *
 * Returns RSC_CANDIDATE for plain, unfragmented, option-free IPv4/TCP
 * packets with a sane total length; RSC_BYPASS for everything else.
 */
static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
                                            struct ip_header *ip,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    /* Not an ipv4 packet */
    if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
        /* NOTE(review): counted under ip_option although this is a
         * version mismatch — keeps the original stat accounting. */
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip option */
    if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    if (ip->ip_p != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip fragment (bypass unless DF is set) */
    if (!(htons(ip->ip_off) & IP_DF)) {
        chain->stat.ip_frag++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IPTOS_ECN(ip->ip_tos)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    /* Total length must cover IP+TCP headers and fit in the buffer. */
    ip_len = htons(ip->ip_len);
    if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
        || ip_len > (size - chain->n->guest_hdr_len -
                     sizeof(struct eth_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}
   2315
/*
 * RSC receive path for IPv4: bypass, drain or coalesce the packet
 * depending on the sanity and TCP-control checks.
 */
static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
                                      NetClientState *nc,
                                      const uint8_t *buf, size_t size)
{
    int32_t ret;
    uint16_t hdr_len;
    VirtioNetRscUnit unit;

    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;

    /* Too short to hold eth + IPv4 + TCP headers: deliver as-is. */
    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
        + sizeof(struct tcp_header))) {
        chain->stat.bypass_not_tcp++;
        return virtio_net_do_receive(nc, buf, size);
    }

    virtio_net_rsc_extract_unit4(chain, buf, &unit);
    if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
        != RSC_CANDIDATE) {
        return virtio_net_do_receive(nc, buf, size);
    }

    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
    if (ret == RSC_BYPASS) {
        return virtio_net_do_receive(nc, buf, size);
    } else if (ret == RSC_FINAL) {
        /* 12 is the offset of ip_src within the IPv4 header; the TCP
         * port pair starts right after the 20-byte IP header. */
        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
                ((hdr_len + sizeof(struct eth_header)) + 12),
                VIRTIO_NET_IP4_ADDR_SIZE,
                hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
    }

    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
}
   2350
/*
 * Decide whether an IPv6 packet is eligible for coalescing.
 *
 * Returns RSC_CANDIDATE for plain IPv6/TCP packets (no extension
 * headers, no ECN) with a sane payload length, RSC_BYPASS otherwise.
 */
static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
                                            struct ip6_header *ip6,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    /* NOTE(review): the version nibble is extracted from the low-order
     * byte of the 32-bit flow word — presumably correct for the header
     * layout on supported hosts; confirm. */
    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
        != IP_HEADER_VERSION_6) {
        return RSC_BYPASS;
    }

    /* Both option and protocol is checked in this */
    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    /* IPv6 payload length excludes the IPv6 header itself. */
    ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    if (ip_len < sizeof(struct tcp_header) ||
        ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
                  - sizeof(struct ip6_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}
   2384
/*
 * RSC receive path for IPv6: bypass, drain or coalesce the packet
 * depending on the sanity and TCP-control checks.
 */
static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
                                      const uint8_t *buf, size_t size)
{
    int32_t ret;
    uint16_t hdr_len;
    VirtioNetRscChain *chain;
    VirtioNetRscUnit unit;

    chain = (VirtioNetRscChain *)opq;
    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;

    /* Too short to hold eth + IPv6 + TCP headers: deliver as-is. */
    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
        + sizeof(tcp_header))) {
        return virtio_net_do_receive(nc, buf, size);
    }

    virtio_net_rsc_extract_unit6(chain, buf, &unit);
    if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
                                                 unit.ip, buf, size)) {
        return virtio_net_do_receive(nc, buf, size);
    }

    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
    if (ret == RSC_BYPASS) {
        return virtio_net_do_receive(nc, buf, size);
    } else if (ret == RSC_FINAL) {
        /* 8 is the offset of ip6_src within the IPv6 header; the TCP
         * port pair starts right after the 40-byte IPv6 header. */
        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
                ((hdr_len + sizeof(struct eth_header)) + 8),
                VIRTIO_NET_IP6_ADDR_SIZE,
                hdr_len + sizeof(struct eth_header)
                + sizeof(struct ip6_header));
    }

    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
}
   2420
   2421static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
   2422                                                      NetClientState *nc,
   2423                                                      uint16_t proto)
   2424{
   2425    VirtioNetRscChain *chain;
   2426
   2427    if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
   2428        return NULL;
   2429    }
   2430
   2431    QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
   2432        if (chain->proto == proto) {
   2433            return chain;
   2434        }
   2435    }
   2436
   2437    chain = g_malloc(sizeof(*chain));
   2438    chain->n = n;
   2439    chain->proto = proto;
   2440    if (proto == (uint16_t)ETH_P_IP) {
   2441        chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
   2442        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
   2443    } else {
   2444        chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
   2445        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
   2446    }
   2447    chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
   2448                                      virtio_net_rsc_purge, chain);
   2449    memset(&chain->stat, 0, sizeof(chain->stat));
   2450
   2451    QTAILQ_INIT(&chain->buffers);
   2452    QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
   2453
   2454    return chain;
   2455}
   2456
/*
 * Receive entry when RSC is enabled: route IPv4/IPv6 traffic through
 * the per-protocol coalescing chain, everything else straight through.
 */
static ssize_t virtio_net_rsc_receive(NetClientState *nc,
                                      const uint8_t *buf,
                                      size_t size)
{
    uint16_t proto;
    VirtioNetRscChain *chain;
    struct eth_header *eth;
    VirtIONet *n;

    n = qemu_get_nic_opaque(nc);
    /* NOTE(review): the length check uses host_hdr_len while the eth
     * header is read past guest_hdr_len — presumably the two are equal
     * whenever RSC is negotiated; confirm. */
    if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
        return virtio_net_do_receive(nc, buf, size);
    }

    eth = (struct eth_header *)(buf + n->guest_hdr_len);
    proto = htons(eth->h_proto);

    chain = virtio_net_rsc_lookup_chain(n, nc, proto);
    if (chain) {
        chain->stat.received++;
        if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
            return virtio_net_rsc_receive4(chain, nc, buf, size);
        } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
            return virtio_net_rsc_receive6(chain, nc, buf, size);
        }
    }
    return virtio_net_do_receive(nc, buf, size);
}
   2485
   2486static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
   2487                                  size_t size)
   2488{
   2489    VirtIONet *n = qemu_get_nic_opaque(nc);
   2490    if ((n->rsc4_enabled || n->rsc6_enabled)) {
   2491        return virtio_net_rsc_receive(nc, buf, size);
   2492    } else {
   2493        return virtio_net_do_receive(nc, buf, size);
   2494    }
   2495}
   2496
   2497static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
   2498
   2499static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
   2500{
   2501    VirtIONet *n = qemu_get_nic_opaque(nc);
   2502    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
   2503    VirtIODevice *vdev = VIRTIO_DEVICE(n);
   2504
   2505    virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
   2506    virtio_notify(vdev, q->tx_vq);
   2507
   2508    g_free(q->async_tx.elem);
   2509    q->async_tx.elem = NULL;
   2510
   2511    virtio_queue_set_notification(q->tx_vq, 1);
   2512    virtio_net_flush_tx(q);
   2513}
   2514
/* TX */
/*
 * Drain up to n->tx_burst packets from @q's TX virtqueue and hand them to
 * the peer.  Returns the number of packets sent, -EINVAL if the device was
 * marked broken, or -EBUSY if the peer could not accept a packet (the
 * element is then parked in q->async_tx and retired later by
 * virtio_net_tx_complete()).
 */
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elem;
    int32_t num_packets = 0;
    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return num_packets;
    }

    if (q->async_tx.elem) {
        /* A previous send is still in flight; stop kicks until it completes. */
        virtio_queue_set_notification(q->tx_vq, 0);
        return num_packets;
    }

    for (;;) {
        ssize_t ret;
        unsigned int out_num;
        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
        struct virtio_net_hdr_mrg_rxbuf mhdr;

        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        out_num = elem->out_num;
        out_sg = elem->out_sg;
        if (out_num < 1) {
            virtio_error(vdev, "virtio-net header not in first element");
            virtqueue_detach_element(q->tx_vq, elem, 0);
            g_free(elem);
            return -EINVAL;
        }

        if (n->has_vnet_hdr) {
            /* The element must contain at least a full virtio-net header. */
            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
                n->guest_hdr_len) {
                virtio_error(vdev, "virtio-net header incorrect");
                virtqueue_detach_element(q->tx_vq, elem, 0);
                g_free(elem);
                return -EINVAL;
            }
            if (n->needs_vnet_hdr_swap) {
                /*
                 * Device/driver endianness differ: byte-swap the header copy
                 * and splice it in front of the remaining payload in sg2.
                 */
                virtio_net_hdr_swap(vdev, (void *) &mhdr);
                sg2[0].iov_base = &mhdr;
                sg2[0].iov_len = n->guest_hdr_len;
                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
                                   out_sg, out_num,
                                   n->guest_hdr_len, -1);
                if (out_num == VIRTQUEUE_MAX_SIZE) {
                    /* payload too fragmented to fit with header prepended */
                    goto drop;
                }
                out_num += 1; /* account for the header entry at sg2[0] */
                out_sg = sg2;
            }
        }
        /*
         * If host wants to see the guest header as is, we can
         * pass it on unchanged. Otherwise, copy just the parts
         * that host is interested in.
         */
        assert(n->host_hdr_len <= n->guest_hdr_len);
        if (n->host_hdr_len != n->guest_hdr_len) {
            /* keep the first host_hdr_len bytes, skip the guest-only tail */
            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
                                       out_sg, out_num,
                                       0, n->host_hdr_len);
            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
                             out_sg, out_num,
                             n->guest_hdr_len, -1);
            out_num = sg_num;
            out_sg = sg;
        }

        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
                                      out_sg, out_num, virtio_net_tx_complete);
        if (ret == 0) {
            /* Peer busy: park the element; completion callback resumes us. */
            virtio_queue_set_notification(q->tx_vq, 0);
            q->async_tx.elem = elem;
            return -EBUSY;
        }

drop:
        virtqueue_push(q->tx_vq, elem, 0);
        virtio_notify(vdev, q->tx_vq);
        g_free(elem);

        if (++num_packets >= n->tx_burst) {
            break;
        }
    }
    return num_packets;
}
   2610
/*
 * TX virtqueue kick handler for tx=timer mode: the first kick arms the
 * coalescing timer; a second kick while the timer is pending flushes
 * immediately (the guest is clearly active).
 */
static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        /* Remember the kick so the queue is flushed on restart. */
        q->tx_waiting = 1;
        return;
    }

    if (q->tx_waiting) {
        /* Second kick before the timer fired: flush right away. */
        virtio_queue_set_notification(vq, 1);
        timer_del(q->tx_timer);
        q->tx_waiting = 0;
        if (virtio_net_flush_tx(q) == -EINVAL) {
            return;
        }
    } else {
        /* First kick: defer the flush until the timeout expires. */
        timer_mod(q->tx_timer,
                       qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        q->tx_waiting = 1;
        virtio_queue_set_notification(vq, 0);
    }
}
   2641
/*
 * TX virtqueue kick handler for tx=bh mode: schedule the bottom half that
 * performs the actual flush outside of the notification context.
 */
static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    /* A bottom half is already pending for this queue. */
    if (unlikely(q->tx_waiting)) {
        return;
    }
    /* Set tx_waiting before the vm_running check so a stopped VM still
     * knows it must flush on restart. */
    q->tx_waiting = 1;
    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        return;
    }
    virtio_queue_set_notification(vq, 0);
    qemu_bh_schedule(q->tx_bh);
}
   2663
/* Timer callback for tx=timer mode: flush the queue after the coalescing
 * timeout armed by virtio_net_handle_tx_timer() expires. */
static void virtio_net_tx_timer(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    /* This happens when device was stopped but the timer wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready any more */
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return;
    }

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}
   2686
/*
 * Bottom half for tx=bh mode: flush a burst of packets and reschedule
 * itself while the guest keeps the queue busy.
 */
static void virtio_net_tx_bh(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int32_t ret;

    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready any more */
    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
        return;
    }

    ret = virtio_net_flush_tx(q);
    if (ret == -EBUSY || ret == -EINVAL) {
        return; /* Notification re-enable handled by tx_complete or device
                 * broken */
    }

    /* If we flush a full burst of packets, assume there are
     * more coming and immediately reschedule */
    if (ret >= n->tx_burst) {
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
        return;
    }

    /* If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking.  If
     * we find something, assume the guest is still active and reschedule */
    virtio_queue_set_notification(q->tx_vq, 1);
    ret = virtio_net_flush_tx(q);
    if (ret == -EINVAL) {
        return;
    } else if (ret > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
    }
}
   2735
   2736static void virtio_net_add_queue(VirtIONet *n, int index)
   2737{
   2738    VirtIODevice *vdev = VIRTIO_DEVICE(n);
   2739
   2740    n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
   2741                                           virtio_net_handle_rx);
   2742
   2743    if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
   2744        n->vqs[index].tx_vq =
   2745            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
   2746                             virtio_net_handle_tx_timer);
   2747        n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
   2748                                              virtio_net_tx_timer,
   2749                                              &n->vqs[index]);
   2750    } else {
   2751        n->vqs[index].tx_vq =
   2752            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
   2753                             virtio_net_handle_tx_bh);
   2754        n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
   2755    }
   2756
   2757    n->vqs[index].tx_waiting = 0;
   2758    n->vqs[index].n = n;
   2759}
   2760
   2761static void virtio_net_del_queue(VirtIONet *n, int index)
   2762{
   2763    VirtIODevice *vdev = VIRTIO_DEVICE(n);
   2764    VirtIONetQueue *q = &n->vqs[index];
   2765    NetClientState *nc = qemu_get_subqueue(n->nic, index);
   2766
   2767    qemu_purge_queued_packets(nc);
   2768
   2769    virtio_del_queue(vdev, index * 2);
   2770    if (q->tx_timer) {
   2771        timer_free(q->tx_timer);
   2772        q->tx_timer = NULL;
   2773    } else {
   2774        qemu_bh_delete(q->tx_bh);
   2775        q->tx_bh = NULL;
   2776    }
   2777    q->tx_waiting = 0;
   2778    virtio_del_queue(vdev, index * 2 + 1);
   2779}
   2780
   2781static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
   2782{
   2783    VirtIODevice *vdev = VIRTIO_DEVICE(n);
   2784    int old_num_queues = virtio_get_num_queues(vdev);
   2785    int new_num_queues = new_max_queues * 2 + 1;
   2786    int i;
   2787
   2788    assert(old_num_queues >= 3);
   2789    assert(old_num_queues % 2 == 1);
   2790
   2791    if (old_num_queues == new_num_queues) {
   2792        return;
   2793    }
   2794
   2795    /*
   2796     * We always need to remove and add ctrl vq if
   2797     * old_num_queues != new_num_queues. Remove ctrl_vq first,
   2798     * and then we only enter one of the following two loops.
   2799     */
   2800    virtio_del_queue(vdev, old_num_queues - 1);
   2801
   2802    for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
   2803        /* new_num_queues < old_num_queues */
   2804        virtio_net_del_queue(n, i / 2);
   2805    }
   2806
   2807    for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
   2808        /* new_num_queues > old_num_queues */
   2809        virtio_net_add_queue(n, i / 2);
   2810    }
   2811
   2812    /* add ctrl_vq last */
   2813    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
   2814}
   2815
   2816static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
   2817{
   2818    int max = multiqueue ? n->max_queues : 1;
   2819
   2820    n->multiqueue = multiqueue;
   2821    virtio_net_change_num_queues(n, max);
   2822
   2823    virtio_net_set_queues(n);
   2824}
   2825
/*
 * Post-load hook for the device section: re-derive all runtime state that
 * is not migrated directly (header sizes, MAC-filter bookkeeping, link
 * state of the subqueues, announce timer, RSS attachment).
 */
static int virtio_net_post_load_device(void *opaque, int version_id)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int i, link_down;

    trace_virtio_net_post_load_device();
    virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_F_VERSION_1),
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_NET_F_HASH_REPORT));

    /* MAC_TABLE_ENTRIES may be different from the saved image */
    if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
        n->mac_table.in_use = 0;
    }

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        /* Without the ctrl offloads feature the guest can't have changed
         * them, so reset to everything the device supports. */
        n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
    }

    /*
     * curr_guest_offloads will be later overwritten by the
     * virtio_set_features_nocheck call done from the virtio_load.
     * Here we make sure it is preserved and restored accordingly
     * in the virtio_net_post_load_virtio callback.
     */
    n->saved_guest_offloads = n->curr_guest_offloads;

    virtio_net_set_queues(n);

    /* Find the first multicast entry in the saved MAC filter */
    for (i = 0; i < n->mac_table.in_use; i++) {
        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
            break;
        }
    }
    n->mac_table.first_multi = i;

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in n->status */
    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
    for (i = 0; i < n->max_queues; i++) {
        qemu_get_subqueue(n->nic, i)->link_down = link_down;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        /* Resume a guest-announce sequence that was interrupted by
         * migration; delete the timer if no rounds remain. */
        qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                                  QEMU_CLOCK_VIRTUAL,
                                  virtio_net_announce_timer, n);
        if (n->announce_timer.round) {
            timer_mod(n->announce_timer.tm,
                      qemu_clock_get_ms(n->announce_timer.type));
        } else {
            qemu_announce_timer_del(&n->announce_timer, false);
        }
    }

    if (n->rss_data.enabled) {
        /* Prefer eBPF RSS; fall back to software RSS when attaching the
         * eBPF program fails (and warn, since vhost has no fallback). */
        n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
        if (!n->rss_data.populate_hash) {
            if (!virtio_net_attach_epbf_rss(n)) {
                if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
                    warn_report("Can't post-load eBPF RSS for vhost");
                } else {
                    warn_report("Can't post-load eBPF RSS - "
                                "fallback to software RSS");
                    n->rss_data.enabled_software_rss = true;
                }
            }
        }

        trace_virtio_net_rss_enable(n->rss_data.hash_types,
                                    n->rss_data.indirections_len,
                                    sizeof(n->rss_data.key));
    } else {
        trace_virtio_net_rss_disable();
    }
    return 0;
}
   2908
/*
 * Post-load hook run after the generic virtio state has been loaded
 * (i.e. after virtio_set_features_nocheck has run).
 */
static int virtio_net_post_load_virtio(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    /*
     * The actual needed state is now in saved_guest_offloads,
     * see virtio_net_post_load_device for detail.
     * Restore it back and apply the desired offloads.
     */
    n->curr_guest_offloads = n->saved_guest_offloads;
    if (peer_has_vnet_hdr(n)) {
        virtio_net_apply_guest_offloads(n);
    }

    return 0;
}
   2924
/* tx_waiting field of a VirtIONetQueue */
/* Migrates only the tx_waiting flag of one queue; the wire format must
 * stay stable, so no other fields may be added here. */
static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
    .name = "virtio-net-queue-tx_waiting",
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
        VMSTATE_END_OF_LIST()
   },
};
   2933
   2934static bool max_queues_gt_1(void *opaque, int version_id)
   2935{
   2936    return VIRTIO_NET(opaque)->max_queues > 1;
   2937}
   2938
   2939static bool has_ctrl_guest_offloads(void *opaque, int version_id)
   2940{
   2941    return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
   2942                                   VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
   2943}
   2944
   2945static bool mac_table_fits(void *opaque, int version_id)
   2946{
   2947    return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
   2948}
   2949
   2950static bool mac_table_doesnt_fit(void *opaque, int version_id)
   2951{
   2952    return !mac_table_fits(opaque, version_id);
   2953}
   2954
/* This temporary type is shared by all the WITH_TMP methods
 * although only some fields are used by each.
 */
struct VirtIONetMigTmp {
    VirtIONet      *parent;         /* device whose state is being moved */
    VirtIONetQueue *vqs_1;          /* points at parent->vqs[1] */
    uint16_t        curr_queues_1;  /* curr_queues minus queue 0 */
    uint8_t         has_ufo;        /* snapshot of parent->has_ufo */
    uint32_t        has_vnet_hdr;   /* snapshot of parent->has_vnet_hdr */
};
   2965
   2966/* The 2nd and subsequent tx_waiting flags are loaded later than
   2967 * the 1st entry in the queues and only if there's more than one
   2968 * entry.  We use the tmp mechanism to calculate a temporary
   2969 * pointer and count and also validate the count.
   2970 */
   2971
   2972static int virtio_net_tx_waiting_pre_save(void *opaque)
   2973{
   2974    struct VirtIONetMigTmp *tmp = opaque;
   2975
   2976    tmp->vqs_1 = tmp->parent->vqs + 1;
   2977    tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
   2978    if (tmp->parent->curr_queues == 0) {
   2979        tmp->curr_queues_1 = 0;
   2980    }
   2981
   2982    return 0;
   2983}
   2984
   2985static int virtio_net_tx_waiting_pre_load(void *opaque)
   2986{
   2987    struct VirtIONetMigTmp *tmp = opaque;
   2988
   2989    /* Reuse the pointer setup from save */
   2990    virtio_net_tx_waiting_pre_save(opaque);
   2991
   2992    if (tmp->parent->curr_queues > tmp->parent->max_queues) {
   2993        error_report("virtio-net: curr_queues %x > max_queues %x",
   2994            tmp->parent->curr_queues, tmp->parent->max_queues);
   2995
   2996        return -EINVAL;
   2997    }
   2998
   2999    return 0; /* all good */
   3000}
   3001
/* Variable-length array of tx_waiting flags for queues beyond the first;
 * the element count comes from curr_queues_1 set up in pre_save/pre_load. */
static const VMStateDescription vmstate_virtio_net_tx_waiting = {
    .name      = "virtio-net-tx_waiting",
    .pre_load  = virtio_net_tx_waiting_pre_load,
    .pre_save  = virtio_net_tx_waiting_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
                                     curr_queues_1,
                                     vmstate_virtio_net_queue_tx_waiting,
                                     struct VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};
   3014
   3015/* the 'has_ufo' flag is just tested; if the incoming stream has the
   3016 * flag set we need to check that we have it
   3017 */
   3018static int virtio_net_ufo_post_load(void *opaque, int version_id)
   3019{
   3020    struct VirtIONetMigTmp *tmp = opaque;
   3021
   3022    if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
   3023        error_report("virtio-net: saved image requires TUN_F_UFO support");
   3024        return -EINVAL;
   3025    }
   3026
   3027    return 0;
   3028}
   3029
   3030static int virtio_net_ufo_pre_save(void *opaque)
   3031{
   3032    struct VirtIONetMigTmp *tmp = opaque;
   3033
   3034    tmp->has_ufo = tmp->parent->has_ufo;
   3035
   3036    return 0;
   3037}
   3038
/* Single-byte subsection carrying the source's UFO capability flag. */
static const VMStateDescription vmstate_virtio_net_has_ufo = {
    .name      = "virtio-net-ufo",
    .post_load = virtio_net_ufo_post_load,
    .pre_save  = virtio_net_ufo_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
   3048
   3049/* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
   3050 * flag set we need to check that we have it
   3051 */
   3052static int virtio_net_vnet_post_load(void *opaque, int version_id)
   3053{
   3054    struct VirtIONetMigTmp *tmp = opaque;
   3055
   3056    if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
   3057        error_report("virtio-net: saved image requires vnet_hdr=on");
   3058        return -EINVAL;
   3059    }
   3060
   3061    return 0;
   3062}
   3063
   3064static int virtio_net_vnet_pre_save(void *opaque)
   3065{
   3066    struct VirtIONetMigTmp *tmp = opaque;
   3067
   3068    tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
   3069
   3070    return 0;
   3071}
   3072
/* Subsection carrying the source's vnet-header capability flag. */
static const VMStateDescription vmstate_virtio_net_has_vnet = {
    .name      = "virtio-net-vnet",
    .post_load = virtio_net_vnet_post_load,
    .pre_save  = virtio_net_vnet_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
   3082
   3083static bool virtio_net_rss_needed(void *opaque)
   3084{
   3085    return VIRTIO_NET(opaque)->rss_data.enabled;
   3086}
   3087
/* Optional subsection with the full RSS configuration, including the
 * variable-length indirection table (allocated on load). */
static const VMStateDescription vmstate_virtio_net_rss = {
    .name      = "virtio-net-device/rss",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = virtio_net_rss_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(rss_data.enabled, VirtIONet),
        VMSTATE_BOOL(rss_data.redirect, VirtIONet),
        VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
        VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
        VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
        VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
        VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
                            VIRTIO_NET_RSS_MAX_KEY_SIZE),
        VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
                                    rss_data.indirections_len, 0,
                                    vmstate_info_uint16, uint16_t),
        VMSTATE_END_OF_LIST()
    },
};
   3108
/* Main device-state description; field order defines the wire format and
 * must never change (only subsections may be appended). */
static const VMStateDescription vmstate_virtio_net_device = {
    .name = "virtio-net-device",
    .version_id = VIRTIO_NET_VM_VERSION,
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .post_load = virtio_net_post_load_device,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
        /* tx_waiting of queue 0; the other queues travel in the
         * vmstate_virtio_net_tx_waiting WITH_TMP section below. */
        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
                               vmstate_virtio_net_queue_tx_waiting,
                               VirtIONetQueue),
        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
        VMSTATE_UINT16(status, VirtIONet),
        VMSTATE_UINT8(promisc, VirtIONet),
        VMSTATE_UINT8(allmulti, VirtIONet),
        VMSTATE_UINT32(mac_table.in_use, VirtIONet),

        /* Guarded pair: If it fits we load it, else we throw it away
         * - can happen if source has a larger MAC table.; post-load
         *  sets flags in this case.
         */
        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
                                0, mac_table_fits, mac_table.in_use,
                                 ETH_ALEN),
        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
                                     mac_table.in_use, ETH_ALEN),

        /* Note: This is an array of uint32's that's always been saved as a
         * buffer; hold onto your endiannesses; it's actually used as a bitmap
         * but based on the uint.
         */
        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_vnet),
        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
        VMSTATE_UINT8(alluni, VirtIONet),
        VMSTATE_UINT8(nomulti, VirtIONet),
        VMSTATE_UINT8(nouni, VirtIONet),
        VMSTATE_UINT8(nobcast, VirtIONet),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_ufo),
        /* Multiqueue fields only exist on the wire when max_queues > 1;
         * max_queues itself must match on both sides. */
        VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
                            vmstate_info_uint16_equal, uint16_t),
        VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_tx_waiting),
        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
                            has_ctrl_guest_offloads),
        VMSTATE_END_OF_LIST()
   },
    .subsections = (const VMStateDescription * []) {
        &vmstate_virtio_net_rss,
        NULL
    }
};
   3164
/* Callbacks registered with the net layer for each virtio-net NIC. */
static NetClientInfo net_virtio_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .link_status_changed = virtio_net_set_link_status,
    .query_rx_filter = virtio_net_query_rxfilter,
    .announce = virtio_net_announce,
};
   3174
/* Query whether vhost has a pending guest notification for virtqueue @idx.
 * Only valid while vhost is running (hence the assert). */
static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
    assert(n->vhost_started);
    return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
}
   3182
/* Mask/unmask guest notifications for virtqueue @idx in vhost.
 * Only valid while vhost is running (hence the assert). */
static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
                                           bool mask)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
    assert(n->vhost_started);
    vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
                             vdev, idx, mask);
}
   3192
/* Compute config-space size from the feature set.  MAC is added to the
 * local copy of host_features so the mac field is always accounted for. */
static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
{
    virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);

    n->config_size = virtio_feature_get_config_size(feature_sizes,
                                                    host_features);
}
   3200
   3201void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
   3202                                   const char *type)
   3203{
   3204    /*
   3205     * The name can be NULL, the netclient name will be type.x.
   3206     */
   3207    assert(type != NULL);
   3208
   3209    g_free(n->netclient_name);
   3210    g_free(n->netclient_type);
   3211    n->netclient_name = g_strdup(name);
   3212    n->netclient_type = g_strdup(type);
   3213}
   3214
   3215static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
   3216{
   3217    HotplugHandler *hotplug_ctrl;
   3218    PCIDevice *pci_dev;
   3219    Error *err = NULL;
   3220
   3221    hotplug_ctrl = qdev_get_hotplug_handler(dev);
   3222    if (hotplug_ctrl) {
   3223        pci_dev = PCI_DEVICE(dev);
   3224        pci_dev->partially_hotplugged = true;
   3225        hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
   3226        if (err) {
   3227            error_report_err(err);
   3228            return false;
   3229        }
   3230    } else {
   3231        return false;
   3232    }
   3233    return true;
   3234}
   3235
/*
 * Re-plug a failover primary that was partially unplugged for migration
 * (used when the migration failed).  Returns true on success.
 */
static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
                                    Error **errp)
{
    Error *err = NULL;
    HotplugHandler *hotplug_ctrl;
    PCIDevice *pdev = PCI_DEVICE(dev);
    BusState *primary_bus;

    /* Nothing to do if the device was never partially unplugged. */
    if (!pdev->partially_hotplugged) {
        return true;
    }
    primary_bus = dev->parent_bus;
    if (!primary_bus) {
        error_setg(errp, "virtio_net: couldn't find primary bus");
        return false;
    }
    /* Re-attach to the bus and make the device visible to the guest again. */
    qdev_set_parent_bus(dev, primary_bus, &error_abort);
    qatomic_set(&n->failover_primary_hidden, false);
    hotplug_ctrl = qdev_get_hotplug_handler(dev);
    if (hotplug_ctrl) {
        hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
        if (err) {
            goto out;
        }
        hotplug_handler_plug(hotplug_ctrl, dev, &err);
    }
    pdev->partially_hotplugged = false;

out:
    error_propagate(errp, err);
    return !err;
}
   3268
/*
 * React to migration state changes for the failover primary device:
 * unplug it when migration starts, re-plug it if migration fails.
 */
static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s)
{
    bool should_be_hidden;
    Error *err = NULL;
    DeviceState *dev = failover_find_primary_device(n);

    if (!dev) {
        return;
    }

    should_be_hidden = qatomic_read(&n->failover_primary_hidden);

    if (migration_in_setup(s) && !should_be_hidden) {
        /* Migration is starting: detach the primary from the guest. */
        if (failover_unplug_primary(n, dev)) {
            vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
            qapi_event_send_unplug_primary(dev->id);
            qatomic_set(&n->failover_primary_hidden, true);
        } else {
            warn_report("couldn't unplug primary device");
        }
    } else if (migration_has_failed(s)) {
        /* We already unplugged the device let's plug it back */
        if (!failover_replug_primary(n, dev, &err)) {
            if (err) {
                error_report_err(err);
            }
        }
    }
}
   3298
   3299static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
   3300{
   3301    MigrationState *s = data;
   3302    VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
   3303    virtio_net_handle_migration_primary(n, s);
   3304}
   3305
   3306static bool failover_hide_primary_device(DeviceListener *listener,
   3307                                         QemuOpts *device_opts)
   3308{
   3309    VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
   3310    const char *standby_id;
   3311
   3312    if (!device_opts) {
   3313        return false;
   3314    }
   3315    standby_id = qemu_opt_get(device_opts, "failover_pair_id");
   3316    if (g_strcmp0(standby_id, n->netclient_name) != 0) {
   3317        return false;
   3318    }
   3319
   3320    /* failover_primary_hidden is set during feature negotiation */
   3321    return qatomic_read(&n->failover_primary_hidden);
   3322}
   3323
/*
 * Realize callback for the virtio-net device: validates user
 * configuration, advertises host feature bits accordingly, creates
 * the virtqueues and the backing NIC, and initializes runtime state.
 * On error, sets *errp and undoes any virtio state built so far.
 */
static void virtio_net_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    NetClientState *nc;
    int i;

    /* A non-zero "host_mtu" property turns on MTU advertisement. */
    if (n->net_conf.mtu) {
        n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    if (n->net_conf.duplex_str) {
        if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
            n->net_conf.duplex = DUPLEX_HALF;
        } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
            n->net_conf.duplex = DUPLEX_FULL;
        } else {
            error_setg(errp, "'duplex' must be 'half' or 'full'");
            return;
        }
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    } else {
        n->net_conf.duplex = DUPLEX_UNKNOWN;
    }

    /* "speed" defaults to SPEED_UNKNOWN (-1); anything below that is bad. */
    if (n->net_conf.speed < SPEED_UNKNOWN) {
        error_setg(errp, "'speed' must be between 0 and INT_MAX");
        return;
    }
    if (n->net_conf.speed >= 0) {
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    }

    if (n->failover) {
        /*
         * Failover support: hide the paired primary device until the
         * guest acks VIRTIO_NET_F_STANDBY, and watch migration state
         * so the primary can be unplugged/replugged around migration.
         */
        n->primary_listener.hide_device = failover_hide_primary_device;
        qatomic_set(&n->failover_primary_hidden, true);
        device_listener_register(&n->primary_listener);
        n->migration_state.notify = virtio_net_migration_state_notifier;
        add_migration_state_change_notifier(&n->migration_state);
        n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
    }

    virtio_net_set_config_size(n, n->host_features);
    virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);

    /*
     * We set a lower limit on RX queue size to what it always was.
     * Guests that want a smaller ring can always resize it without
     * help from us (using virtio 1 and up).
     */
    if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
        n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.rx_queue_size)) {
        error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d.",
                   n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
        n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.tx_queue_size)) {
        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d",
                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    /* Each queue pair needs an RX and a TX vq, plus one control vq. */
    n->max_queues = MAX(n->nic_conf.peers.queues, 1);
    if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
        error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
                   "must be a positive integer less than %d.",
                   n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
        virtio_cleanup(vdev);
        return;
    }
    n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
    n->curr_queues = 1;
    n->tx_timeout = n->net_conf.txtimer;

    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
                       && strcmp(n->net_conf.tx, "bh")) {
        warn_report("virtio-net: "
                    "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
                    n->net_conf.tx);
        error_printf("Defaulting to \"bh\"");
    }

    /* The backend may cap the usable TX queue size (e.g. vhost-user). */
    n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
                                    n->net_conf.tx_queue_size);

    for (i = 0; i < n->max_queues; i++) {
        virtio_net_add_queue(n, i);
    }

    /* The control vq always comes after all RX/TX queue pairs. */
    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
    n->status = VIRTIO_NET_S_LINK_UP;
    qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                              QEMU_CLOCK_VIRTUAL,
                              virtio_net_announce_timer, n);
    n->announce_timer.round = 0;

    if (n->netclient_type) {
        /*
         * Happens when virtio_net_set_netclient_name has been called.
         */
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              n->netclient_type, n->netclient_name, n);
    } else {
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              object_get_typename(OBJECT(dev)), dev->id, n);
    }

    for (i = 0; i < n->max_queues; i++) {
        n->nic->ncs[i].do_not_pad = true;
    }

    /* Enable the vnet header only if the peer (backend) supports it. */
    peer_test_vnet_hdr(n);
    if (peer_has_vnet_hdr(n)) {
        for (i = 0; i < n->max_queues; i++) {
            qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
        }
        n->host_hdr_len = sizeof(struct virtio_net_hdr);
    } else {
        n->host_hdr_len = 0;
    }

    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);

    n->vqs[0].tx_waiting = 0;
    n->tx_burst = n->net_conf.txburst;
    virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
    n->promisc = 1; /* for compatibility */

    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    n->vlans = g_malloc0(MAX_VLAN >> 3);

    nc = qemu_get_queue(n->nic);
    nc->rxfilter_notify_enabled = 1;

   if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        /* Push the configured MAC down into the vhost-vdpa backend. */
        struct virtio_net_config netcfg = {};
        memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
        vhost_net_set_config(get_vhost_net(nc->peer),
            (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER);
    }
    QTAILQ_INIT(&n->rsc_chains);
    n->qdev = dev;

    net_rx_pkt_init(&n->rx_pkt, false);

    /* eBPF steering program backs the RSS feature when available. */
    if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
        virtio_net_load_ebpf(n);
    }
}
   3486
/*
 * Unrealize callback: undoes virtio_net_device_realize.  Stops any
 * vhost backend, tears down failover listeners/notifiers, deletes all
 * virtqueues (including the control vq) and frees device state.
 */
static void virtio_net_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    int i, max_queues;

    if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
        virtio_net_unload_ebpf(n);
    }

    /* This will stop vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    g_free(n->netclient_name);
    n->netclient_name = NULL;
    g_free(n->netclient_type);
    n->netclient_type = NULL;

    g_free(n->mac_table.macs);
    g_free(n->vlans);

    if (n->failover) {
        device_listener_unregister(&n->primary_listener);
        remove_migration_state_change_notifier(&n->migration_state);
    }

    /* Only the first queue pair was in use unless multiqueue was on. */
    max_queues = n->multiqueue ? n->max_queues : 1;
    for (i = 0; i < max_queues; i++) {
        virtio_net_del_queue(n, i);
    }
    /* delete also control vq */
    virtio_del_queue(vdev, max_queues * 2);
    qemu_announce_timer_del(&n->announce_timer, false);
    g_free(n->vqs);
    qemu_del_nic(n->nic);
    virtio_net_rsc_cleanup(n);
    g_free(n->rss_data.indirections_table);
    net_rx_pkt_uninit(n->rx_pkt);
    virtio_cleanup(vdev);
}
   3527
   3528static void virtio_net_instance_init(Object *obj)
   3529{
   3530    VirtIONet *n = VIRTIO_NET(obj);
   3531
   3532    /*
   3533     * The default config_size is sizeof(struct virtio_net_config).
   3534     * Can be overriden with virtio_net_set_config_size.
   3535     */
   3536    n->config_size = sizeof(struct virtio_net_config);
   3537    device_add_bootindex_property(obj, &n->nic_conf.bootindex,
   3538                                  "bootindex", "/ethernet-phy@0",
   3539                                  DEVICE(n));
   3540
   3541    ebpf_rss_init(&n->ebpf_rss);
   3542}
   3543
   3544static int virtio_net_pre_save(void *opaque)
   3545{
   3546    VirtIONet *n = opaque;
   3547
   3548    /* At this point, backend must be stopped, otherwise
   3549     * it might keep writing to memory. */
   3550    assert(!n->vhost_started);
   3551
   3552    return 0;
   3553}
   3554
   3555static bool primary_unplug_pending(void *opaque)
   3556{
   3557    DeviceState *dev = opaque;
   3558    DeviceState *primary;
   3559    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
   3560    VirtIONet *n = VIRTIO_NET(vdev);
   3561
   3562    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
   3563        return false;
   3564    }
   3565    primary = failover_find_primary_device(n);
   3566    return primary ? primary->pending_deleted_event : false;
   3567}
   3568
   3569static bool dev_unplug_pending(void *opaque)
   3570{
   3571    DeviceState *dev = opaque;
   3572    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
   3573
   3574    return vdc->primary_unplug_pending(dev);
   3575}
   3576
/*
 * Top-level migration descriptor for virtio-net.  The device payload
 * itself is serialized through VMSTATE_VIRTIO_DEVICE, which defers to
 * vdc->vmsd (vmstate_virtio_net_device, set in class_init).
 */
static const VMStateDescription vmstate_virtio_net = {
    .name = "virtio-net",
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .version_id = VIRTIO_NET_VM_VERSION,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
    .pre_save = virtio_net_pre_save,
    /* Hold off migration completion while a failover unplug is pending. */
    .dev_unplug_pending = dev_unplug_pending,
};
   3588
/*
 * qdev properties for virtio-net.  The BIT64 entries toggle feature
 * bits in host_features; their defaults define the guest-visible
 * feature set and therefore affect migration compatibility.
 */
static Property virtio_net_properties[] = {
    /* Checksum and segmentation offload feature bits (default on). */
    DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
                    VIRTIO_NET_F_CSUM, true),
    DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_CSUM, true),
    DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
    DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_TSO4, true),
    DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_TSO6, true),
    DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_ECN, true),
    DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_UFO, true),
    DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_ANNOUNCE, true),
    DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_TSO4, true),
    DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_TSO6, true),
    DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_ECN, true),
    DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_UFO, true),
    DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
                    VIRTIO_NET_F_MRG_RXBUF, true),
    /* Link status and control-virtqueue feature bits (default on). */
    DEFINE_PROP_BIT64("status", VirtIONet, host_features,
                    VIRTIO_NET_F_STATUS, true),
    DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_VQ, true),
    DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_RX, true),
    DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_VLAN, true),
    DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_RX_EXTRA, true),
    DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_MAC_ADDR, true),
    DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
    /* Multiqueue, RSS and hash reporting are opt-in (default off). */
    DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
    DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
                    VIRTIO_NET_F_RSS, false),
    DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
                    VIRTIO_NET_F_HASH_REPORT, false),
    DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
                    VIRTIO_NET_F_RSC_EXT, false),
    DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
                       VIRTIO_NET_RSC_DEFAULT_INTERVAL),
    /* Generic NIC configuration plus TX/queue tuning knobs. */
    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
                       TX_TIMER_INTERVAL),
    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
    DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
                       VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
    DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
                     true),
    /* Link speed/duplex reported via VIRTIO_NET_F_SPEED_DUPLEX. */
    DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
    DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
    /* Enable VIRTIO_NET_F_STANDBY-based failover with a primary device. */
    DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
    DEFINE_PROP_END_OF_LIST(),
};
   3655
/*
 * Class init: wire up the DeviceClass and VirtioDeviceClass callbacks
 * plus the two migration descriptors for virtio-net.
 */
static void virtio_net_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    device_class_set_props(dc, virtio_net_properties);
    dc->vmsd = &vmstate_virtio_net;
    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
    vdc->realize = virtio_net_device_realize;
    vdc->unrealize = virtio_net_device_unrealize;
    vdc->get_config = virtio_net_get_config;
    vdc->set_config = virtio_net_set_config;
    vdc->get_features = virtio_net_get_features;
    vdc->set_features = virtio_net_set_features;
    vdc->bad_features = virtio_net_bad_features;
    vdc->reset = virtio_net_reset;
    vdc->set_status = virtio_net_set_status;
    vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
    vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
    /* GSO is a legacy (pre-virtio-1.0) feature bit only. */
    vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
    vdc->post_load = virtio_net_post_load_virtio;
    /* Device-payload vmstate, referenced via VMSTATE_VIRTIO_DEVICE. */
    vdc->vmsd = &vmstate_virtio_net_device;
    vdc->primary_unplug_pending = primary_unplug_pending;
}
   3680
/* QOM type registration info for TYPE_VIRTIO_NET. */
static const TypeInfo virtio_net_info = {
    .name = TYPE_VIRTIO_NET,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIONet),
    .instance_init = virtio_net_instance_init,
    .class_init = virtio_net_class_init,
};
   3688
/* Register the virtio-net type with the QOM type system at startup. */
static void virtio_register_types(void)
{
    type_register_static(&virtio_net_info);
}

type_init(virtio_register_types)