cachepc-qemu

Fork of AMDESE/qemu with changes for cachepc side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu
Log | Files | Refs | Submodules | LICENSE | sfeed.txt

e1000.c (61414B)


      1/*
      2 * QEMU e1000 emulation
      3 *
      4 * Software developer's manual:
      5 * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
      6 *
      7 * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
      8 * Copyright (c) 2008 Qumranet
      9 * Based on work done by:
     10 * Copyright (c) 2007 Dan Aloni
     11 * Copyright (c) 2004 Antony T Curtis
     12 *
     13 * This library is free software; you can redistribute it and/or
     14 * modify it under the terms of the GNU Lesser General Public
     15 * License as published by the Free Software Foundation; either
     16 * version 2.1 of the License, or (at your option) any later version.
     17 *
     18 * This library is distributed in the hope that it will be useful,
     19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21 * Lesser General Public License for more details.
     22 *
     23 * You should have received a copy of the GNU Lesser General Public
     24 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
     25 */
     26
     27
     28#include "qemu/osdep.h"
     29#include "hw/pci/pci.h"
     30#include "hw/qdev-properties.h"
     31#include "migration/vmstate.h"
     32#include "net/eth.h"
     33#include "net/net.h"
     34#include "net/checksum.h"
     35#include "sysemu/sysemu.h"
     36#include "sysemu/dma.h"
     37#include "qemu/iov.h"
     38#include "qemu/module.h"
     39#include "qemu/range.h"
     40
     41#include "e1000x_common.h"
     42#include "trace.h"
     43#include "qom/object.h"
     44
     45static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
     46
     47/* #define E1000_DEBUG */
     48
     49#ifdef E1000_DEBUG
     50enum {
     51    DEBUG_GENERAL,      DEBUG_IO,       DEBUG_MMIO,     DEBUG_INTERRUPT,
     52    DEBUG_RX,           DEBUG_TX,       DEBUG_MDIC,     DEBUG_EEPROM,
     53    DEBUG_UNKNOWN,      DEBUG_TXSUM,    DEBUG_TXERR,    DEBUG_RXERR,
     54    DEBUG_RXFILTER,     DEBUG_PHY,      DEBUG_NOTYET,
     55};
     56#define DBGBIT(x)    (1<<DEBUG_##x)
     57static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);
     58
     59#define DBGOUT(what, fmt, ...) do { \
     60    if (debugflags & DBGBIT(what)) \
     61        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
     62    } while (0)
     63#else
     64#define DBGOUT(what, fmt, ...) do {} while (0)
     65#endif
     66
     67#define IOPORT_SIZE       0x40
     68#define PNPMMIO_SIZE      0x20000
     69#define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */
     70
     71#define MAXIMUM_ETHERNET_HDR_LEN (14+4)
     72
     73/*
     74 * HW models:
     75 *  E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
     76 *  E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
     77 *  E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
     78 *  Others never tested
     79 */
     80
     81struct E1000State_st {
     82    /*< private >*/
     83    PCIDevice parent_obj;
     84    /*< public >*/
     85
     86    NICState *nic;
     87    NICConf conf;
     88    MemoryRegion mmio;
     89    MemoryRegion io;
     90
     91    uint32_t mac_reg[0x8000];
     92    uint16_t phy_reg[0x20];
     93    uint16_t eeprom_data[64];
     94
     95    uint32_t rxbuf_size;
     96    uint32_t rxbuf_min_shift;
     97    struct e1000_tx {
     98        unsigned char header[256];
     99        unsigned char vlan_header[4];
    100        /* Fields vlan and data must not be reordered or separated. */
    101        unsigned char vlan[4];
    102        unsigned char data[0x10000];
    103        uint16_t size;
    104        unsigned char vlan_needed;
    105        unsigned char sum_needed;
    106        bool cptse;
    107        e1000x_txd_props props;
    108        e1000x_txd_props tso_props;
    109        uint16_t tso_frames;
    110    } tx;
    111
    112    struct {
    113        uint32_t val_in;    /* shifted in from guest driver */
    114        uint16_t bitnum_in;
    115        uint16_t bitnum_out;
    116        uint16_t reading;
    117        uint32_t old_eecd;
    118    } eecd_state;
    119
    120    QEMUTimer *autoneg_timer;
    121
    122    QEMUTimer *mit_timer;      /* Mitigation timer. */
    123    bool mit_timer_on;         /* Mitigation timer is running. */
    124    bool mit_irq_level;        /* Tracks interrupt pin level. */
    125    uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */
    126
    127    QEMUTimer *flush_queue_timer;
    128
    129/* Compatibility flags for migration to/from qemu 1.3.0 and older */
    130#define E1000_FLAG_AUTONEG_BIT 0
    131#define E1000_FLAG_MIT_BIT 1
    132#define E1000_FLAG_MAC_BIT 2
    133#define E1000_FLAG_TSO_BIT 3
    134#define E1000_FLAG_VET_BIT 4
    135#define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
    136#define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
    137#define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
    138#define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
    139#define E1000_FLAG_VET (1 << E1000_FLAG_VET_BIT)
    140
    141    uint32_t compat_flags;
    142    bool received_tx_tso;
    143    bool use_tso_for_migration;
    144    e1000x_txd_props mig_props;
    145};
    146typedef struct E1000State_st E1000State;
    147
    148#define chkflag(x)     (s->compat_flags & E1000_FLAG_##x)
    149
    150struct E1000BaseClass {
    151    PCIDeviceClass parent_class;
    152    uint16_t phy_id2;
    153};
    154typedef struct E1000BaseClass E1000BaseClass;
    155
    156#define TYPE_E1000_BASE "e1000-base"
    157
    158DECLARE_OBJ_CHECKERS(E1000State, E1000BaseClass,
    159                     E1000, TYPE_E1000_BASE)
    160
    161
    162static void
    163e1000_link_up(E1000State *s)
    164{
    165    e1000x_update_regs_on_link_up(s->mac_reg, s->phy_reg);
    166
    167    /* E1000_STATUS_LU is tested by e1000_can_receive() */
    168    qemu_flush_queued_packets(qemu_get_queue(s->nic));
    169}
    170
    171static void
    172e1000_autoneg_done(E1000State *s)
    173{
    174    e1000x_update_regs_on_autoneg_done(s->mac_reg, s->phy_reg);
    175
    176    /* E1000_STATUS_LU is tested by e1000_can_receive() */
    177    qemu_flush_queued_packets(qemu_get_queue(s->nic));
    178}
    179
    180static bool
    181have_autoneg(E1000State *s)
    182{
    183    return chkflag(AUTONEG) && (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN);
    184}
    185
    186static void
    187set_phy_ctrl(E1000State *s, int index, uint16_t val)
    188{
    189    /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self clearing */
    190    s->phy_reg[PHY_CTRL] = val & ~(0x3f |
    191                                   MII_CR_RESET |
    192                                   MII_CR_RESTART_AUTO_NEG);
    193
    194    /*
    195     * QEMU 1.3 does not support link auto-negotiation emulation, so if we
    196     * migrate during auto negotiation, after migration the link will be
    197     * down.
    198     */
    199    if (have_autoneg(s) && (val & MII_CR_RESTART_AUTO_NEG)) {
    200        e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
    201    }
    202}
    203
    204static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
    205    [PHY_CTRL] = set_phy_ctrl,
    206};
    207
    208enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
    209
    210enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
    211static const char phy_regcap[0x20] = {
    212    [PHY_STATUS]      = PHY_R,     [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
    213    [PHY_ID1]         = PHY_R,     [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
    214    [PHY_CTRL]        = PHY_RW,    [PHY_1000T_CTRL]             = PHY_RW,
    215    [PHY_LP_ABILITY]  = PHY_R,     [PHY_1000T_STATUS]           = PHY_R,
    216    [PHY_AUTONEG_ADV] = PHY_RW,    [M88E1000_RX_ERR_CNTR]       = PHY_R,
    217    [PHY_ID2]         = PHY_R,     [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
    218    [PHY_AUTONEG_EXP] = PHY_R,
    219};
    220
    221/* PHY_ID2 documented in 8254x_GBe_SDM.pdf, pp. 250 */
    222static const uint16_t phy_reg_init[] = {
    223    [PHY_CTRL]   = MII_CR_SPEED_SELECT_MSB |
    224                   MII_CR_FULL_DUPLEX |
    225                   MII_CR_AUTO_NEG_EN,
    226
    227    [PHY_STATUS] = MII_SR_EXTENDED_CAPS |
    228                   MII_SR_LINK_STATUS |   /* link initially up */
    229                   MII_SR_AUTONEG_CAPS |
    230                   /* MII_SR_AUTONEG_COMPLETE: initially NOT completed */
    231                   MII_SR_PREAMBLE_SUPPRESS |
    232                   MII_SR_EXTENDED_STATUS |
    233                   MII_SR_10T_HD_CAPS |
    234                   MII_SR_10T_FD_CAPS |
    235                   MII_SR_100X_HD_CAPS |
    236                   MII_SR_100X_FD_CAPS,
    237
    238    [PHY_ID1] = 0x141,
    239    /* [PHY_ID2] configured per DevId, from e1000_reset() */
    240    [PHY_AUTONEG_ADV] = 0xde1,
    241    [PHY_LP_ABILITY] = 0x1e0,
    242    [PHY_1000T_CTRL] = 0x0e00,
    243    [PHY_1000T_STATUS] = 0x3c00,
    244    [M88E1000_PHY_SPEC_CTRL] = 0x360,
    245    [M88E1000_PHY_SPEC_STATUS] = 0xac00,
    246    [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
    247};
    248
    249static const uint32_t mac_reg_init[] = {
    250    [PBA]     = 0x00100030,
    251    [LEDCTL]  = 0x602,
    252    [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
    253                E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
    254    [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
    255                E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
    256                E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
    257                E1000_STATUS_LU,
    258    [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
    259                E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
    260                E1000_MANC_RMCP_EN,
    261};
    262
    263/* Helper function, *curr == 0 means the value is not set */
    264static inline void
    265mit_update_delay(uint32_t *curr, uint32_t value)
    266{
    267    if (value && (*curr == 0 || value < *curr)) {
    268        *curr = value;
    269    }
    270}
    271
    272static void
    273set_interrupt_cause(E1000State *s, int index, uint32_t val)
    274{
    275    PCIDevice *d = PCI_DEVICE(s);
    276    uint32_t pending_ints;
    277    uint32_t mit_delay;
    278
    279    s->mac_reg[ICR] = val;
    280
    281    /*
    282     * Make sure ICR and ICS registers have the same value.
    283     * The spec says that the ICS register is write-only.  However in practice,
    284     * on real hardware ICS is readable, and for reads it has the same value as
    285     * ICR (except that ICS does not have the clear on read behaviour of ICR).
    286     *
    287     * The VxWorks PRO/1000 driver uses this behaviour.
    288     */
    289    s->mac_reg[ICS] = val;
    290
    291    pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
    292    if (!s->mit_irq_level && pending_ints) {
    293        /*
    294         * Here we detect a potential raising edge. We postpone raising the
    295         * interrupt line if we are inside the mitigation delay window
    296         * (s->mit_timer_on == 1).
    297         * We provide a partial implementation of interrupt mitigation,
    298         * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
    299         * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
    300         * RADV; relative timers based on TIDV and RDTR are not implemented.
    301         */
    302        if (s->mit_timer_on) {
    303            return;
    304        }
    305        if (chkflag(MIT)) {
    306            /* Compute the next mitigation delay according to pending
    307             * interrupts and the current values of RADV (provided
    308             * RDTR!=0), TADV and ITR.
    309             * Then rearm the timer.
    310             */
    311            mit_delay = 0;
    312            if (s->mit_ide &&
    313                    (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
    314                mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
    315            }
    316            if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
    317                mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
    318            }
    319            mit_update_delay(&mit_delay, s->mac_reg[ITR]);
    320
    321            /*
    322             * According to e1000 SPEC, the Ethernet controller guarantees
    323             * a maximum observable interrupt rate of 7813 interrupts/sec.
    324             * Thus if mit_delay < 500 then the delay should be set to the
    325             * minimum delay possible which is 500.
    326             */
    327            mit_delay = (mit_delay < 500) ? 500 : mit_delay;
    328
    329            s->mit_timer_on = 1;
    330            timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
    331                      mit_delay * 256);
    332            s->mit_ide = 0;
    333        }
    334    }
    335
    336    s->mit_irq_level = (pending_ints != 0);
    337    pci_set_irq(d, s->mit_irq_level);
    338}
    339
    340static void
    341e1000_mit_timer(void *opaque)
    342{
    343    E1000State *s = opaque;
    344
    345    s->mit_timer_on = 0;
    346    /* Call set_interrupt_cause to update the irq level (if necessary). */
    347    set_interrupt_cause(s, 0, s->mac_reg[ICR]);
    348}
    349
    350static void
    351set_ics(E1000State *s, int index, uint32_t val)
    352{
    353    DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
    354        s->mac_reg[IMS]);
    355    set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
    356}
    357
    358static void
    359e1000_autoneg_timer(void *opaque)
    360{
    361    E1000State *s = opaque;
    362    if (!qemu_get_queue(s->nic)->link_down) {
    363        e1000_autoneg_done(s);
    364        set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
    365    }
    366}
    367
    368static bool e1000_vet_init_need(void *opaque)
    369{
    370    E1000State *s = opaque;
    371
    372    return chkflag(VET);
    373}
    374
    375static void e1000_reset(void *opaque)
    376{
    377    E1000State *d = opaque;
    378    E1000BaseClass *edc = E1000_GET_CLASS(d);
    379    uint8_t *macaddr = d->conf.macaddr.a;
    380
    381    timer_del(d->autoneg_timer);
    382    timer_del(d->mit_timer);
    383    timer_del(d->flush_queue_timer);
    384    d->mit_timer_on = 0;
    385    d->mit_irq_level = 0;
    386    d->mit_ide = 0;
    387    memset(d->phy_reg, 0, sizeof d->phy_reg);
    388    memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
    389    d->phy_reg[PHY_ID2] = edc->phy_id2;
    390    memset(d->mac_reg, 0, sizeof d->mac_reg);
    391    memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
    392    d->rxbuf_min_shift = 1;
    393    memset(&d->tx, 0, sizeof d->tx);
    394
    395    if (qemu_get_queue(d->nic)->link_down) {
    396        e1000x_update_regs_on_link_down(d->mac_reg, d->phy_reg);
    397    }
    398
    399    e1000x_reset_mac_addr(d->nic, d->mac_reg, macaddr);
    400
    401    if (e1000_vet_init_need(d)) {
    402        d->mac_reg[VET] = ETH_P_VLAN;
    403    }
    404}
    405
    406static void
    407set_ctrl(E1000State *s, int index, uint32_t val)
    408{
    409    /* RST is self clearing */
    410    s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
    411}
    412
    413static void
    414e1000_flush_queue_timer(void *opaque)
    415{
    416    E1000State *s = opaque;
    417
    418    qemu_flush_queued_packets(qemu_get_queue(s->nic));
    419}
    420
    421static void
    422set_rx_control(E1000State *s, int index, uint32_t val)
    423{
    424    s->mac_reg[RCTL] = val;
    425    s->rxbuf_size = e1000x_rxbufsize(val);
    426    s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
    427    DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
    428           s->mac_reg[RCTL]);
    429    timer_mod(s->flush_queue_timer,
    430              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000);
    431}
    432
    433static void
    434set_mdic(E1000State *s, int index, uint32_t val)
    435{
    436    uint32_t data = val & E1000_MDIC_DATA_MASK;
    437    uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
    438
    439    if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
    440        val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
    441    else if (val & E1000_MDIC_OP_READ) {
    442        DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
    443        if (!(phy_regcap[addr] & PHY_R)) {
    444            DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
    445            val |= E1000_MDIC_ERROR;
    446        } else
    447            val = (val ^ data) | s->phy_reg[addr];
    448    } else if (val & E1000_MDIC_OP_WRITE) {
    449        DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
    450        if (!(phy_regcap[addr] & PHY_W)) {
    451            DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
    452            val |= E1000_MDIC_ERROR;
    453        } else {
    454            if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
    455                phyreg_writeops[addr](s, index, data);
    456            } else {
    457                s->phy_reg[addr] = data;
    458            }
    459        }
    460    }
    461    s->mac_reg[MDIC] = val | E1000_MDIC_READY;
    462
    463    if (val & E1000_MDIC_INT_EN) {
    464        set_ics(s, 0, E1000_ICR_MDAC);
    465    }
    466}
    467
    468static uint32_t
    469get_eecd(E1000State *s, int index)
    470{
    471    uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
    472
    473    DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
    474           s->eecd_state.bitnum_out, s->eecd_state.reading);
    475    if (!s->eecd_state.reading ||
    476        ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
    477          ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
    478        ret |= E1000_EECD_DO;
    479    return ret;
    480}
    481
    482static void
    483set_eecd(E1000State *s, int index, uint32_t val)
    484{
    485    uint32_t oldval = s->eecd_state.old_eecd;
    486
    487    s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
    488            E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
    489    if (!(E1000_EECD_CS & val)) {            /* CS inactive; nothing to do */
    490        return;
    491    }
    492    if (E1000_EECD_CS & (val ^ oldval)) {    /* CS rise edge; reset state */
    493        s->eecd_state.val_in = 0;
    494        s->eecd_state.bitnum_in = 0;
    495        s->eecd_state.bitnum_out = 0;
    496        s->eecd_state.reading = 0;
    497    }
    498    if (!(E1000_EECD_SK & (val ^ oldval))) {    /* no clock edge */
    499        return;
    500    }
    501    if (!(E1000_EECD_SK & val)) {               /* falling edge */
    502        s->eecd_state.bitnum_out++;
    503        return;
    504    }
    505    s->eecd_state.val_in <<= 1;
    506    if (val & E1000_EECD_DI)
    507        s->eecd_state.val_in |= 1;
    508    if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
    509        s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
    510        s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
    511            EEPROM_READ_OPCODE_MICROWIRE);
    512    }
    513    DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
    514           s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
    515           s->eecd_state.reading);
    516}
    517
    518static uint32_t
    519flash_eerd_read(E1000State *s, int x)
    520{
    521    unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
    522
    523    if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
    524        return (s->mac_reg[EERD]);
    525
    526    if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
    527        return (E1000_EEPROM_RW_REG_DONE | r);
    528
    529    return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
    530           E1000_EEPROM_RW_REG_DONE | r);
    531}
    532
    533static void
    534putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
    535{
    536    uint32_t sum;
    537
    538    if (cse && cse < n)
    539        n = cse + 1;
    540    if (sloc < n-1) {
    541        sum = net_checksum_add(n-css, data+css);
    542        stw_be_p(data + sloc, net_checksum_finish_nozero(sum));
    543    }
    544}
    545
    546static inline void
    547inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr)
    548{
    549    if (!memcmp(arr, bcast, sizeof bcast)) {
    550        e1000x_inc_reg_if_not_full(s->mac_reg, BPTC);
    551    } else if (arr[0] & 1) {
    552        e1000x_inc_reg_if_not_full(s->mac_reg, MPTC);
    553    }
    554}
    555
    556static void
    557e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
    558{
    559    static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
    560                                    PTC1023, PTC1522 };
    561
    562    NetClientState *nc = qemu_get_queue(s->nic);
    563    if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
    564        qemu_receive_packet(nc, buf, size);
    565    } else {
    566        qemu_send_packet(nc, buf, size);
    567    }
    568    inc_tx_bcast_or_mcast_count(s, buf);
    569    e1000x_increase_size_stats(s->mac_reg, PTCregs, size);
    570}
    571
    572static void
    573xmit_seg(E1000State *s)
    574{
    575    uint16_t len;
    576    unsigned int frames = s->tx.tso_frames, css, sofar;
    577    struct e1000_tx *tp = &s->tx;
    578    struct e1000x_txd_props *props = tp->cptse ? &tp->tso_props : &tp->props;
    579
    580    if (tp->cptse) {
    581        css = props->ipcss;
    582        DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
    583               frames, tp->size, css);
    584        if (props->ip) {    /* IPv4 */
    585            stw_be_p(tp->data+css+2, tp->size - css);
    586            stw_be_p(tp->data+css+4,
    587                     lduw_be_p(tp->data + css + 4) + frames);
    588        } else {         /* IPv6 */
    589            stw_be_p(tp->data+css+4, tp->size - css);
    590        }
    591        css = props->tucss;
    592        len = tp->size - css;
    593        DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", props->tcp, css, len);
    594        if (props->tcp) {
    595            sofar = frames * props->mss;
    596            stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
    597            if (props->paylen - sofar > props->mss) {
    598                tp->data[css + 13] &= ~9;    /* PSH, FIN */
    599            } else if (frames) {
    600                e1000x_inc_reg_if_not_full(s->mac_reg, TSCTC);
    601            }
    602        } else {    /* UDP */
    603            stw_be_p(tp->data+css+4, len);
    604        }
    605        if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
    606            unsigned int phsum;
    607            // add pseudo-header length before checksum calculation
    608            void *sp = tp->data + props->tucso;
    609
    610            phsum = lduw_be_p(sp) + len;
    611            phsum = (phsum >> 16) + (phsum & 0xffff);
    612            stw_be_p(sp, phsum);
    613        }
    614        tp->tso_frames++;
    615    }
    616
    617    if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
    618        putsum(tp->data, tp->size, props->tucso, props->tucss, props->tucse);
    619    }
    620    if (tp->sum_needed & E1000_TXD_POPTS_IXSM) {
    621        putsum(tp->data, tp->size, props->ipcso, props->ipcss, props->ipcse);
    622    }
    623    if (tp->vlan_needed) {
    624        memmove(tp->vlan, tp->data, 4);
    625        memmove(tp->data, tp->data + 4, 8);
    626        memcpy(tp->data + 8, tp->vlan_header, 4);
    627        e1000_send_packet(s, tp->vlan, tp->size + 4);
    628    } else {
    629        e1000_send_packet(s, tp->data, tp->size);
    630    }
    631
    632    e1000x_inc_reg_if_not_full(s->mac_reg, TPT);
    633    e1000x_grow_8reg_if_not_full(s->mac_reg, TOTL, s->tx.size);
    634    s->mac_reg[GPTC] = s->mac_reg[TPT];
    635    s->mac_reg[GOTCL] = s->mac_reg[TOTL];
    636    s->mac_reg[GOTCH] = s->mac_reg[TOTH];
    637}
    638
    639static void
    640process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
    641{
    642    PCIDevice *d = PCI_DEVICE(s);
    643    uint32_t txd_lower = le32_to_cpu(dp->lower.data);
    644    uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
    645    unsigned int split_size = txd_lower & 0xffff, bytes, sz;
    646    unsigned int msh = 0xfffff;
    647    uint64_t addr;
    648    struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
    649    struct e1000_tx *tp = &s->tx;
    650
    651    s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
    652    if (dtype == E1000_TXD_CMD_DEXT) {    /* context descriptor */
    653        if (le32_to_cpu(xp->cmd_and_length) & E1000_TXD_CMD_TSE) {
    654            e1000x_read_tx_ctx_descr(xp, &tp->tso_props);
    655            s->use_tso_for_migration = 1;
    656            tp->tso_frames = 0;
    657        } else {
    658            e1000x_read_tx_ctx_descr(xp, &tp->props);
    659            s->use_tso_for_migration = 0;
    660        }
    661        return;
    662    } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
    663        // data descriptor
    664        if (tp->size == 0) {
    665            tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
    666        }
    667        tp->cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0;
    668    } else {
    669        // legacy descriptor
    670        tp->cptse = 0;
    671    }
    672
    673    if (e1000x_vlan_enabled(s->mac_reg) &&
    674        e1000x_is_vlan_txd(txd_lower) &&
    675        (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
    676        tp->vlan_needed = 1;
    677        stw_be_p(tp->vlan_header,
    678                      le16_to_cpu(s->mac_reg[VET]));
    679        stw_be_p(tp->vlan_header + 2,
    680                      le16_to_cpu(dp->upper.fields.special));
    681    }
    682
    683    addr = le64_to_cpu(dp->buffer_addr);
    684    if (tp->cptse) {
    685        msh = tp->tso_props.hdr_len + tp->tso_props.mss;
    686        do {
    687            bytes = split_size;
    688            if (tp->size >= msh) {
    689                goto eop;
    690            }
    691            if (tp->size + bytes > msh)
    692                bytes = msh - tp->size;
    693
    694            bytes = MIN(sizeof(tp->data) - tp->size, bytes);
    695            pci_dma_read(d, addr, tp->data + tp->size, bytes);
    696            sz = tp->size + bytes;
    697            if (sz >= tp->tso_props.hdr_len
    698                && tp->size < tp->tso_props.hdr_len) {
    699                memmove(tp->header, tp->data, tp->tso_props.hdr_len);
    700            }
    701            tp->size = sz;
    702            addr += bytes;
    703            if (sz == msh) {
    704                xmit_seg(s);
    705                memmove(tp->data, tp->header, tp->tso_props.hdr_len);
    706                tp->size = tp->tso_props.hdr_len;
    707            }
    708            split_size -= bytes;
    709        } while (bytes && split_size);
    710    } else {
    711        split_size = MIN(sizeof(tp->data) - tp->size, split_size);
    712        pci_dma_read(d, addr, tp->data + tp->size, split_size);
    713        tp->size += split_size;
    714    }
    715
    716eop:
    717    if (!(txd_lower & E1000_TXD_CMD_EOP))
    718        return;
    719    if (!(tp->cptse && tp->size < tp->tso_props.hdr_len)) {
    720        xmit_seg(s);
    721    }
    722    tp->tso_frames = 0;
    723    tp->sum_needed = 0;
    724    tp->vlan_needed = 0;
    725    tp->size = 0;
    726    tp->cptse = 0;
    727}
    728
    729static uint32_t
    730txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
    731{
    732    PCIDevice *d = PCI_DEVICE(s);
    733    uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
    734
    735    if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
    736        return 0;
    737    txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
    738                ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
    739    dp->upper.data = cpu_to_le32(txd_upper);
    740    pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
    741                  &dp->upper, sizeof(dp->upper));
    742    return E1000_ICR_TXDW;
    743}
    744
    745static uint64_t tx_desc_base(E1000State *s)
    746{
    747    uint64_t bah = s->mac_reg[TDBAH];
    748    uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
    749
    750    return (bah << 32) + bal;
    751}
    752
    753static void
    754start_xmit(E1000State *s)
    755{
    756    PCIDevice *d = PCI_DEVICE(s);
    757    dma_addr_t base;
    758    struct e1000_tx_desc desc;
    759    uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
    760
    761    if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
    762        DBGOUT(TX, "tx disabled\n");
    763        return;
    764    }
    765
    766    while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
    767        base = tx_desc_base(s) +
    768               sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
    769        pci_dma_read(d, base, &desc, sizeof(desc));
    770
    771        DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
    772               (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
    773               desc.upper.data);
    774
    775        process_tx_desc(s, &desc);
    776        cause |= txdesc_writeback(s, base, &desc);
    777
    778        if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
    779            s->mac_reg[TDH] = 0;
    780        /*
    781         * the following could happen only if guest sw assigns
    782         * bogus values to TDT/TDLEN.
    783         * there's nothing too intelligent we could do about this.
    784         */
    785        if (s->mac_reg[TDH] == tdh_start ||
    786            tdh_start >= s->mac_reg[TDLEN] / sizeof(desc)) {
    787            DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
    788                   tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
    789            break;
    790        }
    791    }
    792    set_ics(s, 0, cause);
    793}
    794
    795static int
    796receive_filter(E1000State *s, const uint8_t *buf, int size)
    797{
    798    uint32_t rctl = s->mac_reg[RCTL];
    799    int isbcast = !memcmp(buf, bcast, sizeof bcast), ismcast = (buf[0] & 1);
    800
    801    if (e1000x_is_vlan_packet(buf, le16_to_cpu(s->mac_reg[VET])) &&
    802        e1000x_vlan_rx_filter_enabled(s->mac_reg)) {
    803        uint16_t vid = lduw_be_p(buf + 14);
    804        uint32_t vfta = ldl_le_p((uint32_t*)(s->mac_reg + VFTA) +
    805                                 ((vid >> 5) & 0x7f));
    806        if ((vfta & (1 << (vid & 0x1f))) == 0)
    807            return 0;
    808    }
    809
    810    if (!isbcast && !ismcast && (rctl & E1000_RCTL_UPE)) { /* promiscuous ucast */
    811        return 1;
    812    }
    813
    814    if (ismcast && (rctl & E1000_RCTL_MPE)) {          /* promiscuous mcast */
    815        e1000x_inc_reg_if_not_full(s->mac_reg, MPRC);
    816        return 1;
    817    }
    818
    819    if (isbcast && (rctl & E1000_RCTL_BAM)) {          /* broadcast enabled */
    820        e1000x_inc_reg_if_not_full(s->mac_reg, BPRC);
    821        return 1;
    822    }
    823
    824    return e1000x_rx_group_filter(s->mac_reg, buf);
    825}
    826
    827static void
    828e1000_set_link_status(NetClientState *nc)
    829{
    830    E1000State *s = qemu_get_nic_opaque(nc);
    831    uint32_t old_status = s->mac_reg[STATUS];
    832
    833    if (nc->link_down) {
    834        e1000x_update_regs_on_link_down(s->mac_reg, s->phy_reg);
    835    } else {
    836        if (have_autoneg(s) &&
    837            !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
    838            e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer);
    839        } else {
    840            e1000_link_up(s);
    841        }
    842    }
    843
    844    if (s->mac_reg[STATUS] != old_status)
    845        set_ics(s, 0, E1000_ICR_LSC);
    846}
    847
    848static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
    849{
    850    int bufs;
    851    /* Fast-path short packets */
    852    if (total_size <= s->rxbuf_size) {
    853        return s->mac_reg[RDH] != s->mac_reg[RDT];
    854    }
    855    if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
    856        bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
    857    } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
    858        bufs = s->mac_reg[RDLEN] /  sizeof(struct e1000_rx_desc) +
    859            s->mac_reg[RDT] - s->mac_reg[RDH];
    860    } else {
    861        return false;
    862    }
    863    return total_size <= bufs * s->rxbuf_size;
    864}
    865
    866static bool
    867e1000_can_receive(NetClientState *nc)
    868{
    869    E1000State *s = qemu_get_nic_opaque(nc);
    870
    871    return e1000x_rx_ready(&s->parent_obj, s->mac_reg) &&
    872        e1000_has_rxbufs(s, 1) && !timer_pending(s->flush_queue_timer);
    873}
    874
    875static uint64_t rx_desc_base(E1000State *s)
    876{
    877    uint64_t bah = s->mac_reg[RDBAH];
    878    uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
    879
    880    return (bah << 32) + bal;
    881}
    882
    883static void
    884e1000_receiver_overrun(E1000State *s, size_t size)
    885{
    886    trace_e1000_receiver_overrun(size, s->mac_reg[RDH], s->mac_reg[RDT]);
    887    e1000x_inc_reg_if_not_full(s->mac_reg, RNBC);
    888    e1000x_inc_reg_if_not_full(s->mac_reg, MPC);
    889    set_ics(s, 0, E1000_ICS_RXO);
    890}
    891
       /*
        * Receive one frame, supplied as a scatter/gather list, and DMA it
        * into the RX descriptor ring.
        *
        * @nc:     net client whose opaque pointer is the E1000State
        * @iov:    frame data; when a VLAN tag is stripped the first 12 bytes
        *          are moved forward by 4 bytes inside these buffers
        * @iovcnt: number of entries in @iov
        *
        * Returns -1 if e1000x_hw_rx_enabled() is false or the ring cannot
        * hold the frame (overrun), 0 if the flush-queue timer is pending,
        * and otherwise the frame size after padding / VLAN stripping.  That
        * size is also returned when the frame is silently dropped because
        * it is oversized or rejected by receive_filter().
        */
    892static ssize_t
    893e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
    894{
    895    E1000State *s = qemu_get_nic_opaque(nc);
    896    PCIDevice *d = PCI_DEVICE(s);
    897    struct e1000_rx_desc desc;
    898    dma_addr_t base;
    899    unsigned int n, rdt;
    900    uint32_t rdh_start;
    901    uint16_t vlan_special = 0;
    902    uint8_t vlan_status = 0;
    903    uint8_t min_buf[MIN_BUF_SIZE];
    904    struct iovec min_iov;
    905    uint8_t *filter_buf = iov->iov_base;
    906    size_t size = iov_size(iov, iovcnt);
    907    size_t iov_ofs = 0;
    908    size_t desc_offset;
    909    size_t desc_size;
    910    size_t total_size;
    911
    912    if (!e1000x_hw_rx_enabled(s->mac_reg)) {
    913        return -1;
    914    }
    915
    916    if (timer_pending(s->flush_queue_timer)) {
    917        return 0;
    918    }
    919
    920    /* Pad to minimum Ethernet frame length */
    921    if (size < sizeof(min_buf)) {
               /* Copy into min_buf, zero-fill the rest, and continue with a
                * single-entry iovec that points at the padded copy. */
    922        iov_to_buf(iov, iovcnt, 0, min_buf, size);
    923        memset(&min_buf[size], 0, sizeof(min_buf) - size);
    924        min_iov.iov_base = filter_buf = min_buf;
    925        min_iov.iov_len = size = sizeof(min_buf);
    926        iovcnt = 1;
    927        iov = &min_iov;
    928    } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
    929        /* This is very unlikely, but may happen. */
               /* The first iovec entry is shorter than the header, so gather
                * the header bytes into min_buf for the filter code below. */
    930        iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
    931        filter_buf = min_buf;
    932    }
    933
    934    /* Discard oversized packets if !LPE and !SBP. */
    935    if (e1000x_is_oversized(s->mac_reg, size)) {
    936        return size;
    937    }
    938
    939    if (!receive_filter(s, filter_buf, size)) {
    940        return size;
    941    }
    942
           /*
            * VLAN stripping: remember the 16-bit value at offset 14 for the
            * descriptor's "special" field, then shift the first 12 bytes
            * forward by 4 so that copying from offset 4 yields the frame
            * without its 4-byte tag.
            */
    943    if (e1000x_vlan_enabled(s->mac_reg) &&
    944        e1000x_is_vlan_packet(filter_buf, le16_to_cpu(s->mac_reg[VET]))) {
    945        vlan_special = cpu_to_le16(lduw_be_p(filter_buf + 14));
    946        iov_ofs = 4;
    947        if (filter_buf == iov->iov_base) {
    948            memmove(filter_buf + 4, filter_buf, 12);
    949        } else {
                   /* Header was gathered into min_buf: write the shifted
                    * bytes back into the iovec, then skip any entries that
                    * lie entirely before the new start offset. */
    950            iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
    951            while (iov->iov_len <= iov_ofs) {
    952                iov_ofs -= iov->iov_len;
    953                iov++;
    954            }
    955        }
    956        vlan_status = E1000_RXD_STAT_VP;
    957        size -= 4;
    958    }
    959
           /* total_size additionally counts e1000x_fcs_len() bytes, which are
            * accounted for in the descriptors but never copied (see below). */
    960    rdh_start = s->mac_reg[RDH];
    961    desc_offset = 0;
    962    total_size = size + e1000x_fcs_len(s->mac_reg);
    963    if (!e1000_has_rxbufs(s, total_size)) {
    964        e1000_receiver_overrun(s, total_size);
    965        return -1;
    966    }
           /* Fill one descriptor per iteration, at most rxbuf_size bytes each. */
    967    do {
    968        desc_size = total_size - desc_offset;
    969        if (desc_size > s->rxbuf_size) {
    970            desc_size = s->rxbuf_size;
    971        }
    972        base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
    973        pci_dma_read(d, base, &desc, sizeof(desc));
    974        desc.special = vlan_special;
    975        desc.status |= (vlan_status | E1000_RXD_STAT_DD);
    976        if (desc.buffer_addr) {
                   /* Only bytes below 'size' are written to the buffer. */
    977            if (desc_offset < size) {
    978                size_t iov_copy;
    979                hwaddr ba = le64_to_cpu(desc.buffer_addr);
    980                size_t copy_size = size - desc_offset;
    981                if (copy_size > s->rxbuf_size) {
    982                    copy_size = s->rxbuf_size;
    983                }
                       /* Walk the iovec, advancing to the next entry whenever
                        * the current one has been fully consumed. */
    984                do {
    985                    iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
    986                    pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
    987                    copy_size -= iov_copy;
    988                    ba += iov_copy;
    989                    iov_ofs += iov_copy;
    990                    if (iov_ofs == iov->iov_len) {
    991                        iov++;
    992                        iov_ofs = 0;
    993                    }
    994                } while (copy_size);
    995            }
    996            desc_offset += desc_size;
    997            desc.length = cpu_to_le16(desc_size);
    998            if (desc_offset >= total_size) {
    999                desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
   1000            } else {
   1001                /* Guest zeroing out status is not a hardware requirement.
   1002                   Clear EOP in case guest didn't do it. */
   1003                desc.status &= ~E1000_RXD_STAT_EOP;
   1004            }
   1005        } else { // as per intel docs; skip descriptors with null buf addr
   1006            DBGOUT(RX, "Null RX descriptor!!\n");
   1007        }
               /* Write the descriptor back even when it was skipped. */
   1008        pci_dma_write(d, base, &desc, sizeof(desc));
   1009
               /* Advance RDH, wrapping to 0 at the end of the ring. */
   1010        if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
   1011            s->mac_reg[RDH] = 0;
   1012        /* see comment in start_xmit; same here */
   1013        if (s->mac_reg[RDH] == rdh_start ||
   1014            rdh_start >= s->mac_reg[RDLEN] / sizeof(desc)) {
   1015            DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
   1016                   rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
   1017            e1000_receiver_overrun(s, total_size);
   1018            return -1;
   1019        }
   1020    } while (desc_offset < total_size);
   1021
   1022    e1000x_update_rx_total_stats(s->mac_reg, size, total_size);
   1023
           /*
            * Always signal RXT0; additionally signal RXDMT0 when the bytes of
            * descriptors left between RDH and RDT are at most
            * RDLEN >> rxbuf_min_shift.
            */
   1024    n = E1000_ICS_RXT0;
   1025    if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
   1026        rdt += s->mac_reg[RDLEN] / sizeof(desc);
   1027    if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
   1028        s->rxbuf_min_shift)
   1029        n |= E1000_ICS_RXDMT0;
   1030
   1031    set_ics(s, 0, n);
   1032
   1033    return size;
   1034}
   1035
   1036static ssize_t
   1037e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
   1038{
   1039    const struct iovec iov = {
   1040        .iov_base = (uint8_t *)buf,
   1041        .iov_len = size
   1042    };
   1043
   1044    return e1000_receive_iov(nc, &iov, 1);
   1045}
   1046
   1047static uint32_t
   1048mac_readreg(E1000State *s, int index)
   1049{
   1050    return s->mac_reg[index];
   1051}
   1052
   1053static uint32_t
   1054mac_low4_read(E1000State *s, int index)
   1055{
   1056    return s->mac_reg[index] & 0xf;
   1057}
   1058
   1059static uint32_t
   1060mac_low11_read(E1000State *s, int index)
   1061{
   1062    return s->mac_reg[index] & 0x7ff;
   1063}
   1064
   1065static uint32_t
   1066mac_low13_read(E1000State *s, int index)
   1067{
   1068    return s->mac_reg[index] & 0x1fff;
   1069}
   1070
   1071static uint32_t
   1072mac_low16_read(E1000State *s, int index)
   1073{
   1074    return s->mac_reg[index] & 0xffff;
   1075}
   1076
   1077static uint32_t
   1078mac_icr_read(E1000State *s, int index)
   1079{
   1080    uint32_t ret = s->mac_reg[ICR];
   1081
   1082    DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
   1083    set_interrupt_cause(s, 0, 0);
   1084    return ret;
   1085}
   1086
   1087static uint32_t
   1088mac_read_clr4(E1000State *s, int index)
   1089{
   1090    uint32_t ret = s->mac_reg[index];
   1091
   1092    s->mac_reg[index] = 0;
   1093    return ret;
   1094}
   1095
   1096static uint32_t
   1097mac_read_clr8(E1000State *s, int index)
   1098{
   1099    uint32_t ret = s->mac_reg[index];
   1100
   1101    s->mac_reg[index] = 0;
   1102    s->mac_reg[index-1] = 0;
   1103    return ret;
   1104}
   1105
   1106static void
   1107mac_writereg(E1000State *s, int index, uint32_t val)
   1108{
   1109    uint32_t macaddr[2];
   1110
   1111    s->mac_reg[index] = val;
   1112
   1113    if (index == RA + 1) {
   1114        macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
   1115        macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
   1116        qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
   1117    }
   1118}
   1119
   1120static void
   1121set_rdt(E1000State *s, int index, uint32_t val)
   1122{
   1123    s->mac_reg[index] = val & 0xffff;
   1124    if (e1000_has_rxbufs(s, 1)) {
   1125        qemu_flush_queued_packets(qemu_get_queue(s->nic));
   1126    }
   1127}
   1128
   1129static void
   1130set_16bit(E1000State *s, int index, uint32_t val)
   1131{
   1132    s->mac_reg[index] = val & 0xffff;
   1133}
   1134
   1135static void
   1136set_dlen(E1000State *s, int index, uint32_t val)
   1137{
   1138    s->mac_reg[index] = val & 0xfff80;
   1139}
   1140
   1141static void
   1142set_tctl(E1000State *s, int index, uint32_t val)
   1143{
   1144    s->mac_reg[index] = val;
   1145    s->mac_reg[TDT] &= 0xffff;
   1146    start_xmit(s);
   1147}
   1148
   1149static void
   1150set_icr(E1000State *s, int index, uint32_t val)
   1151{
   1152    DBGOUT(INTERRUPT, "set_icr %x\n", val);
   1153    set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
   1154}
   1155
   1156static void
   1157set_imc(E1000State *s, int index, uint32_t val)
   1158{
   1159    s->mac_reg[IMS] &= ~val;
   1160    set_ics(s, 0, 0);
   1161}
   1162
   1163static void
   1164set_ims(E1000State *s, int index, uint32_t val)
   1165{
   1166    s->mac_reg[IMS] |= val;
   1167    set_ics(s, 0, 0);
   1168}
   1169
       /* getreg(x): designated-initializer shorthand that maps register x to
        * the plain mac_readreg handler. */
   1170#define getreg(x)    [x] = mac_readreg
       /* Read handler signature: (device state, register index) -> value. */
   1171typedef uint32_t (*readops)(E1000State *, int);
       /*
        * Read dispatch table, indexed by register index (MMIO offset >> 2).
        * e1000_mmio_read() treats an index that is out of range or whose entry
        * is NULL as an unknown register.
        */
   1172static const readops macreg_readops[] = {
   1173    getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
   1174    getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
   1175    getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
   1176    getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
   1177    getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
   1178    getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
   1179    getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
   1180    getreg(TADV),     getreg(ITR),      getreg(FCRUC),    getreg(IPAV),
   1181    getreg(WUC),      getreg(WUS),      getreg(SCC),      getreg(ECOL),
   1182    getreg(MCC),      getreg(LATECOL),  getreg(COLC),     getreg(DC),
   1183    getreg(TNCRS),    getreg(SEQEC),    getreg(CEXTERR),  getreg(RLEC),
   1184    getreg(XONRXC),   getreg(XONTXC),   getreg(XOFFRXC),  getreg(XOFFTXC),
   1185    getreg(RFC),      getreg(RJC),      getreg(RNBC),     getreg(TSCTFC),
   1186    getreg(MGTPRC),   getreg(MGTPDC),   getreg(MGTPTC),   getreg(GORCL),
   1187    getreg(GOTCL),
   1188
           /* Read-to-clear registers: clr8 also zeroes the register at
            * index - 1, clr4 zeroes only the register that was read. */
   1189    [TOTH]    = mac_read_clr8,      [TORH]    = mac_read_clr8,
   1190    [GOTCH]   = mac_read_clr8,      [GORCH]   = mac_read_clr8,
   1191    [PRC64]   = mac_read_clr4,      [PRC127]  = mac_read_clr4,
   1192    [PRC255]  = mac_read_clr4,      [PRC511]  = mac_read_clr4,
   1193    [PRC1023] = mac_read_clr4,      [PRC1522] = mac_read_clr4,
   1194    [PTC64]   = mac_read_clr4,      [PTC127]  = mac_read_clr4,
   1195    [PTC255]  = mac_read_clr4,      [PTC511]  = mac_read_clr4,
   1196    [PTC1023] = mac_read_clr4,      [PTC1522] = mac_read_clr4,
   1197    [GPRC]    = mac_read_clr4,      [GPTC]    = mac_read_clr4,
   1198    [TPT]     = mac_read_clr4,      [TPR]     = mac_read_clr4,
   1199    [RUC]     = mac_read_clr4,      [ROC]     = mac_read_clr4,
   1200    [BPRC]    = mac_read_clr4,      [MPRC]    = mac_read_clr4,
   1201    [TSCTC]   = mac_read_clr4,      [BPTC]    = mac_read_clr4,
   1202    [MPTC]    = mac_read_clr4,
           /* Registers with dedicated read handlers. */
   1203    [ICR]     = mac_icr_read,       [EECD]    = get_eecd,
   1204    [EERD]    = flash_eerd_read,
           /* Registers whose reads are masked to their low 13/11/16 bits. */
   1205    [RDFH]    = mac_low13_read,     [RDFT]    = mac_low13_read,
   1206    [RDFHS]   = mac_low13_read,     [RDFTS]   = mac_low13_read,
   1207    [RDFPC]   = mac_low13_read,
   1208    [TDFH]    = mac_low11_read,     [TDFT]    = mac_low11_read,
   1209    [TDFHS]   = mac_low13_read,     [TDFTS]   = mac_low13_read,
   1210    [TDFPC]   = mac_low13_read,
   1211    [AIT]     = mac_low16_read,
   1212
           /* Register arrays, filled with GNU range designators. */
   1213    [CRCERRS ... MPC]   = &mac_readreg,
   1214    [IP6AT ... IP6AT+3] = &mac_readreg,    [IP4AT ... IP4AT+6] = &mac_readreg,
   1215    [FFLT ... FFLT+6]   = &mac_low11_read,
   1216    [RA ... RA+31]      = &mac_readreg,
   1217    [WUPM ... WUPM+31]  = &mac_readreg,
   1218    [MTA ... MTA+127]   = &mac_readreg,
   1219    [VFTA ... VFTA+127] = &mac_readreg,
   1220    [FFMT ... FFMT+254] = &mac_low4_read,
   1221    [FFVT ... FFVT+254] = &mac_readreg,
   1222    [PBM ... PBM+16383] = &mac_readreg,
   1223};
       /* Number of entries in macreg_readops; bounds the index check on reads. */
   1224enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
   1225
       /* putreg(x): designated-initializer shorthand that maps register x to
        * the plain mac_writereg handler. */
   1226#define putreg(x)    [x] = mac_writereg
       /* Write handler signature: (device state, register index, value). */
   1227typedef void (*writeops)(E1000State *, int, uint32_t);
       /*
        * Write dispatch table, indexed by register index (MMIO offset >> 2).
        * e1000_mmio_write() treats a register with no entry here but with an
        * entry in macreg_readops as read-only, and anything else as unknown.
        */
   1228static const writeops macreg_writeops[] = {
   1229    putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
   1230    putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
   1231    putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),      putreg(FCRUC),
   1232    putreg(TDFH),     putreg(TDFT),     putreg(TDFHS),    putreg(TDFTS),
   1233    putreg(TDFPC),    putreg(RDFH),     putreg(RDFT),     putreg(RDFHS),
   1234    putreg(RDFTS),    putreg(RDFPC),    putreg(IPAV),     putreg(WUC),
   1235    putreg(WUS),      putreg(AIT),
   1236
           /* Registers whose writes go through a dedicated handler (masking
            * and/or side effects); note TDT shares set_tctl with TCTL. */
   1237    [TDLEN]  = set_dlen,   [RDLEN]  = set_dlen,       [TCTL] = set_tctl,
   1238    [TDT]    = set_tctl,   [MDIC]   = set_mdic,       [ICS]  = set_ics,
   1239    [TDH]    = set_16bit,  [RDH]    = set_16bit,      [RDT]  = set_rdt,
   1240    [IMC]    = set_imc,    [IMS]    = set_ims,        [ICR]  = set_icr,
   1241    [EECD]   = set_eecd,   [RCTL]   = set_rx_control, [CTRL] = set_ctrl,
   1242    [RDTR]   = set_16bit,  [RADV]   = set_16bit,      [TADV] = set_16bit,
   1243    [ITR]    = set_16bit,
   1244
           /* Register arrays, filled with GNU range designators. */
   1245    [IP6AT ... IP6AT+3] = &mac_writereg, [IP4AT ... IP4AT+6] = &mac_writereg,
   1246    [FFLT ... FFLT+6]   = &mac_writereg,
   1247    [RA ... RA+31]      = &mac_writereg,
   1248    [WUPM ... WUPM+31]  = &mac_writereg,
   1249    [MTA ... MTA+127]   = &mac_writereg,
   1250    [VFTA ... VFTA+127] = &mac_writereg,
   1251    [FFMT ... FFMT+254] = &mac_writereg, [FFVT ... FFVT+254] = &mac_writereg,
   1252    [PBM ... PBM+16383] = &mac_writereg,
   1253};
   1254
       /* Number of entries in macreg_writeops; bounds the index check on writes. */
   1255enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
   1256
       /* Low two bits of each mac_reg_access[] entry. */
   1257enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };
   1258
       /* markflag(x): store E1000_FLAG_x in the flag bits (shifted left by
        * two) and mark the register as requiring that compat flag. */
   1259#define markflag(x)    ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
   1260/* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
   1261 * f - flag bits (up to 6 possible flags)
   1262 * n - flag needed
   1263 * p - partially implemented
        *
        * The MMIO handlers allow access to a register with the n bit set only
        * if s->compat_flags has one of the register's f bits set; the p bit
        * only triggers a debug message. */
   1264static const uint8_t mac_reg_access[0x8000] = {
   1265    [RDTR]    = markflag(MIT),    [TADV]    = markflag(MIT),
   1266    [RADV]    = markflag(MIT),    [ITR]     = markflag(MIT),
   1267
   1268    [IPAV]    = markflag(MAC),    [WUC]     = markflag(MAC),
   1269    [IP6AT]   = markflag(MAC),    [IP4AT]   = markflag(MAC),
   1270    [FFVT]    = markflag(MAC),    [WUPM]    = markflag(MAC),
   1271    [ECOL]    = markflag(MAC),    [MCC]     = markflag(MAC),
   1272    [DC]      = markflag(MAC),    [TNCRS]   = markflag(MAC),
   1273    [RLEC]    = markflag(MAC),    [XONRXC]  = markflag(MAC),
   1274    [XOFFTXC] = markflag(MAC),    [RFC]     = markflag(MAC),
   1275    [TSCTFC]  = markflag(MAC),    [MGTPRC]  = markflag(MAC),
   1276    [WUS]     = markflag(MAC),    [AIT]     = markflag(MAC),
   1277    [FFLT]    = markflag(MAC),    [FFMT]    = markflag(MAC),
   1278    [SCC]     = markflag(MAC),    [FCRUC]   = markflag(MAC),
   1279    [LATECOL] = markflag(MAC),    [COLC]    = markflag(MAC),
   1280    [SEQEC]   = markflag(MAC),    [CEXTERR] = markflag(MAC),
   1281    [XONTXC]  = markflag(MAC),    [XOFFRXC] = markflag(MAC),
   1282    [RJC]     = markflag(MAC),    [RNBC]    = markflag(MAC),
   1283    [MGTPDC]  = markflag(MAC),    [MGTPTC]  = markflag(MAC),
   1284    [RUC]     = markflag(MAC),    [ROC]     = markflag(MAC),
   1285    [GORCL]   = markflag(MAC),    [GORCH]   = markflag(MAC),
   1286    [GOTCL]   = markflag(MAC),    [GOTCH]   = markflag(MAC),
   1287    [BPRC]    = markflag(MAC),    [MPRC]    = markflag(MAC),
   1288    [TSCTC]   = markflag(MAC),    [PRC64]   = markflag(MAC),
   1289    [PRC127]  = markflag(MAC),    [PRC255]  = markflag(MAC),
   1290    [PRC511]  = markflag(MAC),    [PRC1023] = markflag(MAC),
   1291    [PRC1522] = markflag(MAC),    [PTC64]   = markflag(MAC),
   1292    [PTC127]  = markflag(MAC),    [PTC255]  = markflag(MAC),
   1293    [PTC511]  = markflag(MAC),    [PTC1023] = markflag(MAC),
   1294    [PTC1522] = markflag(MAC),    [MPTC]    = markflag(MAC),
   1295    [BPTC]    = markflag(MAC),
   1296
           /* Registers that need the MAC flag and are only partially
            * implemented. */
   1297    [TDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
   1298    [TDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
   1299    [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
   1300    [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
   1301    [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
   1302    [RDFH]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
   1303    [RDFT]  = markflag(MAC) | MAC_ACCESS_PARTIAL,
   1304    [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
   1305    [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
   1306    [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
   1307    [PBM]   = markflag(MAC) | MAC_ACCESS_PARTIAL,
   1308};
   1309
   1310static void
   1311e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
   1312                 unsigned size)
   1313{
   1314    E1000State *s = opaque;
   1315    unsigned int index = (addr & 0x1ffff) >> 2;
   1316
   1317    if (index < NWRITEOPS && macreg_writeops[index]) {
   1318        if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
   1319            || (s->compat_flags & (mac_reg_access[index] >> 2))) {
   1320            if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
   1321                DBGOUT(GENERAL, "Writing to register at offset: 0x%08x. "
   1322                       "It is not fully implemented.\n", index<<2);
   1323            }
   1324            macreg_writeops[index](s, index, val);
   1325        } else {    /* "flag needed" bit is set, but the flag is not active */
   1326            DBGOUT(MMIO, "MMIO write attempt to disabled reg. addr=0x%08x\n",
   1327                   index<<2);
   1328        }
   1329    } else if (index < NREADOPS && macreg_readops[index]) {
   1330        DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n",
   1331               index<<2, val);
   1332    } else {
   1333        DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
   1334               index<<2, val);
   1335    }
   1336}
   1337
   1338static uint64_t
   1339e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
   1340{
   1341    E1000State *s = opaque;
   1342    unsigned int index = (addr & 0x1ffff) >> 2;
   1343
   1344    if (index < NREADOPS && macreg_readops[index]) {
   1345        if (!(mac_reg_access[index] & MAC_ACCESS_FLAG_NEEDED)
   1346            || (s->compat_flags & (mac_reg_access[index] >> 2))) {
   1347            if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
   1348                DBGOUT(GENERAL, "Reading register at offset: 0x%08x. "
   1349                       "It is not fully implemented.\n", index<<2);
   1350            }
   1351            return macreg_readops[index](s, index);
   1352        } else {    /* "flag needed" bit is set, but the flag is not active */
   1353            DBGOUT(MMIO, "MMIO read attempt of disabled reg. addr=0x%08x\n",
   1354                   index<<2);
   1355        }
   1356    } else {
   1357        DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
   1358    }
   1359    return 0;
   1360}
   1361
       /*
        * Callbacks for the register MMIO region: little-endian, with the
        * implementation access size pinned to exactly 4 bytes (min == max).
        */
   1362static const MemoryRegionOps e1000_mmio_ops = {
   1363    .read = e1000_mmio_read,
   1364    .write = e1000_mmio_write,
   1365    .endianness = DEVICE_LITTLE_ENDIAN,
   1366    .impl = {
   1367        .min_access_size = 4,
   1368        .max_access_size = 4,
   1369    },
   1370};
   1371
   1372static uint64_t e1000_io_read(void *opaque, hwaddr addr,
   1373                              unsigned size)
   1374{
   1375    E1000State *s = opaque;
   1376
   1377    (void)s;
   1378    return 0;
   1379}
   1380
   1381static void e1000_io_write(void *opaque, hwaddr addr,
   1382                           uint64_t val, unsigned size)
   1383{
   1384    E1000State *s = opaque;
   1385
   1386    (void)s;
   1387}
   1388
       /* Callbacks for the I/O region; both handlers are stubs (reads return
        * 0, writes are ignored). */
   1389static const MemoryRegionOps e1000_io_ops = {
   1390    .read = e1000_io_read,
   1391    .write = e1000_io_write,
   1392    .endianness = DEVICE_LITTLE_ENDIAN,
   1393};
   1394
   1395static bool is_version_1(void *opaque, int version_id)
   1396{
   1397    return version_id == 1;
   1398}
   1399
   1400static int e1000_pre_save(void *opaque)
   1401{
   1402    E1000State *s = opaque;
   1403    NetClientState *nc = qemu_get_queue(s->nic);
   1404
   1405    /*
   1406     * If link is down and auto-negotiation is supported and ongoing,
   1407     * complete auto-negotiation immediately. This allows us to look
   1408     * at MII_SR_AUTONEG_COMPLETE to infer link status on load.
   1409     */
   1410    if (nc->link_down && have_autoneg(s)) {
   1411        s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
   1412    }
   1413
   1414    /* Decide which set of props to migrate in the main structure */
   1415    if (chkflag(TSO) || !s->use_tso_for_migration) {
   1416        /* Either we're migrating with the extra subsection, in which
   1417         * case the mig_props is always 'props' OR
   1418         * we've not got the subsection, but 'props' was the last
   1419         * updated.
   1420         */
   1421        s->mig_props = s->tx.props;
   1422    } else {
   1423        /* We're not using the subsection, and 'tso_props' was
   1424         * the last updated.
   1425         */
   1426        s->mig_props = s->tx.tso_props;
   1427    }
   1428    return 0;
   1429}
   1430
   1431static int e1000_post_load(void *opaque, int version_id)
   1432{
   1433    E1000State *s = opaque;
   1434    NetClientState *nc = qemu_get_queue(s->nic);
   1435
   1436    if (!chkflag(MIT)) {
   1437        s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
   1438            s->mac_reg[TADV] = 0;
   1439        s->mit_irq_level = false;
   1440    }
   1441    s->mit_ide = 0;
   1442    s->mit_timer_on = true;
   1443    timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 1);
   1444
   1445    /* nc.link_down can't be migrated, so infer link_down according
   1446     * to link status bit in mac_reg[STATUS].
   1447     * Alternatively, restart link negotiation if it was in progress. */
   1448    nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
   1449
   1450    if (have_autoneg(s) &&
   1451        !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
   1452        nc->link_down = false;
   1453        timer_mod(s->autoneg_timer,
   1454                  qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
   1455    }
   1456
   1457    s->tx.props = s->mig_props;
   1458    if (!s->received_tx_tso) {
   1459        /* We received only one set of offload data (tx.props)
   1460         * and haven't got tx.tso_props.  The best we can do
   1461         * is dupe the data.
   1462         */
   1463        s->tx.tso_props = s->mig_props;
   1464    }
   1465    return 0;
   1466}
   1467
   1468static int e1000_tx_tso_post_load(void *opaque, int version_id)
   1469{
   1470    E1000State *s = opaque;
   1471    s->received_tx_tso = true;
   1472    return 0;
   1473}
   1474
   1475static bool e1000_mit_state_needed(void *opaque)
   1476{
   1477    E1000State *s = opaque;
   1478
   1479    return chkflag(MIT);
   1480}
   1481
   1482static bool e1000_full_mac_needed(void *opaque)
   1483{
   1484    E1000State *s = opaque;
   1485
   1486    return chkflag(MAC);
   1487}
   1488
   1489static bool e1000_tso_state_needed(void *opaque)
   1490{
   1491    E1000State *s = opaque;
   1492
   1493    return chkflag(TSO);
   1494}
   1495
       /*
        * Migration subsection with the interrupt-mitigation registers and
        * mit_irq_level; included only when e1000_mit_state_needed() is true.
        */
   1496static const VMStateDescription vmstate_e1000_mit_state = {
   1497    .name = "e1000/mit_state",
   1498    .version_id = 1,
   1499    .minimum_version_id = 1,
   1500    .needed = e1000_mit_state_needed,
   1501    .fields = (VMStateField[]) {
   1502        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
   1503        VMSTATE_UINT32(mac_reg[RADV], E1000State),
   1504        VMSTATE_UINT32(mac_reg[TADV], E1000State),
   1505        VMSTATE_UINT32(mac_reg[ITR], E1000State),
   1506        VMSTATE_BOOL(mit_irq_level, E1000State),
   1507        VMSTATE_END_OF_LIST()
   1508    }
   1509};
   1510
       /*
        * Migration subsection carrying the whole mac_reg[] array (0x8000
        * entries); included only when e1000_full_mac_needed() is true.
        */
   1511static const VMStateDescription vmstate_e1000_full_mac_state = {
   1512    .name = "e1000/full_mac_state",
   1513    .version_id = 1,
   1514    .minimum_version_id = 1,
   1515    .needed = e1000_full_mac_needed,
   1516    .fields = (VMStateField[]) {
   1517        VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
   1518        VMSTATE_END_OF_LIST()
   1519    }
   1520};
   1521
       /*
        * Migration subsection carrying tx.tso_props; included only when
        * e1000_tso_state_needed() is true.  Its post_load hook sets
        * received_tx_tso so that e1000_post_load() keeps the loaded values
        * instead of duplicating mig_props into tx.tso_props.
        */
   1522static const VMStateDescription vmstate_e1000_tx_tso_state = {
   1523    .name = "e1000/tx_tso_state",
   1524    .version_id = 1,
   1525    .minimum_version_id = 1,
   1526    .needed = e1000_tso_state_needed,
   1527    .post_load = e1000_tx_tso_post_load,
   1528    .fields = (VMStateField[]) {
   1529        VMSTATE_UINT8(tx.tso_props.ipcss, E1000State),
   1530        VMSTATE_UINT8(tx.tso_props.ipcso, E1000State),
   1531        VMSTATE_UINT16(tx.tso_props.ipcse, E1000State),
   1532        VMSTATE_UINT8(tx.tso_props.tucss, E1000State),
   1533        VMSTATE_UINT8(tx.tso_props.tucso, E1000State),
   1534        VMSTATE_UINT16(tx.tso_props.tucse, E1000State),
   1535        VMSTATE_UINT32(tx.tso_props.paylen, E1000State),
   1536        VMSTATE_UINT8(tx.tso_props.hdr_len, E1000State),
   1537        VMSTATE_UINT16(tx.tso_props.mss, E1000State),
   1538        VMSTATE_INT8(tx.tso_props.ip, E1000State),
   1539        VMSTATE_INT8(tx.tso_props.tcp, E1000State),
   1540        VMSTATE_END_OF_LIST()
   1541    }
   1542};
   1543
       /*
        * Main migration description for the device (stream version 2, version
        * 1 still accepted).  The field order defines the wire format, so it
        * must not be changed.  The offload properties are carried through
        * mig_props, which e1000_pre_save() fills and e1000_post_load() copies
        * back into tx.props.
        */
   1544static const VMStateDescription vmstate_e1000 = {
   1545    .name = "e1000",
   1546    .version_id = 2,
   1547    .minimum_version_id = 1,
   1548    .pre_save = e1000_pre_save,
   1549    .post_load = e1000_post_load,
   1550    .fields = (VMStateField[]) {
   1551        VMSTATE_PCI_DEVICE(parent_obj, E1000State),
   1552        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
   1553        VMSTATE_UNUSED(4), /* Was mmio_base.  */
   1554        VMSTATE_UINT32(rxbuf_size, E1000State),
   1555        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
   1556        VMSTATE_UINT32(eecd_state.val_in, E1000State),
   1557        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
   1558        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
   1559        VMSTATE_UINT16(eecd_state.reading, E1000State),
   1560        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
   1561        VMSTATE_UINT8(mig_props.ipcss, E1000State),
   1562        VMSTATE_UINT8(mig_props.ipcso, E1000State),
   1563        VMSTATE_UINT16(mig_props.ipcse, E1000State),
   1564        VMSTATE_UINT8(mig_props.tucss, E1000State),
   1565        VMSTATE_UINT8(mig_props.tucso, E1000State),
   1566        VMSTATE_UINT16(mig_props.tucse, E1000State),
   1567        VMSTATE_UINT32(mig_props.paylen, E1000State),
   1568        VMSTATE_UINT8(mig_props.hdr_len, E1000State),
   1569        VMSTATE_UINT16(mig_props.mss, E1000State),
   1570        VMSTATE_UINT16(tx.size, E1000State),
   1571        VMSTATE_UINT16(tx.tso_frames, E1000State),
   1572        VMSTATE_UINT8(tx.sum_needed, E1000State),
   1573        VMSTATE_INT8(mig_props.ip, E1000State),
   1574        VMSTATE_INT8(mig_props.tcp, E1000State),
   1575        VMSTATE_BUFFER(tx.header, E1000State),
   1576        VMSTATE_BUFFER(tx.data, E1000State),
   1577        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
   1578        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
               /* Individually migrated MAC registers. */
   1579        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
   1580        VMSTATE_UINT32(mac_reg[EECD], E1000State),
   1581        VMSTATE_UINT32(mac_reg[EERD], E1000State),
   1582        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
   1583        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
   1584        VMSTATE_UINT32(mac_reg[ICR], E1000State),
   1585        VMSTATE_UINT32(mac_reg[ICS], E1000State),
   1586        VMSTATE_UINT32(mac_reg[IMC], E1000State),
   1587        VMSTATE_UINT32(mac_reg[IMS], E1000State),
   1588        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
   1589        VMSTATE_UINT32(mac_reg[MANC], E1000State),
   1590        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
   1591        VMSTATE_UINT32(mac_reg[MPC], E1000State),
   1592        VMSTATE_UINT32(mac_reg[PBA], E1000State),
   1593        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
   1594        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
   1595        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
   1596        VMSTATE_UINT32(mac_reg[RDH], E1000State),
   1597        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
   1598        VMSTATE_UINT32(mac_reg[RDT], E1000State),
   1599        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
   1600        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
   1601        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
   1602        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
   1603        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
   1604        VMSTATE_UINT32(mac_reg[TDH], E1000State),
   1605        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
   1606        VMSTATE_UINT32(mac_reg[TDT], E1000State),
   1607        VMSTATE_UINT32(mac_reg[TORH], E1000State),
   1608        VMSTATE_UINT32(mac_reg[TORL], E1000State),
   1609        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
   1610        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
   1611        VMSTATE_UINT32(mac_reg[TPR], E1000State),
   1612        VMSTATE_UINT32(mac_reg[TPT], E1000State),
   1613        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
   1614        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
   1615        VMSTATE_UINT32(mac_reg[VET], E1000State),
               /* Register arrays: 32 RA, 128 MTA and 128 VFTA entries. */
   1616        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
   1617        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
   1618        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
   1619        VMSTATE_END_OF_LIST()
   1620    },
           /* Optional sections; each one is gated by its own .needed callback. */
   1621    .subsections = (const VMStateDescription*[]) {
   1622        &vmstate_e1000_mit_state,
   1623        &vmstate_e1000_full_mac_state,
   1624        &vmstate_e1000_tx_tso_state,
   1625        NULL
   1626    }
   1627};
   1628
   1629/*
   1630 * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
   1631 * Note: A valid DevId will be inserted during pci_e1000_realize().
   1632 */
   1633static const uint16_t e1000_eeprom_template[64] = {
   1634    0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
   1635    0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
   1636    0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
   1637    0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
   1638    0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
   1639    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
   1640    0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
   1641    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
   1642};
   1643
   1644/* PCI interface */
   1645
   1646static void
   1647e1000_mmio_setup(E1000State *d)
   1648{
   1649    int i;
   1650    const uint32_t excluded_regs[] = {
   1651        E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
   1652        E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
   1653    };
   1654
   1655    memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
   1656                          "e1000-mmio", PNPMMIO_SIZE);
   1657    memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
   1658    for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
   1659        memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
   1660                                     excluded_regs[i+1] - excluded_regs[i] - 4);
   1661    memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
   1662}
   1663
   1664static void
   1665pci_e1000_uninit(PCIDevice *dev)
   1666{
   1667    E1000State *d = E1000(dev);
   1668
   1669    timer_free(d->autoneg_timer);
   1670    timer_free(d->mit_timer);
   1671    timer_free(d->flush_queue_timer);
   1672    qemu_del_nic(d->nic);
   1673}
   1674
   1675static NetClientInfo net_e1000_info = {
   1676    .type = NET_CLIENT_DRIVER_NIC,
   1677    .size = sizeof(NICState),
   1678    .can_receive = e1000_can_receive,
   1679    .receive = e1000_receive,
   1680    .receive_iov = e1000_receive_iov,
   1681    .link_status_changed = e1000_set_link_status,
   1682};
   1683
   1684static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
   1685                                uint32_t val, int len)
   1686{
   1687    E1000State *s = E1000(pci_dev);
   1688
   1689    pci_default_write_config(pci_dev, address, val, len);
   1690
   1691    if (range_covers_byte(address, len, PCI_COMMAND) &&
   1692        (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
   1693        qemu_flush_queued_packets(qemu_get_queue(s->nic));
   1694    }
   1695}
   1696
   1697static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
   1698{
   1699    DeviceState *dev = DEVICE(pci_dev);
   1700    E1000State *d = E1000(pci_dev);
   1701    uint8_t *pci_conf;
   1702    uint8_t *macaddr;
   1703
   1704    pci_dev->config_write = e1000_write_config;
   1705
   1706    pci_conf = pci_dev->config;
   1707
   1708    /* TODO: RST# value should be 0, PCI spec 6.2.4 */
   1709    pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
   1710
   1711    pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
   1712
   1713    e1000_mmio_setup(d);
   1714
   1715    pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
   1716
   1717    pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
   1718
   1719    qemu_macaddr_default_if_unset(&d->conf.macaddr);
   1720    macaddr = d->conf.macaddr.a;
   1721
   1722    e1000x_core_prepare_eeprom(d->eeprom_data,
   1723                               e1000_eeprom_template,
   1724                               sizeof(e1000_eeprom_template),
   1725                               PCI_DEVICE_GET_CLASS(pci_dev)->device_id,
   1726                               macaddr);
   1727
   1728    d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
   1729                          object_get_typename(OBJECT(d)), dev->id, d);
   1730
   1731    qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
   1732
   1733    d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
   1734    d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
   1735    d->flush_queue_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
   1736                                        e1000_flush_queue_timer, d);
   1737}
   1738
   1739static void qdev_e1000_reset(DeviceState *dev)
   1740{
   1741    E1000State *d = E1000(dev);
   1742    e1000_reset(d);
   1743}
   1744
   1745static Property e1000_properties[] = {
   1746    DEFINE_NIC_PROPERTIES(E1000State, conf),
   1747    DEFINE_PROP_BIT("autonegotiation", E1000State,
   1748                    compat_flags, E1000_FLAG_AUTONEG_BIT, true),
   1749    DEFINE_PROP_BIT("mitigation", E1000State,
   1750                    compat_flags, E1000_FLAG_MIT_BIT, true),
   1751    DEFINE_PROP_BIT("extra_mac_registers", E1000State,
   1752                    compat_flags, E1000_FLAG_MAC_BIT, true),
   1753    DEFINE_PROP_BIT("migrate_tso_props", E1000State,
   1754                    compat_flags, E1000_FLAG_TSO_BIT, true),
   1755    DEFINE_PROP_BIT("init-vet", E1000State,
   1756                    compat_flags, E1000_FLAG_VET_BIT, true),
   1757    DEFINE_PROP_END_OF_LIST(),
   1758};
   1759
   1760typedef struct E1000Info {
   1761    const char *name;
   1762    uint16_t   device_id;
   1763    uint8_t    revision;
   1764    uint16_t   phy_id2;
   1765} E1000Info;
   1766
   1767static void e1000_class_init(ObjectClass *klass, void *data)
   1768{
   1769    DeviceClass *dc = DEVICE_CLASS(klass);
   1770    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
   1771    E1000BaseClass *e = E1000_CLASS(klass);
   1772    const E1000Info *info = data;
   1773
   1774    k->realize = pci_e1000_realize;
   1775    k->exit = pci_e1000_uninit;
   1776    k->romfile = "efi-e1000.rom";
   1777    k->vendor_id = PCI_VENDOR_ID_INTEL;
   1778    k->device_id = info->device_id;
   1779    k->revision = info->revision;
   1780    e->phy_id2 = info->phy_id2;
   1781    k->class_id = PCI_CLASS_NETWORK_ETHERNET;
   1782    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
   1783    dc->desc = "Intel Gigabit Ethernet";
   1784    dc->reset = qdev_e1000_reset;
   1785    dc->vmsd = &vmstate_e1000;
   1786    device_class_set_props(dc, e1000_properties);
   1787}
   1788
   1789static void e1000_instance_init(Object *obj)
   1790{
   1791    E1000State *n = E1000(obj);
   1792    device_add_bootindex_property(obj, &n->conf.bootindex,
   1793                                  "bootindex", "/ethernet-phy@0",
   1794                                  DEVICE(n));
   1795}
   1796
   1797static const TypeInfo e1000_base_info = {
   1798    .name          = TYPE_E1000_BASE,
   1799    .parent        = TYPE_PCI_DEVICE,
   1800    .instance_size = sizeof(E1000State),
   1801    .instance_init = e1000_instance_init,
   1802    .class_size    = sizeof(E1000BaseClass),
   1803    .abstract      = true,
   1804    .interfaces = (InterfaceInfo[]) {
   1805        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
   1806        { },
   1807    },
   1808};
   1809
   1810static const E1000Info e1000_devices[] = {
   1811    {
   1812        .name      = "e1000",
   1813        .device_id = E1000_DEV_ID_82540EM,
   1814        .revision  = 0x03,
   1815        .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
   1816    },
   1817    {
   1818        .name      = "e1000-82544gc",
   1819        .device_id = E1000_DEV_ID_82544GC_COPPER,
   1820        .revision  = 0x03,
   1821        .phy_id2   = E1000_PHY_ID2_82544x,
   1822    },
   1823    {
   1824        .name      = "e1000-82545em",
   1825        .device_id = E1000_DEV_ID_82545EM_COPPER,
   1826        .revision  = 0x03,
   1827        .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
   1828    },
   1829};
   1830
   1831static void e1000_register_types(void)
   1832{
   1833    int i;
   1834
   1835    type_register_static(&e1000_base_info);
   1836    for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
   1837        const E1000Info *info = &e1000_devices[i];
   1838        TypeInfo type_info = {};
   1839
   1840        type_info.name = info->name;
   1841        type_info.parent = TYPE_E1000_BASE;
   1842        type_info.class_data = (void *)info;
   1843        type_info.class_init = e1000_class_init;
   1844
   1845        type_register(&type_info);
   1846    }
   1847}
   1848
   1849type_init(e1000_register_types)