cachepc-qemu

Fork of AMDESE/qemu with changes for cachepc side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu
Log | Files | Refs | Submodules | LICENSE | sfeed.txt

filter-rewriter.c (13927B)


      1/*
      2 * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
      3 * Copyright (c) 2016 FUJITSU LIMITED
      4 * Copyright (c) 2016 Intel Corporation
      5 *
      6 * Author: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
      7 *
      8 * This work is licensed under the terms of the GNU GPL, version 2 or
      9 * later.  See the COPYING file in the top-level directory.
     10 */
     11
     12#include "qemu/osdep.h"
     13#include "trace.h"
     14#include "colo.h"
     15#include "net/filter.h"
     16#include "net/net.h"
     17#include "qemu/error-report.h"
     18#include "qom/object.h"
     19#include "qemu/main-loop.h"
     20#include "qemu/iov.h"
     21#include "net/checksum.h"
     22#include "net/colo.h"
     23#include "migration/colo.h"
     24#include "util.h"
     25
/* QOM type name under which this filter is registered (see colo_rewriter_info). */
#define TYPE_FILTER_REWRITER "filter-rewriter"
/*
 * Ties the RewriterState instance struct to the FILTER_REWRITER() cast that
 * the rest of this file uses on NetFilterState / Object pointers.
 */
OBJECT_DECLARE_SIMPLE_TYPE(RewriterState, FILTER_REWRITER)

/* Values stored in RewriterState.failover_mode. */
#define FAILOVER_MODE_ON  true
#define FAILOVER_MODE_OFF false
     31
/* Per-instance state of the "filter-rewriter" net filter. */
struct RewriterState {
    /* Base object; must stay first so FILTER_REWRITER() casts work. */
    NetFilterState parent_obj;
    /*
     * Queue that rewritten packets are pushed onto; created in
     * colo_rewriter_setup() with qemu_netfilter_pass_to_next as callback.
     */
    NetQueue *incoming_queue;
    /*
     * Tracked TCP connections, looked up by ConnectionKey. Entries are
     * released through g_free (key) and connection_destroy (value).
     */
    GHashTable *connection_track_table;
    /* Backing store of the "vnet_hdr_support" property; false by default. */
    bool vnet_hdr;
    /*
     * Set on COLO_EVENT_FAILOVER when no tracked connection has a non-zero
     * offset; while set, packets of untracked connections are not rewritten.
     */
    bool failover_mode;
};
     40
     41static void filter_rewriter_failover_mode(RewriterState *s)
     42{
     43    s->failover_mode = FAILOVER_MODE_ON;
     44}
     45
     46static void filter_rewriter_flush(NetFilterState *nf)
     47{
     48    RewriterState *s = FILTER_REWRITER(nf);
     49
     50    if (!qemu_net_queue_flush(s->incoming_queue)) {
     51        /* Unable to empty the queue, purge remaining packets */
     52        qemu_net_queue_purge(s->incoming_queue, nf->netdev);
     53    }
     54}
     55
     56/*
     57 * Return 1 on success, if return 0 means the pkt
     58 * is not TCP packet
     59 */
     60static int is_tcp_packet(Packet *pkt)
     61{
     62    if (!parse_packet_early(pkt) &&
     63        pkt->ip->ip_p == IPPROTO_TCP) {
     64        return 1;
     65    } else {
     66        return 0;
     67    }
     68}
     69
     70/* handle tcp packet from primary guest */
     71static int handle_primary_tcp_pkt(RewriterState *rf,
     72                                  Connection *conn,
     73                                  Packet *pkt, ConnectionKey *key)
     74{
     75    struct tcp_hdr *tcp_pkt;
     76
     77    tcp_pkt = (struct tcp_hdr *)pkt->transport_header;
     78    if (trace_event_get_state_backends(TRACE_COLO_FILTER_REWRITER_PKT_INFO)) {
     79        trace_colo_filter_rewriter_pkt_info(__func__,
     80                    inet_ntoa(pkt->ip->ip_src), inet_ntoa(pkt->ip->ip_dst),
     81                    ntohl(tcp_pkt->th_seq), ntohl(tcp_pkt->th_ack),
     82                    tcp_pkt->th_flags);
     83    }
     84    if (trace_event_get_state_backends(
     85          TRACE_COLO_FILTER_REWRITER_CONN_OFFSET)) {
     86        trace_colo_filter_rewriter_conn_offset(conn->offset);
     87    }
     88
     89    if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN)) &&
     90        conn->tcp_state == TCPS_SYN_SENT) {
     91        conn->tcp_state = TCPS_ESTABLISHED;
     92    }
     93
     94    if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_SYN)) {
     95        /*
     96         * we use this flag update offset func
     97         * run once in independent tcp connection
     98         */
     99        conn->tcp_state = TCPS_SYN_RECEIVED;
    100    }
    101
    102    if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK)) {
    103        if (conn->tcp_state == TCPS_SYN_RECEIVED) {
    104            /*
    105             * offset = secondary_seq - primary seq
    106             * ack packet sent by guest from primary node,
    107             * so we use th_ack - 1 get primary_seq
    108             */
    109            conn->offset -= (ntohl(tcp_pkt->th_ack) - 1);
    110            conn->tcp_state = TCPS_ESTABLISHED;
    111        }
    112        if (conn->offset) {
    113            /* handle packets to the secondary from the primary */
    114            tcp_pkt->th_ack = htonl(ntohl(tcp_pkt->th_ack) + conn->offset);
    115
    116            net_checksum_calculate((uint8_t *)pkt->data + pkt->vnet_hdr_len,
    117                                   pkt->size - pkt->vnet_hdr_len, CSUM_TCP);
    118        }
    119
    120        /*
    121         * Passive close step 3
    122         */
    123        if ((conn->tcp_state == TCPS_LAST_ACK) &&
    124            (ntohl(tcp_pkt->th_ack) == (conn->fin_ack_seq + 1))) {
    125            conn->tcp_state = TCPS_CLOSED;
    126            g_hash_table_remove(rf->connection_track_table, key);
    127        }
    128    }
    129
    130    if ((tcp_pkt->th_flags & TH_FIN) == TH_FIN) {
    131        /*
    132         * Passive close.
    133         * Step 1:
    134         * The *server* side of this connect is VM, *client* tries to close
    135         * the connection. We will into CLOSE_WAIT status.
    136         *
    137         * Step 2:
    138         * In this step we will into LAST_ACK status.
    139         *
    140         * We got 'fin=1, ack=1' packet from server side, we need to
    141         * record the seq of 'fin=1, ack=1' packet.
    142         *
    143         * Step 3:
    144         * We got 'ack=1' packets from client side, it acks 'fin=1, ack=1'
    145         * packet from server side. From this point, we can ensure that there
    146         * will be no packets in the connection, except that, some errors
    147         * happen between the path of 'filter object' and vNIC, if this rare
    148         * case really happen, we can still create a new connection,
    149         * So it is safe to remove the connection from connection_track_table.
    150         *
    151         */
    152        if (conn->tcp_state == TCPS_ESTABLISHED) {
    153            conn->tcp_state = TCPS_CLOSE_WAIT;
    154        }
    155
    156        /*
    157         * Active close step 2.
    158         */
    159        if (conn->tcp_state == TCPS_FIN_WAIT_1) {
    160            /*
    161             * For simplify implementation, we needn't wait 2MSL time
    162             * in filter rewriter. Because guest kernel will track the
    163             * TCP status and wait 2MSL time, if client resend the FIN
    164             * packet, guest will apply the last ACK too.
    165             * So, we skip the TCPS_TIME_WAIT state here and go straight
    166             * to TCPS_CLOSED state.
    167             */
    168            conn->tcp_state = TCPS_CLOSED;
    169            g_hash_table_remove(rf->connection_track_table, key);
    170        }
    171    }
    172
    173    return 0;
    174}
    175
    176/* handle tcp packet from secondary guest */
    177static int handle_secondary_tcp_pkt(RewriterState *rf,
    178                                    Connection *conn,
    179                                    Packet *pkt, ConnectionKey *key)
    180{
    181    struct tcp_hdr *tcp_pkt;
    182
    183    tcp_pkt = (struct tcp_hdr *)pkt->transport_header;
    184
    185    if (trace_event_get_state_backends(TRACE_COLO_FILTER_REWRITER_PKT_INFO)) {
    186        trace_colo_filter_rewriter_pkt_info(__func__,
    187                    inet_ntoa(pkt->ip->ip_src), inet_ntoa(pkt->ip->ip_dst),
    188                    ntohl(tcp_pkt->th_seq), ntohl(tcp_pkt->th_ack),
    189                    tcp_pkt->th_flags);
    190    }
    191    if (trace_event_get_state_backends(
    192          TRACE_COLO_FILTER_REWRITER_CONN_OFFSET)) {
    193        trace_colo_filter_rewriter_conn_offset(conn->offset);
    194    }
    195
    196    if (conn->tcp_state == TCPS_SYN_RECEIVED &&
    197        ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN))) {
    198        /*
    199         * save offset = secondary_seq and then
    200         * in handle_primary_tcp_pkt make offset
    201         * = secondary_seq - primary_seq
    202         */
    203        conn->offset = ntohl(tcp_pkt->th_seq);
    204    }
    205
    206    /* VM active connect */
    207    if (conn->tcp_state == TCPS_CLOSED &&
    208        ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_SYN)) {
    209        conn->tcp_state = TCPS_SYN_SENT;
    210    }
    211
    212    if ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK) {
    213        /* Only need to adjust seq while offset is Non-zero */
    214        if (conn->offset) {
    215            /* handle packets to the primary from the secondary*/
    216            tcp_pkt->th_seq = htonl(ntohl(tcp_pkt->th_seq) - conn->offset);
    217
    218            net_checksum_calculate((uint8_t *)pkt->data + pkt->vnet_hdr_len,
    219                                   pkt->size - pkt->vnet_hdr_len, CSUM_TCP);
    220        }
    221    }
    222
    223    /*
    224     * Passive close step 2:
    225     */
    226    if (conn->tcp_state == TCPS_CLOSE_WAIT &&
    227        (tcp_pkt->th_flags & (TH_ACK | TH_FIN)) == (TH_ACK | TH_FIN)) {
    228        conn->fin_ack_seq = ntohl(tcp_pkt->th_seq);
    229        conn->tcp_state = TCPS_LAST_ACK;
    230    }
    231
    232    /*
    233     * Active close
    234     *
    235     * Step 1:
    236     * The *server* side of this connect is VM, *server* tries to close
    237     * the connection.
    238     *
    239     * Step 2:
    240     * We will into CLOSE_WAIT status.
    241     * We simplify the TCPS_FIN_WAIT_2, TCPS_TIME_WAIT and
    242     * CLOSING status.
    243     */
    244    if (conn->tcp_state == TCPS_ESTABLISHED &&
    245        (tcp_pkt->th_flags & (TH_ACK | TH_FIN)) == TH_FIN) {
    246        conn->tcp_state = TCPS_FIN_WAIT_1;
    247    }
    248
    249    return 0;
    250}
    251
    252static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
    253                                         NetClientState *sender,
    254                                         unsigned flags,
    255                                         const struct iovec *iov,
    256                                         int iovcnt,
    257                                         NetPacketSent *sent_cb)
    258{
    259    RewriterState *s = FILTER_REWRITER(nf);
    260    Connection *conn;
    261    ConnectionKey key;
    262    Packet *pkt;
    263    ssize_t size = iov_size(iov, iovcnt);
    264    ssize_t vnet_hdr_len = 0;
    265    char *buf = g_malloc0(size);
    266
    267    iov_to_buf(iov, iovcnt, 0, buf, size);
    268
    269    if (s->vnet_hdr) {
    270        vnet_hdr_len = nf->netdev->vnet_hdr_len;
    271    }
    272
    273    pkt = packet_new_nocopy(buf, size, vnet_hdr_len);
    274
    275    /*
    276     * if we get tcp packet
    277     * we will rewrite it to make secondary guest's
    278     * connection established successfully
    279     */
    280    if (pkt && is_tcp_packet(pkt)) {
    281
    282        fill_connection_key(pkt, &key);
    283
    284        if (sender == nf->netdev) {
    285            /*
    286             * We need make tcp TX and RX packet
    287             * into one connection.
    288             */
    289            reverse_connection_key(&key);
    290        }
    291
    292        /* After failover we needn't change new TCP packet */
    293        if (s->failover_mode &&
    294            !connection_has_tracked(s->connection_track_table, &key)) {
    295            goto out;
    296        }
    297
    298        conn = connection_get(s->connection_track_table,
    299                              &key,
    300                              NULL);
    301
    302        if (sender == nf->netdev) {
    303            /* NET_FILTER_DIRECTION_TX */
    304            if (!handle_primary_tcp_pkt(s, conn, pkt, &key)) {
    305                qemu_net_queue_send(s->incoming_queue, sender, 0,
    306                (const uint8_t *)pkt->data, pkt->size, NULL);
    307                packet_destroy(pkt, NULL);
    308                pkt = NULL;
    309                /*
    310                 * We block the packet here,after rewrite pkt
    311                 * and will send it
    312                 */
    313                return 1;
    314            }
    315        } else {
    316            /* NET_FILTER_DIRECTION_RX */
    317            if (!handle_secondary_tcp_pkt(s, conn, pkt, &key)) {
    318                qemu_net_queue_send(s->incoming_queue, sender, 0,
    319                (const uint8_t *)pkt->data, pkt->size, NULL);
    320                packet_destroy(pkt, NULL);
    321                pkt = NULL;
    322                /*
    323                 * We block the packet here,after rewrite pkt
    324                 * and will send it
    325                 */
    326                return 1;
    327            }
    328        }
    329    }
    330
    331out:
    332    packet_destroy(pkt, NULL);
    333    pkt = NULL;
    334    return 0;
    335}
    336
    337static void reset_seq_offset(gpointer key, gpointer value, gpointer user_data)
    338{
    339    Connection *conn = (Connection *)value;
    340
    341    conn->offset = 0;
    342}
    343
    344static gboolean offset_is_nonzero(gpointer key,
    345                                  gpointer value,
    346                                  gpointer user_data)
    347{
    348    Connection *conn = (Connection *)value;
    349
    350    return conn->offset ? true : false;
    351}
    352
    353static void colo_rewriter_handle_event(NetFilterState *nf, int event,
    354                                       Error **errp)
    355{
    356    RewriterState *rs = FILTER_REWRITER(nf);
    357
    358    switch (event) {
    359    case COLO_EVENT_CHECKPOINT:
    360        g_hash_table_foreach(rs->connection_track_table,
    361                            reset_seq_offset, NULL);
    362        break;
    363    case COLO_EVENT_FAILOVER:
    364        if (!g_hash_table_find(rs->connection_track_table,
    365                              offset_is_nonzero, NULL)) {
    366            filter_rewriter_failover_mode(rs);
    367        }
    368        break;
    369    default:
    370        break;
    371    }
    372}
    373
    374static void colo_rewriter_cleanup(NetFilterState *nf)
    375{
    376    RewriterState *s = FILTER_REWRITER(nf);
    377
    378    /* flush packets */
    379    if (s->incoming_queue) {
    380        filter_rewriter_flush(nf);
    381        g_free(s->incoming_queue);
    382    }
    383
    384    g_hash_table_destroy(s->connection_track_table);
    385}
    386
    387static void colo_rewriter_setup(NetFilterState *nf, Error **errp)
    388{
    389    RewriterState *s = FILTER_REWRITER(nf);
    390
    391    s->connection_track_table = g_hash_table_new_full(connection_key_hash,
    392                                                      connection_key_equal,
    393                                                      g_free,
    394                                                      connection_destroy);
    395    s->incoming_queue = qemu_new_net_queue(qemu_netfilter_pass_to_next, nf);
    396}
    397
    398static bool filter_rewriter_get_vnet_hdr(Object *obj, Error **errp)
    399{
    400    RewriterState *s = FILTER_REWRITER(obj);
    401
    402    return s->vnet_hdr;
    403}
    404
    405static void filter_rewriter_set_vnet_hdr(Object *obj,
    406                                         bool value,
    407                                         Error **errp)
    408{
    409    RewriterState *s = FILTER_REWRITER(obj);
    410
    411    s->vnet_hdr = value;
    412}
    413
    414static void filter_rewriter_init(Object *obj)
    415{
    416    RewriterState *s = FILTER_REWRITER(obj);
    417
    418    s->vnet_hdr = false;
    419    s->failover_mode = FAILOVER_MODE_OFF;
    420}
    421
    422static void colo_rewriter_class_init(ObjectClass *oc, void *data)
    423{
    424    NetFilterClass *nfc = NETFILTER_CLASS(oc);
    425
    426    object_class_property_add_bool(oc, "vnet_hdr_support",
    427                                   filter_rewriter_get_vnet_hdr,
    428                                   filter_rewriter_set_vnet_hdr);
    429
    430    nfc->setup = colo_rewriter_setup;
    431    nfc->cleanup = colo_rewriter_cleanup;
    432    nfc->receive_iov = colo_rewriter_receive_iov;
    433    nfc->handle_event = colo_rewriter_handle_event;
    434}
    435
    436static const TypeInfo colo_rewriter_info = {
    437    .name = TYPE_FILTER_REWRITER,
    438    .parent = TYPE_NETFILTER,
    439    .class_init = colo_rewriter_class_init,
    440    .instance_init = filter_rewriter_init,
    441    .instance_size = sizeof(RewriterState),
    442};
    443
/* Register the "filter-rewriter" type described by colo_rewriter_info. */
static void register_types(void)
{
    type_register_static(&colo_rewriter_info);
}

/* Hook register_types() into the type initialisation machinery. */
type_init(register_types);