cachepc-qemu

Fork of AMDESE/qemu with changes for cachepc side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu
Log | Files | Refs | Submodules | LICENSE | sfeed.txt

vhdx-log.c (30987B)


      1/*
      2 * Block driver for Hyper-V VHDX Images
      3 *
      4 * Copyright (c) 2013 Red Hat, Inc.,
      5 *
      6 * Authors:
      7 *  Jeff Cody <jcody@redhat.com>
      8 *
      9 *  This is based on the "VHDX Format Specification v1.00", published 8/25/2012
     10 *  by Microsoft:
     11 *      https://www.microsoft.com/en-us/download/details.aspx?id=34750
     12 *
     13 * This file covers the functionality of the metadata log writing, parsing, and
     14 * replay.
     15 *
     16 * This work is licensed under the terms of the GNU LGPL, version 2 or later.
     17 * See the COPYING.LIB file in the top-level directory.
     18 *
     19 */
     20
     21#include "qemu/osdep.h"
     22#include "qapi/error.h"
     23#include "block/block_int.h"
     24#include "qemu/error-report.h"
     25#include "qemu/bswap.h"
     26#include "vhdx.h"
     27
     28
/* Describes one run of consecutive valid log entries found while
 * scanning the log (see vhdx_log_search()). */
typedef struct VHDXLogSequence {
    bool valid;             /* true once at least one valid entry was found */
    uint32_t count;         /* number of entries in the sequence */
    VHDXLogEntries log;     /* log state; read = start of the sequence,
                             * write = position just past its last entry */
    VHDXLogEntryHeader hdr; /* header of the first entry of the sequence */
} VHDXLogSequence;
     35
/* Overlay for the descriptor sectors of a log entry as read from the log:
 * the entry header immediately followed by its descriptors. */
typedef struct VHDXLogDescEntries {
    VHDXLogEntryHeader hdr;
    VHDXLogDescriptor desc[];   /* hdr.descriptor_count elements */
} VHDXLogDescEntries;
     40
/* All-zero GUID; a header log GUID equal to this is treated as
 * "no log present" by the code below. */
static const MSGUID zero_guid = { 0 };
     42
/* The log located on the disk is a circular buffer containing
 * sectors of 4096 bytes each.
 *
 * It is assumed for the read/write functions below that the
 * circular buffer scheme keeps 'one sector open' to indicate that
 * the buffer is full.  Given the validation methods used for each
 * sector, this method should be compatible with other methods that
 * do not waste a sector.
 */
     52
     53
     54/* Allow peeking at the hdr entry at the beginning of the current
     55 * read index, without advancing the read index */
     56static int vhdx_log_peek_hdr(BlockDriverState *bs, VHDXLogEntries *log,
     57                             VHDXLogEntryHeader *hdr)
     58{
     59    int ret = 0;
     60    uint64_t offset;
     61    uint32_t read;
     62
     63    assert(hdr != NULL);
     64
     65    /* peek is only supported on sector boundaries */
     66    if (log->read % VHDX_LOG_SECTOR_SIZE) {
     67        ret = -EFAULT;
     68        goto exit;
     69    }
     70
     71    read = log->read;
     72    /* we are guaranteed that a) log sectors are 4096 bytes,
     73     * and b) the log length is a multiple of 1MB. So, there
     74     * is always a round number of sectors in the buffer */
     75    if ((read + sizeof(VHDXLogEntryHeader)) > log->length) {
     76        read = 0;
     77    }
     78
     79    if (read == log->write) {
     80        ret = -EINVAL;
     81        goto exit;
     82    }
     83
     84    offset = log->offset + read;
     85
     86    ret = bdrv_pread(bs->file, offset, hdr, sizeof(VHDXLogEntryHeader));
     87    if (ret < 0) {
     88        goto exit;
     89    }
     90    vhdx_log_entry_hdr_le_import(hdr);
     91
     92exit:
     93    return ret;
     94}
     95
     96/* Index increment for log, based on sector boundaries */
     97static int vhdx_log_inc_idx(uint32_t idx, uint64_t length)
     98{
     99    idx += VHDX_LOG_SECTOR_SIZE;
    100    /* we are guaranteed that a) log sectors are 4096 bytes,
    101     * and b) the log length is a multiple of 1MB. So, there
    102     * is always a round number of sectors in the buffer */
    103    return idx >= length ? 0 : idx;
    104}
    105
    106
    107/* Reset the log to empty */
    108static void vhdx_log_reset(BlockDriverState *bs, BDRVVHDXState *s)
    109{
    110    MSGUID guid = { 0 };
    111    s->log.read = s->log.write = 0;
    112    /* a log guid of 0 indicates an empty log to any parser of v0
    113     * VHDX logs */
    114    vhdx_update_headers(bs, s, false, &guid);
    115}
    116
    117/* Reads num_sectors from the log (all log sectors are 4096 bytes),
    118 * into buffer 'buffer'.  Upon return, *sectors_read will contain
    119 * the number of sectors successfully read.
    120 *
    121 * It is assumed that 'buffer' is already allocated, and of sufficient
    122 * size (i.e. >= 4096*num_sectors).
    123 *
    124 * If 'peek' is true, then the tail (read) pointer for the circular buffer is
    125 * not modified.
    126 *
    127 * 0 is returned on success, -errno otherwise.  */
    128static int vhdx_log_read_sectors(BlockDriverState *bs, VHDXLogEntries *log,
    129                                 uint32_t *sectors_read, void *buffer,
    130                                 uint32_t num_sectors, bool peek)
    131{
    132    int ret = 0;
    133    uint64_t offset;
    134    uint32_t read;
    135
    136    read = log->read;
    137
    138    *sectors_read = 0;
    139    while (num_sectors) {
    140        if (read == log->write) {
    141            /* empty */
    142            break;
    143        }
    144        offset = log->offset + read;
    145
    146        ret = bdrv_pread(bs->file, offset, buffer, VHDX_LOG_SECTOR_SIZE);
    147        if (ret < 0) {
    148            goto exit;
    149        }
    150        read = vhdx_log_inc_idx(read, log->length);
    151
    152        *sectors_read = *sectors_read + 1;
    153        num_sectors--;
    154    }
    155
    156exit:
    157    if (!peek) {
    158        log->read = read;
    159    }
    160    return ret;
    161}
    162
    163/* Writes num_sectors to the log (all log sectors are 4096 bytes),
    164 * from buffer 'buffer'.  Upon return, *sectors_written will contain
    165 * the number of sectors successfully written.
    166 *
    167 * It is assumed that 'buffer' is at least 4096*num_sectors large.
    168 *
    169 * 0 is returned on success, -errno otherwise */
    170static int vhdx_log_write_sectors(BlockDriverState *bs, VHDXLogEntries *log,
    171                                  uint32_t *sectors_written, void *buffer,
    172                                  uint32_t num_sectors)
    173{
    174    int ret = 0;
    175    uint64_t offset;
    176    uint32_t write;
    177    void *buffer_tmp;
    178    BDRVVHDXState *s = bs->opaque;
    179
    180    ret = vhdx_user_visible_write(bs, s);
    181    if (ret < 0) {
    182        goto exit;
    183    }
    184
    185    write = log->write;
    186
    187    buffer_tmp = buffer;
    188    while (num_sectors) {
    189
    190        offset = log->offset + write;
    191        write = vhdx_log_inc_idx(write, log->length);
    192        if (write == log->read) {
    193            /* full */
    194            break;
    195        }
    196        ret = bdrv_pwrite(bs->file, offset, buffer_tmp,
    197                          VHDX_LOG_SECTOR_SIZE);
    198        if (ret < 0) {
    199            goto exit;
    200        }
    201        buffer_tmp += VHDX_LOG_SECTOR_SIZE;
    202
    203        log->write = write;
    204        *sectors_written = *sectors_written + 1;
    205        num_sectors--;
    206    }
    207
    208exit:
    209    return ret;
    210}
    211
    212
    213/* Validates a log entry header */
    214static bool vhdx_log_hdr_is_valid(VHDXLogEntries *log, VHDXLogEntryHeader *hdr,
    215                                  BDRVVHDXState *s)
    216{
    217    int valid = false;
    218
    219    if (hdr->signature != VHDX_LOG_SIGNATURE) {
    220        goto exit;
    221    }
    222
    223    /* if the individual entry length is larger than the whole log
    224     * buffer, that is obviously invalid */
    225    if (log->length < hdr->entry_length) {
    226        goto exit;
    227    }
    228
    229    /* length of entire entry must be in units of 4KB (log sector size) */
    230    if (hdr->entry_length % (VHDX_LOG_SECTOR_SIZE)) {
    231        goto exit;
    232    }
    233
    234    /* per spec, sequence # must be > 0 */
    235    if (hdr->sequence_number == 0) {
    236        goto exit;
    237    }
    238
    239    /* log entries are only valid if they match the file-wide log guid
    240     * found in the active header */
    241    if (!guid_eq(hdr->log_guid, s->headers[s->curr_header]->log_guid)) {
    242        goto exit;
    243    }
    244
    245    if (hdr->descriptor_count * sizeof(VHDXLogDescriptor) > hdr->entry_length) {
    246        goto exit;
    247    }
    248
    249    valid = true;
    250
    251exit:
    252    return valid;
    253}
    254
    255/*
    256 * Given a log header, this will validate that the descriptors and the
    257 * corresponding data sectors (if applicable)
    258 *
    259 * Validation consists of:
    260 *      1. Making sure the sequence numbers matches the entry header
    261 *      2. Verifying a valid signature ('zero' or 'desc' for descriptors)
    262 *      3. File offset field is a multiple of 4KB
    263 *      4. If a data descriptor, the corresponding data sector
    264 *         has its signature ('data') and matching sequence number
    265 *
    266 * @desc: the data buffer containing the descriptor
    267 * @hdr:  the log entry header
    268 *
    269 * Returns true if valid
    270 */
    271static bool vhdx_log_desc_is_valid(VHDXLogDescriptor *desc,
    272                                   VHDXLogEntryHeader *hdr)
    273{
    274    bool ret = false;
    275
    276    if (desc->sequence_number != hdr->sequence_number) {
    277        goto exit;
    278    }
    279    if (desc->file_offset % VHDX_LOG_SECTOR_SIZE) {
    280        goto exit;
    281    }
    282
    283    if (desc->signature == VHDX_LOG_ZERO_SIGNATURE) {
    284        if (desc->zero_length % VHDX_LOG_SECTOR_SIZE == 0) {
    285            /* valid */
    286            ret = true;
    287        }
    288    } else if (desc->signature == VHDX_LOG_DESC_SIGNATURE) {
    289            /* valid */
    290            ret = true;
    291    }
    292
    293exit:
    294    return ret;
    295}
    296
    297
    298/* Prior to sector data for a log entry, there is the header
    299 * and the descriptors referenced in the header:
    300 *
    301 * [] = 4KB sector
    302 *
    303 * [ hdr, desc ][   desc   ][ ... ][ data ][ ... ]
    304 *
    305 * The first sector in a log entry has a 64 byte header, and
    306 * up to 126 32-byte descriptors.  If more descriptors than
    307 * 126 are required, then subsequent sectors can have up to 128
    308 * descriptors.  Each sector is 4KB.  Data follows the descriptor
    309 * sectors.
    310 *
    311 * This will return the number of sectors needed to encompass
    312 * the passed number of descriptors in desc_cnt.
    313 *
    314 * This will never return 0, even if desc_cnt is 0.
    315 */
    316static int vhdx_compute_desc_sectors(uint32_t desc_cnt)
    317{
    318    uint32_t desc_sectors;
    319
    320    desc_cnt += 2; /* account for header in first sector */
    321    desc_sectors = desc_cnt / 128;
    322    if (desc_cnt % 128) {
    323        desc_sectors++;
    324    }
    325
    326    return desc_sectors;
    327}
    328
    329
    330/* Reads the log header, and subsequent descriptors (if any).  This
    331 * will allocate all the space for buffer, which must be NULL when
    332 * passed into this function. Each descriptor will also be validated,
    333 * and error returned if any are invalid. */
    334static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s,
    335                              VHDXLogEntries *log, VHDXLogDescEntries **buffer,
    336                              bool convert_endian)
    337{
    338    int ret = 0;
    339    uint32_t desc_sectors;
    340    uint32_t sectors_read;
    341    VHDXLogEntryHeader hdr;
    342    VHDXLogDescEntries *desc_entries = NULL;
    343    VHDXLogDescriptor desc;
    344    int i;
    345
    346    assert(*buffer == NULL);
    347
    348    ret = vhdx_log_peek_hdr(bs, log, &hdr);
    349    if (ret < 0) {
    350        goto exit;
    351    }
    352
    353    if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) {
    354        ret = -EINVAL;
    355        goto exit;
    356    }
    357
    358    desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count);
    359    desc_entries = qemu_try_blockalign(bs->file->bs,
    360                                       desc_sectors * VHDX_LOG_SECTOR_SIZE);
    361    if (desc_entries == NULL) {
    362        ret = -ENOMEM;
    363        goto exit;
    364    }
    365
    366    ret = vhdx_log_read_sectors(bs, log, &sectors_read, desc_entries,
    367                                desc_sectors, false);
    368    if (ret < 0) {
    369        goto free_and_exit;
    370    }
    371    if (sectors_read != desc_sectors) {
    372        ret = -EINVAL;
    373        goto free_and_exit;
    374    }
    375
    376    /* put in proper endianness, and validate each desc */
    377    for (i = 0; i < hdr.descriptor_count; i++) {
    378        desc = desc_entries->desc[i];
    379        vhdx_log_desc_le_import(&desc);
    380        if (convert_endian) {
    381            desc_entries->desc[i] = desc;
    382        }
    383        if (vhdx_log_desc_is_valid(&desc, &hdr) == false) {
    384            ret = -EINVAL;
    385            goto free_and_exit;
    386        }
    387    }
    388    if (convert_endian) {
    389        desc_entries->hdr = hdr;
    390    }
    391
    392    *buffer = desc_entries;
    393    goto exit;
    394
    395free_and_exit:
    396    qemu_vfree(desc_entries);
    397exit:
    398    return ret;
    399}
    400
    401
    402/* Flushes the descriptor described by desc to the VHDX image file.
    403 * If the descriptor is a data descriptor, than 'data' must be non-NULL,
    404 * and >= 4096 bytes (VHDX_LOG_SECTOR_SIZE), containing the data to be
    405 * written.
    406 *
    407 * Verification is performed to make sure the sequence numbers of a data
    408 * descriptor match the sequence number in the desc.
    409 *
    410 * For a zero descriptor, it may describe multiple sectors to fill with zeroes.
    411 * In this case, it should be noted that zeroes are written to disk, and the
    412 * image file is not extended as a sparse file.  */
    413static int vhdx_log_flush_desc(BlockDriverState *bs, VHDXLogDescriptor *desc,
    414                               VHDXLogDataSector *data)
    415{
    416    int ret = 0;
    417    uint64_t seq, file_offset;
    418    uint32_t offset = 0;
    419    void *buffer = NULL;
    420    uint64_t count = 1;
    421    int i;
    422
    423    buffer = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
    424
    425    if (desc->signature == VHDX_LOG_DESC_SIGNATURE) {
    426        /* data sector */
    427        if (data == NULL) {
    428            ret = -EFAULT;
    429            goto exit;
    430        }
    431
    432        /* The sequence number of the data sector must match that
    433         * in the descriptor */
    434        seq = data->sequence_high;
    435        seq <<= 32;
    436        seq |= data->sequence_low & 0xffffffff;
    437
    438        if (seq != desc->sequence_number) {
    439            ret = -EINVAL;
    440            goto exit;
    441        }
    442
    443        /* Each data sector is in total 4096 bytes, however the first
    444         * 8 bytes, and last 4 bytes, are located in the descriptor */
    445        memcpy(buffer, &desc->leading_bytes, 8);
    446        offset += 8;
    447
    448        memcpy(buffer+offset, data->data, 4084);
    449        offset += 4084;
    450
    451        memcpy(buffer+offset, &desc->trailing_bytes, 4);
    452
    453    } else if (desc->signature == VHDX_LOG_ZERO_SIGNATURE) {
    454        /* write 'count' sectors of sector */
    455        memset(buffer, 0, VHDX_LOG_SECTOR_SIZE);
    456        count = desc->zero_length / VHDX_LOG_SECTOR_SIZE;
    457    } else {
    458        error_report("Invalid VHDX log descriptor entry signature 0x%" PRIx32,
    459                      desc->signature);
    460        ret = -EINVAL;
    461        goto exit;
    462    }
    463
    464    file_offset = desc->file_offset;
    465
    466    /* count is only > 1 if we are writing zeroes */
    467    for (i = 0; i < count; i++) {
    468        ret = bdrv_pwrite_sync(bs->file, file_offset, buffer,
    469                               VHDX_LOG_SECTOR_SIZE);
    470        if (ret < 0) {
    471            goto exit;
    472        }
    473        file_offset += VHDX_LOG_SECTOR_SIZE;
    474    }
    475
    476exit:
    477    qemu_vfree(buffer);
    478    return ret;
    479}
    480
    481/* Flush the entire log (as described by 'logs') to the VHDX image
    482 * file, and then set the log to 'empty' status once complete.
    483 *
    484 * The log entries should be validate prior to flushing */
    485static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
    486                          VHDXLogSequence *logs)
    487{
    488    int ret = 0;
    489    int i;
    490    uint32_t cnt, sectors_read;
    491    uint64_t new_file_size;
    492    void *data = NULL;
    493    int64_t file_length;
    494    VHDXLogDescEntries *desc_entries = NULL;
    495    VHDXLogEntryHeader hdr_tmp = { 0 };
    496
    497    cnt = logs->count;
    498
    499    data = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
    500
    501    ret = vhdx_user_visible_write(bs, s);
    502    if (ret < 0) {
    503        goto exit;
    504    }
    505
    506    /* each iteration represents one log sequence, which may span multiple
    507     * sectors */
    508    while (cnt--) {
    509        ret = vhdx_log_peek_hdr(bs, &logs->log, &hdr_tmp);
    510        if (ret < 0) {
    511            goto exit;
    512        }
    513        file_length = bdrv_getlength(bs->file->bs);
    514        if (file_length < 0) {
    515            ret = file_length;
    516            goto exit;
    517        }
    518        /* if the log shows a FlushedFileOffset larger than our current file
    519         * size, then that means the file has been truncated / corrupted, and
    520         * we must refused to open it / use it */
    521        if (hdr_tmp.flushed_file_offset > file_length) {
    522            ret = -EINVAL;
    523            goto exit;
    524        }
    525
    526        ret = vhdx_log_read_desc(bs, s, &logs->log, &desc_entries, true);
    527        if (ret < 0) {
    528            goto exit;
    529        }
    530
    531        for (i = 0; i < desc_entries->hdr.descriptor_count; i++) {
    532            if (desc_entries->desc[i].signature == VHDX_LOG_DESC_SIGNATURE) {
    533                /* data sector, so read a sector to flush */
    534                ret = vhdx_log_read_sectors(bs, &logs->log, &sectors_read,
    535                                            data, 1, false);
    536                if (ret < 0) {
    537                    goto exit;
    538                }
    539                if (sectors_read != 1) {
    540                    ret = -EINVAL;
    541                    goto exit;
    542                }
    543                vhdx_log_data_le_import(data);
    544            }
    545
    546            ret = vhdx_log_flush_desc(bs, &desc_entries->desc[i], data);
    547            if (ret < 0) {
    548                goto exit;
    549            }
    550        }
    551        if (file_length < desc_entries->hdr.last_file_offset) {
    552            new_file_size = desc_entries->hdr.last_file_offset;
    553            if (new_file_size % (1 * MiB)) {
    554                /* round up to nearest 1MB boundary */
    555                new_file_size = QEMU_ALIGN_UP(new_file_size, MiB);
    556                if (new_file_size > INT64_MAX) {
    557                    ret = -EINVAL;
    558                    goto exit;
    559                }
    560                ret = bdrv_truncate(bs->file, new_file_size, false,
    561                                    PREALLOC_MODE_OFF, 0, NULL);
    562                if (ret < 0) {
    563                    goto exit;
    564                }
    565            }
    566        }
    567        qemu_vfree(desc_entries);
    568        desc_entries = NULL;
    569    }
    570
    571    ret = bdrv_flush(bs);
    572    if (ret < 0) {
    573        goto exit;
    574    }
    575    /* once the log is fully flushed, indicate that we have an empty log
    576     * now.  This also sets the log guid to 0, to indicate an empty log */
    577    vhdx_log_reset(bs, s);
    578
    579exit:
    580    qemu_vfree(data);
    581    qemu_vfree(desc_entries);
    582    return ret;
    583}
    584
    585static int vhdx_validate_log_entry(BlockDriverState *bs, BDRVVHDXState *s,
    586                                   VHDXLogEntries *log, uint64_t seq,
    587                                   bool *valid, VHDXLogEntryHeader *entry)
    588{
    589    int ret = 0;
    590    VHDXLogEntryHeader hdr;
    591    void *buffer = NULL;
    592    uint32_t i, desc_sectors, total_sectors, crc;
    593    uint32_t sectors_read = 0;
    594    VHDXLogDescEntries *desc_buffer = NULL;
    595
    596    *valid = false;
    597
    598    ret = vhdx_log_peek_hdr(bs, log, &hdr);
    599    if (ret < 0) {
    600        goto inc_and_exit;
    601    }
    602
    603    if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) {
    604        goto inc_and_exit;
    605    }
    606
    607    if (seq > 0) {
    608        if (hdr.sequence_number != seq + 1) {
    609            goto inc_and_exit;
    610        }
    611    }
    612
    613    desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count);
    614
    615    /* Read all log sectors, and calculate log checksum */
    616
    617    total_sectors = hdr.entry_length / VHDX_LOG_SECTOR_SIZE;
    618
    619
    620    /* read_desc() will increment the read idx */
    621    ret = vhdx_log_read_desc(bs, s, log, &desc_buffer, false);
    622    if (ret < 0) {
    623        goto free_and_exit;
    624    }
    625
    626    crc = vhdx_checksum_calc(0xffffffff, (void *)desc_buffer,
    627                            desc_sectors * VHDX_LOG_SECTOR_SIZE, 4);
    628    crc ^= 0xffffffff;
    629
    630    buffer = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
    631    if (total_sectors > desc_sectors) {
    632        for (i = 0; i < total_sectors - desc_sectors; i++) {
    633            sectors_read = 0;
    634            ret = vhdx_log_read_sectors(bs, log, &sectors_read, buffer,
    635                                        1, false);
    636            if (ret < 0 || sectors_read != 1) {
    637                goto free_and_exit;
    638            }
    639            crc = vhdx_checksum_calc(crc, buffer, VHDX_LOG_SECTOR_SIZE, -1);
    640            crc ^= 0xffffffff;
    641        }
    642    }
    643    crc ^= 0xffffffff;
    644    if (crc != hdr.checksum) {
    645        goto free_and_exit;
    646    }
    647
    648    *valid = true;
    649    *entry = hdr;
    650    goto free_and_exit;
    651
    652inc_and_exit:
    653    log->read = vhdx_log_inc_idx(log->read, log->length);
    654
    655free_and_exit:
    656    qemu_vfree(buffer);
    657    qemu_vfree(desc_buffer);
    658    return ret;
    659}
    660
    661/* Search through the log circular buffer, and find the valid, active
    662 * log sequence, if any exists
    663 * */
    664static int vhdx_log_search(BlockDriverState *bs, BDRVVHDXState *s,
    665                           VHDXLogSequence *logs)
    666{
    667    int ret = 0;
    668    uint32_t tail;
    669    bool seq_valid = false;
    670    VHDXLogSequence candidate = { 0 };
    671    VHDXLogEntryHeader hdr = { 0 };
    672    VHDXLogEntries curr_log;
    673
    674    memcpy(&curr_log, &s->log, sizeof(VHDXLogEntries));
    675    curr_log.write = curr_log.length;   /* assume log is full */
    676    curr_log.read = 0;
    677
    678
    679    /* now we will go through the whole log sector by sector, until
    680     * we find a valid, active log sequence, or reach the end of the
    681     * log buffer */
    682    for (;;) {
    683        uint64_t curr_seq = 0;
    684        VHDXLogSequence current = { 0 };
    685
    686        tail = curr_log.read;
    687
    688        ret = vhdx_validate_log_entry(bs, s, &curr_log, curr_seq,
    689                                      &seq_valid, &hdr);
    690        if (ret < 0) {
    691            goto exit;
    692        }
    693
    694        if (seq_valid) {
    695            current.valid     = true;
    696            current.log       = curr_log;
    697            current.log.read  = tail;
    698            current.log.write = curr_log.read;
    699            current.count     = 1;
    700            current.hdr       = hdr;
    701
    702
    703            for (;;) {
    704                ret = vhdx_validate_log_entry(bs, s, &curr_log, curr_seq,
    705                                              &seq_valid, &hdr);
    706                if (ret < 0) {
    707                    goto exit;
    708                }
    709                if (seq_valid == false) {
    710                    break;
    711                }
    712                current.log.write = curr_log.read;
    713                current.count++;
    714
    715                curr_seq = hdr.sequence_number;
    716            }
    717        }
    718
    719        if (current.valid) {
    720            if (candidate.valid == false ||
    721                current.hdr.sequence_number > candidate.hdr.sequence_number) {
    722                candidate = current;
    723            }
    724        }
    725
    726        if (curr_log.read < tail) {
    727            break;
    728        }
    729    }
    730
    731    *logs = candidate;
    732
    733    if (candidate.valid) {
    734        /* this is the next sequence number, for writes */
    735        s->log.sequence = candidate.hdr.sequence_number + 1;
    736    }
    737
    738
    739exit:
    740    return ret;
    741}
    742
    743/* Parse the replay log.  Per the VHDX spec, if the log is present
    744 * it must be replayed prior to opening the file, even read-only.
    745 *
    746 * If read-only, we must replay the log in RAM (or refuse to open
    747 * a dirty VHDX file read-only) */
    748int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed,
    749                   Error **errp)
    750{
    751    int ret = 0;
    752    VHDXHeader *hdr;
    753    VHDXLogSequence logs = { 0 };
    754
    755    hdr = s->headers[s->curr_header];
    756
    757    *flushed = false;
    758
    759    /* s->log.hdr is freed in vhdx_close() */
    760    if (s->log.hdr == NULL) {
    761        s->log.hdr = qemu_blockalign(bs, sizeof(VHDXLogEntryHeader));
    762    }
    763
    764    s->log.offset = hdr->log_offset;
    765    s->log.length = hdr->log_length;
    766
    767    if (s->log.offset < VHDX_LOG_MIN_SIZE ||
    768        s->log.offset % VHDX_LOG_MIN_SIZE) {
    769        ret = -EINVAL;
    770        goto exit;
    771    }
    772
    773    /* per spec, only log version of 0 is supported */
    774    if (hdr->log_version != 0) {
    775        ret = -EINVAL;
    776        goto exit;
    777    }
    778
    779    /* If either the log guid, or log length is zero,
    780     * then a replay log is not present */
    781    if (guid_eq(hdr->log_guid, zero_guid)) {
    782        goto exit;
    783    }
    784
    785    if (hdr->log_length == 0) {
    786        goto exit;
    787    }
    788
    789    if (hdr->log_length % VHDX_LOG_MIN_SIZE) {
    790        ret = -EINVAL;
    791        goto exit;
    792    }
    793
    794
    795    /* The log is present, we need to find if and where there is an active
    796     * sequence of valid entries present in the log.  */
    797
    798    ret = vhdx_log_search(bs, s, &logs);
    799    if (ret < 0) {
    800        goto exit;
    801    }
    802
    803    if (logs.valid) {
    804        if (bdrv_is_read_only(bs)) {
    805            bdrv_refresh_filename(bs);
    806            ret = -EPERM;
    807            error_setg(errp,
    808                       "VHDX image file '%s' opened read-only, but "
    809                       "contains a log that needs to be replayed",
    810                       bs->filename);
    811            error_append_hint(errp,  "To replay the log, run:\n"
    812                              "qemu-img check -r all '%s'\n",
    813                              bs->filename);
    814            goto exit;
    815        }
    816        /* now flush the log */
    817        ret = vhdx_log_flush(bs, s, &logs);
    818        if (ret < 0) {
    819            goto exit;
    820        }
    821        *flushed = true;
    822    }
    823
    824
    825exit:
    826    return ret;
    827}
    828
    829
    830
    831static void vhdx_log_raw_to_le_sector(VHDXLogDescriptor *desc,
    832                                      VHDXLogDataSector *sector, void *data,
    833                                      uint64_t seq)
    834{
    835    /* 8 + 4084 + 4 = 4096, 1 log sector */
    836    memcpy(&desc->leading_bytes, data, 8);
    837    data += 8;
    838    desc->leading_bytes = cpu_to_le64(desc->leading_bytes);
    839    memcpy(sector->data, data, 4084);
    840    data += 4084;
    841    memcpy(&desc->trailing_bytes, data, 4);
    842    desc->trailing_bytes = cpu_to_le32(desc->trailing_bytes);
    843    data += 4;
    844
    845    sector->sequence_high  = (uint32_t) (seq >> 32);
    846    sector->sequence_low   = (uint32_t) (seq & 0xffffffff);
    847    sector->data_signature = VHDX_LOG_DATA_SIGNATURE;
    848
    849    vhdx_log_desc_le_export(desc);
    850    vhdx_log_data_le_export(sector);
    851}
    852
    853
    854static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
    855                          void *data, uint32_t length, uint64_t offset)
    856{
    857    int ret = 0;
    858    void *buffer = NULL;
    859    void *merged_sector = NULL;
    860    void *data_tmp, *sector_write;
    861    unsigned int i;
    862    int sector_offset;
    863    uint32_t desc_sectors, sectors, total_length;
    864    uint32_t sectors_written = 0;
    865    uint32_t aligned_length;
    866    uint32_t leading_length = 0;
    867    uint32_t trailing_length = 0;
    868    uint32_t partial_sectors = 0;
    869    uint32_t bytes_written = 0;
    870    uint64_t file_offset;
    871    int64_t file_length;
    872    VHDXHeader *header;
    873    VHDXLogEntryHeader new_hdr;
    874    VHDXLogDescriptor *new_desc = NULL;
    875    VHDXLogDataSector *data_sector = NULL;
    876    MSGUID new_guid = { 0 };
    877
    878    header = s->headers[s->curr_header];
    879
    880    /* need to have offset read data, and be on 4096 byte boundary */
    881
    882    if (length > header->log_length) {
    883        /* no log present.  we could create a log here instead of failing */
    884        ret = -EINVAL;
    885        goto exit;
    886    }
    887
    888    if (guid_eq(header->log_guid, zero_guid)) {
    889        vhdx_guid_generate(&new_guid);
    890        vhdx_update_headers(bs, s, false, &new_guid);
    891    } else {
    892        /* currently, we require that the log be flushed after
    893         * every write. */
    894        ret = -ENOTSUP;
    895        goto exit;
    896    }
    897
    898    /* 0 is an invalid sequence number, but may also represent the first
    899     * log write (or a wrapped seq) */
    900    if (s->log.sequence == 0) {
    901        s->log.sequence = 1;
    902    }
    903
    904    sector_offset = offset % VHDX_LOG_SECTOR_SIZE;
    905    file_offset = QEMU_ALIGN_DOWN(offset, VHDX_LOG_SECTOR_SIZE);
    906
    907    aligned_length = length;
    908
    909    /* add in the unaligned head and tail bytes */
    910    if (sector_offset) {
    911        leading_length = (VHDX_LOG_SECTOR_SIZE - sector_offset);
    912        leading_length = leading_length > length ? length : leading_length;
    913        aligned_length -= leading_length;
    914        partial_sectors++;
    915    }
    916
    917    sectors = aligned_length / VHDX_LOG_SECTOR_SIZE;
    918    trailing_length = aligned_length - (sectors * VHDX_LOG_SECTOR_SIZE);
    919    if (trailing_length) {
    920        partial_sectors++;
    921    }
    922
    923    sectors += partial_sectors;
    924
    925    file_length = bdrv_getlength(bs->file->bs);
    926    if (file_length < 0) {
    927        ret = file_length;
    928        goto exit;
    929    }
    930
    931    /* sectors is now how many sectors the data itself takes, not
    932     * including the header and descriptor metadata */
    933
    934    new_hdr = (VHDXLogEntryHeader) {
    935                .signature           = VHDX_LOG_SIGNATURE,
    936                .tail                = s->log.tail,
    937                .sequence_number     = s->log.sequence,
    938                .descriptor_count    = sectors,
    939                .reserved            = 0,
    940                .flushed_file_offset = file_length,
    941                .last_file_offset    = file_length,
    942                .log_guid            = header->log_guid,
    943              };
    944
    945
    946    desc_sectors = vhdx_compute_desc_sectors(new_hdr.descriptor_count);
    947
    948    total_length = (desc_sectors + sectors) * VHDX_LOG_SECTOR_SIZE;
    949    new_hdr.entry_length = total_length;
    950
    951    vhdx_log_entry_hdr_le_export(&new_hdr);
    952
    953    buffer = qemu_blockalign(bs, total_length);
    954    memcpy(buffer, &new_hdr, sizeof(new_hdr));
    955
    956    new_desc = buffer + sizeof(new_hdr);
    957    data_sector = buffer + (desc_sectors * VHDX_LOG_SECTOR_SIZE);
    958    data_tmp = data;
    959
    960    /* All log sectors are 4KB, so for any partial sectors we must
    961     * merge the data with preexisting data from the final file
    962     * destination */
    963    merged_sector = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
    964
    965    for (i = 0; i < sectors; i++) {
    966        new_desc->signature       = VHDX_LOG_DESC_SIGNATURE;
    967        new_desc->sequence_number = s->log.sequence;
    968        new_desc->file_offset     = file_offset;
    969
    970        if (i == 0 && leading_length) {
    971            /* partial sector at the front of the buffer */
    972            ret = bdrv_pread(bs->file, file_offset, merged_sector,
    973                             VHDX_LOG_SECTOR_SIZE);
    974            if (ret < 0) {
    975                goto exit;
    976            }
    977            memcpy(merged_sector + sector_offset, data_tmp, leading_length);
    978            bytes_written = leading_length;
    979            sector_write = merged_sector;
    980        } else if (i == sectors - 1 && trailing_length) {
    981            /* partial sector at the end of the buffer */
    982            ret = bdrv_pread(bs->file,
    983                            file_offset,
    984                            merged_sector + trailing_length,
    985                            VHDX_LOG_SECTOR_SIZE - trailing_length);
    986            if (ret < 0) {
    987                goto exit;
    988            }
    989            memcpy(merged_sector, data_tmp, trailing_length);
    990            bytes_written = trailing_length;
    991            sector_write = merged_sector;
    992        } else {
    993            bytes_written = VHDX_LOG_SECTOR_SIZE;
    994            sector_write = data_tmp;
    995        }
    996
    997        /* populate the raw sector data into the proper structures,
    998         * as well as update the descriptor, and convert to proper
    999         * endianness */
   1000        vhdx_log_raw_to_le_sector(new_desc, data_sector, sector_write,
   1001                                  s->log.sequence);
   1002
   1003        data_tmp += bytes_written;
   1004        data_sector++;
   1005        new_desc++;
   1006        file_offset += VHDX_LOG_SECTOR_SIZE;
   1007    }
   1008
   1009    /* checksum covers entire entry, from the log header through the
   1010     * last data sector */
   1011    vhdx_update_checksum(buffer, total_length,
   1012                         offsetof(VHDXLogEntryHeader, checksum));
   1013
   1014    /* now write to the log */
   1015    ret = vhdx_log_write_sectors(bs, &s->log, &sectors_written, buffer,
   1016                                 desc_sectors + sectors);
   1017    if (ret < 0) {
   1018        goto exit;
   1019    }
   1020
   1021    if (sectors_written != desc_sectors + sectors) {
   1022        /* instead of failing, we could flush the log here */
   1023        ret = -EINVAL;
   1024        goto exit;
   1025    }
   1026
   1027    s->log.sequence++;
   1028    /* write new tail */
   1029    s->log.tail = s->log.write;
   1030
   1031exit:
   1032    qemu_vfree(buffer);
   1033    qemu_vfree(merged_sector);
   1034    return ret;
   1035}
   1036
   1037/* Perform a log write, and then immediately flush the entire log */
   1038int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
   1039                             void *data, uint32_t length, uint64_t offset)
   1040{
   1041    int ret = 0;
   1042    VHDXLogSequence logs = { .valid = true,
   1043                             .count = 1,
   1044                             .hdr = { 0 } };
   1045
   1046
   1047    /* Make sure data written (new and/or changed blocks) is stable
   1048     * on disk, before creating log entry */
   1049    ret = bdrv_flush(bs);
   1050    if (ret < 0) {
   1051        goto exit;
   1052    }
   1053
   1054    ret = vhdx_log_write(bs, s, data, length, offset);
   1055    if (ret < 0) {
   1056        goto exit;
   1057    }
   1058    logs.log = s->log;
   1059
   1060    /* Make sure log is stable on disk */
   1061    ret = bdrv_flush(bs);
   1062    if (ret < 0) {
   1063        goto exit;
   1064    }
   1065
   1066    ret = vhdx_log_flush(bs, s, &logs);
   1067    if (ret < 0) {
   1068        goto exit;
   1069    }
   1070
   1071    s->log = logs.log;
   1072
   1073exit:
   1074    return ret;
   1075}
   1076