cachepc-qemu

Fork of AMDESE/qemu with changes for the cachepc side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu

dump.c (61540B)


/*
 * QEMU dump
 *
 * Copyright Fujitsu, Corp. 2011, 2012
 *
 * Authors:
 *     Wen Congyang <wency@cn.fujitsu.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/cutils.h"
#include "elf.h"
#include "exec/hwaddr.h"
#include "monitor/monitor.h"
#include "sysemu/kvm.h"
#include "sysemu/dump.h"
#include "sysemu/memory_mapping.h"
#include "sysemu/runstate.h"
#include "sysemu/cpus.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-dump.h"
#include "qapi/qapi-events-dump.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "hw/misc/vmcoreinfo.h"

#ifdef TARGET_X86_64
#include "win_dump.h"
#endif

#include <zlib.h>
#ifdef CONFIG_LZO
#include <lzo/lzo1x.h>
#endif
#ifdef CONFIG_SNAPPY
#include <snappy-c.h>
#endif
#ifndef ELF_MACHINE_UNAME
#define ELF_MACHINE_UNAME "Unknown"
#endif

#define MAX_GUEST_NOTE_SIZE (1 << 20) /* 1MB should be enough */

#define ELF_NOTE_SIZE(hdr_size, name_size, desc_size)   \
    ((DIV_ROUND_UP((hdr_size), 4) +                     \
      DIV_ROUND_UP((name_size), 4) +                    \
      DIV_ROUND_UP((desc_size), 4)) * 4)
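
/*
 * Illustrative arithmetic (added commentary, not in the original source):
 * each of the three components is rounded up to a 4-byte boundary. For
 * example, with hdr_size = 12, name_size = 5 and desc_size = 7:
 * (DIV_ROUND_UP(12, 4) + DIV_ROUND_UP(5, 4) + DIV_ROUND_UP(7, 4)) * 4
 *   = (3 + 2 + 2) * 4 = 28 bytes for the complete note.
 */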

uint16_t cpu_to_dump16(DumpState *s, uint16_t val)
{
    if (s->dump_info.d_endian == ELFDATA2LSB) {
        val = cpu_to_le16(val);
    } else {
        val = cpu_to_be16(val);
    }

    return val;
}

uint32_t cpu_to_dump32(DumpState *s, uint32_t val)
{
    if (s->dump_info.d_endian == ELFDATA2LSB) {
        val = cpu_to_le32(val);
    } else {
        val = cpu_to_be32(val);
    }

    return val;
}

uint64_t cpu_to_dump64(DumpState *s, uint64_t val)
{
    if (s->dump_info.d_endian == ELFDATA2LSB) {
        val = cpu_to_le64(val);
    } else {
        val = cpu_to_be64(val);
    }

    return val;
}

static int dump_cleanup(DumpState *s)
{
    guest_phys_blocks_free(&s->guest_phys_blocks);
    memory_mapping_list_free(&s->list);
    close(s->fd);
    g_free(s->guest_note);
    s->guest_note = NULL;
    if (s->resume) {
        if (s->detached) {
            qemu_mutex_lock_iothread();
        }
        vm_start();
        if (s->detached) {
            qemu_mutex_unlock_iothread();
        }
    }

    return 0;
}

static int fd_write_vmcore(const void *buf, size_t size, void *opaque)
{
    DumpState *s = opaque;
    size_t written_size;

    written_size = qemu_write_full(s->fd, buf, size);
    if (written_size != size) {
        return -errno;
    }

    return 0;
}

static void write_elf64_header(DumpState *s, Error **errp)
{
    Elf64_Ehdr elf_header;
    int ret;

    memset(&elf_header, 0, sizeof(Elf64_Ehdr));
    memcpy(&elf_header, ELFMAG, SELFMAG);
    elf_header.e_ident[EI_CLASS] = ELFCLASS64;
    elf_header.e_ident[EI_DATA] = s->dump_info.d_endian;
    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
    elf_header.e_type = cpu_to_dump16(s, ET_CORE);
    elf_header.e_machine = cpu_to_dump16(s, s->dump_info.d_machine);
    elf_header.e_version = cpu_to_dump32(s, EV_CURRENT);
    elf_header.e_ehsize = cpu_to_dump16(s, sizeof(elf_header));
    elf_header.e_phoff = cpu_to_dump64(s, sizeof(Elf64_Ehdr));
    elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr));
    elf_header.e_phnum = cpu_to_dump16(s, s->phdr_num);
    if (s->have_section) {
        uint64_t shoff = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) * s->sh_info;

        elf_header.e_shoff = cpu_to_dump64(s, shoff);
        elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr));
        elf_header.e_shnum = cpu_to_dump16(s, 1);
    }

    ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "dump: failed to write elf header");
    }
}

static void write_elf32_header(DumpState *s, Error **errp)
{
    Elf32_Ehdr elf_header;
    int ret;

    memset(&elf_header, 0, sizeof(Elf32_Ehdr));
    memcpy(&elf_header, ELFMAG, SELFMAG);
    elf_header.e_ident[EI_CLASS] = ELFCLASS32;
    elf_header.e_ident[EI_DATA] = s->dump_info.d_endian;
    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
    elf_header.e_type = cpu_to_dump16(s, ET_CORE);
    elf_header.e_machine = cpu_to_dump16(s, s->dump_info.d_machine);
    elf_header.e_version = cpu_to_dump32(s, EV_CURRENT);
    elf_header.e_ehsize = cpu_to_dump16(s, sizeof(elf_header));
    elf_header.e_phoff = cpu_to_dump32(s, sizeof(Elf32_Ehdr));
    elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr));
    elf_header.e_phnum = cpu_to_dump16(s, s->phdr_num);
    if (s->have_section) {
        uint32_t shoff = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) * s->sh_info;

        elf_header.e_shoff = cpu_to_dump32(s, shoff);
        elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr));
        elf_header.e_shnum = cpu_to_dump16(s, 1);
    }

    ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "dump: failed to write elf header");
    }
}

static void write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,
                             int phdr_index, hwaddr offset,
                             hwaddr filesz, Error **errp)
{
    Elf64_Phdr phdr;
    int ret;

    memset(&phdr, 0, sizeof(Elf64_Phdr));
    phdr.p_type = cpu_to_dump32(s, PT_LOAD);
    phdr.p_offset = cpu_to_dump64(s, offset);
    phdr.p_paddr = cpu_to_dump64(s, memory_mapping->phys_addr);
    phdr.p_filesz = cpu_to_dump64(s, filesz);
    phdr.p_memsz = cpu_to_dump64(s, memory_mapping->length);
    phdr.p_vaddr = cpu_to_dump64(s, memory_mapping->virt_addr) ?: phdr.p_paddr;

    assert(memory_mapping->length >= filesz);

    ret = fd_write_vmcore(&phdr, sizeof(Elf64_Phdr), s);
    if (ret < 0) {
        error_setg_errno(errp, -ret,
                         "dump: failed to write program header table");
    }
}

static void write_elf32_load(DumpState *s, MemoryMapping *memory_mapping,
                             int phdr_index, hwaddr offset,
                             hwaddr filesz, Error **errp)
{
    Elf32_Phdr phdr;
    int ret;

    memset(&phdr, 0, sizeof(Elf32_Phdr));
    phdr.p_type = cpu_to_dump32(s, PT_LOAD);
    phdr.p_offset = cpu_to_dump32(s, offset);
    phdr.p_paddr = cpu_to_dump32(s, memory_mapping->phys_addr);
    phdr.p_filesz = cpu_to_dump32(s, filesz);
    phdr.p_memsz = cpu_to_dump32(s, memory_mapping->length);
    phdr.p_vaddr =
        cpu_to_dump32(s, memory_mapping->virt_addr) ?: phdr.p_paddr;

    assert(memory_mapping->length >= filesz);

    ret = fd_write_vmcore(&phdr, sizeof(Elf32_Phdr), s);
    if (ret < 0) {
        error_setg_errno(errp, -ret,
                         "dump: failed to write program header table");
    }
}

static void write_elf64_note(DumpState *s, Error **errp)
{
    Elf64_Phdr phdr;
    hwaddr begin = s->memory_offset - s->note_size;
    int ret;

    memset(&phdr, 0, sizeof(Elf64_Phdr));
    phdr.p_type = cpu_to_dump32(s, PT_NOTE);
    phdr.p_offset = cpu_to_dump64(s, begin);
    phdr.p_paddr = 0;
    phdr.p_filesz = cpu_to_dump64(s, s->note_size);
    phdr.p_memsz = cpu_to_dump64(s, s->note_size);
    phdr.p_vaddr = 0;

    ret = fd_write_vmcore(&phdr, sizeof(Elf64_Phdr), s);
    if (ret < 0) {
        error_setg_errno(errp, -ret,
                         "dump: failed to write program header table");
    }
}

static inline int cpu_index(CPUState *cpu)
{
    return cpu->cpu_index + 1;
}

static void write_guest_note(WriteCoreDumpFunction f, DumpState *s,
                             Error **errp)
{
    int ret;

    if (s->guest_note) {
        ret = f(s->guest_note, s->guest_note_size, s);
        if (ret < 0) {
            error_setg(errp, "dump: failed to write guest note");
        }
    }
}

static void write_elf64_notes(WriteCoreDumpFunction f, DumpState *s,
                              Error **errp)
{
    CPUState *cpu;
    int ret;
    int id;

    CPU_FOREACH(cpu) {
        id = cpu_index(cpu);
        ret = cpu_write_elf64_note(f, cpu, id, s);
        if (ret < 0) {
            error_setg(errp, "dump: failed to write elf notes");
            return;
        }
    }

    CPU_FOREACH(cpu) {
        ret = cpu_write_elf64_qemunote(f, cpu, s);
        if (ret < 0) {
            error_setg(errp, "dump: failed to write CPU status");
            return;
        }
    }

    write_guest_note(f, s, errp);
}
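
/*
 * Added commentary (not in the original source): the ELF notes are emitted
 * in a fixed order: one architecture note per vCPU via cpu_write_elf64_note(),
 * then the QEMU-specific CPU state notes via cpu_write_elf64_qemunote(), and
 * finally the guest-provided note, if any. The 32-bit variant below follows
 * the same order; the kdump headers rely on the guest note being last when
 * they compute offset_vmcoreinfo from s->note_size and s->guest_note_size.
 */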

static void write_elf32_note(DumpState *s, Error **errp)
{
    hwaddr begin = s->memory_offset - s->note_size;
    Elf32_Phdr phdr;
    int ret;

    memset(&phdr, 0, sizeof(Elf32_Phdr));
    phdr.p_type = cpu_to_dump32(s, PT_NOTE);
    phdr.p_offset = cpu_to_dump32(s, begin);
    phdr.p_paddr = 0;
    phdr.p_filesz = cpu_to_dump32(s, s->note_size);
    phdr.p_memsz = cpu_to_dump32(s, s->note_size);
    phdr.p_vaddr = 0;

    ret = fd_write_vmcore(&phdr, sizeof(Elf32_Phdr), s);
    if (ret < 0) {
        error_setg_errno(errp, -ret,
                         "dump: failed to write program header table");
    }
}

static void write_elf32_notes(WriteCoreDumpFunction f, DumpState *s,
                              Error **errp)
{
    CPUState *cpu;
    int ret;
    int id;

    CPU_FOREACH(cpu) {
        id = cpu_index(cpu);
        ret = cpu_write_elf32_note(f, cpu, id, s);
        if (ret < 0) {
            error_setg(errp, "dump: failed to write elf notes");
            return;
        }
    }

    CPU_FOREACH(cpu) {
        ret = cpu_write_elf32_qemunote(f, cpu, s);
        if (ret < 0) {
            error_setg(errp, "dump: failed to write CPU status");
            return;
        }
    }

    write_guest_note(f, s, errp);
}

static void write_elf_section(DumpState *s, int type, Error **errp)
{
    Elf32_Shdr shdr32;
    Elf64_Shdr shdr64;
    int shdr_size;
    void *shdr;
    int ret;

    if (type == 0) {
        shdr_size = sizeof(Elf32_Shdr);
        memset(&shdr32, 0, shdr_size);
        shdr32.sh_info = cpu_to_dump32(s, s->sh_info);
        shdr = &shdr32;
    } else {
        shdr_size = sizeof(Elf64_Shdr);
        memset(&shdr64, 0, shdr_size);
        shdr64.sh_info = cpu_to_dump32(s, s->sh_info);
        shdr = &shdr64;
    }

    ret = fd_write_vmcore(shdr, shdr_size, s);
    if (ret < 0) {
        error_setg_errno(errp, -ret,
                         "dump: failed to write section header table");
    }
}

static void write_data(DumpState *s, void *buf, int length, Error **errp)
{
    int ret;

    ret = fd_write_vmcore(buf, length, s);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "dump: failed to save memory");
    } else {
        s->written_size += length;
    }
}

/* write the memory to vmcore. 1 page per I/O. */
static void write_memory(DumpState *s, GuestPhysBlock *block, ram_addr_t start,
                         int64_t size, Error **errp)
{
    int64_t i;
    Error *local_err = NULL;

    for (i = 0; i < size / s->dump_info.page_size; i++) {
        write_data(s, block->host_addr + start + i * s->dump_info.page_size,
                   s->dump_info.page_size, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }

    if ((size % s->dump_info.page_size) != 0) {
        write_data(s, block->host_addr + start + i * s->dump_info.page_size,
                   size % s->dump_info.page_size, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }
}

/* get the memory's offset and size in the vmcore */
static void get_offset_range(hwaddr phys_addr,
                             ram_addr_t mapping_length,
                             DumpState *s,
                             hwaddr *p_offset,
                             hwaddr *p_filesz)
{
    GuestPhysBlock *block;
    hwaddr offset = s->memory_offset;
    int64_t size_in_block, start;

    /* When the memory is not stored into vmcore, offset will be -1 */
    *p_offset = -1;
    *p_filesz = 0;

    if (s->has_filter) {
        if (phys_addr < s->begin || phys_addr >= s->begin + s->length) {
            return;
        }
    }

    QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
        if (s->has_filter) {
            if (block->target_start >= s->begin + s->length ||
                block->target_end <= s->begin) {
                /* This block is out of the range */
                continue;
            }

            if (s->begin <= block->target_start) {
                start = block->target_start;
            } else {
                start = s->begin;
            }

            size_in_block = block->target_end - start;
            if (s->begin + s->length < block->target_end) {
                size_in_block -= block->target_end - (s->begin + s->length);
            }
        } else {
            start = block->target_start;
            size_in_block = block->target_end - block->target_start;
        }

        if (phys_addr >= start && phys_addr < start + size_in_block) {
            *p_offset = phys_addr - start + offset;

            /* The offset range mapped from the vmcore file must not spill over
             * the GuestPhysBlock, clamp it. The rest of the mapping will be
             * zero-filled in memory at load time; see
             * <http://refspecs.linuxbase.org/elf/gabi4+/ch5.pheader.html>.
             */
            *p_filesz = phys_addr + mapping_length <= start + size_in_block ?
                        mapping_length :
                        size_in_block - (phys_addr - start);
            return;
        }

        offset += size_in_block;
    }
}
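
/*
 * Worked example (added commentary, not in the original source): with a
 * filter of begin = 0x2000 and length = 0x4000, a block covering
 * [0x1000, 0x9000) contributes start = 0x2000 and
 * size_in_block = (0x9000 - 0x2000) - (0x9000 - 0x6000) = 0x4000,
 * i.e. exactly the part of the filtered window that overlaps the block.
 */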

static void write_elf_loads(DumpState *s, Error **errp)
{
    hwaddr offset, filesz;
    MemoryMapping *memory_mapping;
    uint32_t phdr_index = 1;
    uint32_t max_index;
    Error *local_err = NULL;

    if (s->have_section) {
        max_index = s->sh_info;
    } else {
        max_index = s->phdr_num;
    }

    QTAILQ_FOREACH(memory_mapping, &s->list.head, next) {
        get_offset_range(memory_mapping->phys_addr,
                         memory_mapping->length,
                         s, &offset, &filesz);
        if (s->dump_info.d_class == ELFCLASS64) {
            write_elf64_load(s, memory_mapping, phdr_index++, offset,
                             filesz, &local_err);
        } else {
            write_elf32_load(s, memory_mapping, phdr_index++, offset,
                             filesz, &local_err);
        }

        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }

        if (phdr_index >= max_index) {
            break;
        }
    }
}

/* write elf header, PT_NOTE and elf note to vmcore. */
static void dump_begin(DumpState *s, Error **errp)
{
    Error *local_err = NULL;

    /*
     * the vmcore's format is:
     *   --------------
     *   |  elf header |
     *   --------------
     *   |  PT_NOTE    |
     *   --------------
     *   |  PT_LOAD    |
     *   --------------
     *   |  ......     |
     *   --------------
     *   |  PT_LOAD    |
     *   --------------
     *   |  sec_hdr    |
     *   --------------
     *   |  elf note   |
     *   --------------
     *   |  memory     |
     *   --------------
     *
     * we only know where the memory is saved after we write elf note into
     * vmcore.
     */

    /* write elf header to vmcore */
    if (s->dump_info.d_class == ELFCLASS64) {
        write_elf64_header(s, &local_err);
    } else {
        write_elf32_header(s, &local_err);
    }
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    if (s->dump_info.d_class == ELFCLASS64) {
        /* write PT_NOTE to vmcore */
        write_elf64_note(s, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }

        /* write all PT_LOAD to vmcore */
        write_elf_loads(s, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }

        /* write section to vmcore */
        if (s->have_section) {
            write_elf_section(s, 1, &local_err);
            if (local_err) {
                error_propagate(errp, local_err);
                return;
            }
        }

        /* write notes to vmcore */
        write_elf64_notes(fd_write_vmcore, s, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    } else {
        /* write PT_NOTE to vmcore */
        write_elf32_note(s, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }

        /* write all PT_LOAD to vmcore */
        write_elf_loads(s, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }

        /* write section to vmcore */
        if (s->have_section) {
            write_elf_section(s, 0, &local_err);
            if (local_err) {
                error_propagate(errp, local_err);
                return;
            }
        }

        /* write notes to vmcore */
        write_elf32_notes(fd_write_vmcore, s, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }
}

static int get_next_block(DumpState *s, GuestPhysBlock *block)
{
    while (1) {
        block = QTAILQ_NEXT(block, next);
        if (!block) {
            /* no more block */
            return 1;
        }

        s->start = 0;
        s->next_block = block;
        if (s->has_filter) {
            if (block->target_start >= s->begin + s->length ||
                block->target_end <= s->begin) {
                /* This block is out of the range */
                continue;
            }

            if (s->begin > block->target_start) {
                s->start = s->begin - block->target_start;
            }
        }

        return 0;
    }
}

/* write all memory to vmcore */
static void dump_iterate(DumpState *s, Error **errp)
{
    GuestPhysBlock *block;
    int64_t size;
    Error *local_err = NULL;

    do {
        block = s->next_block;

        size = block->target_end - block->target_start;
        if (s->has_filter) {
            size -= s->start;
            if (s->begin + s->length < block->target_end) {
                size -= block->target_end - (s->begin + s->length);
            }
        }
        write_memory(s, block, s->start, size, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }

    } while (!get_next_block(s, block));
}

static void create_vmcore(DumpState *s, Error **errp)
{
    Error *local_err = NULL;

    dump_begin(s, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    dump_iterate(s, errp);
}

static int write_start_flat_header(int fd)
{
    MakedumpfileHeader *mh;
    int ret = 0;

    QEMU_BUILD_BUG_ON(sizeof *mh > MAX_SIZE_MDF_HEADER);
    mh = g_malloc0(MAX_SIZE_MDF_HEADER);

    memcpy(mh->signature, MAKEDUMPFILE_SIGNATURE,
           MIN(sizeof mh->signature, sizeof MAKEDUMPFILE_SIGNATURE));

    mh->type = cpu_to_be64(TYPE_FLAT_HEADER);
    mh->version = cpu_to_be64(VERSION_FLAT_HEADER);

    size_t written_size;
    written_size = qemu_write_full(fd, mh, MAX_SIZE_MDF_HEADER);
    if (written_size != MAX_SIZE_MDF_HEADER) {
        ret = -1;
    }

    g_free(mh);
    return ret;
}

static int write_end_flat_header(int fd)
{
    MakedumpfileDataHeader mdh;

    mdh.offset = END_FLAG_FLAT_HEADER;
    mdh.buf_size = END_FLAG_FLAT_HEADER;

    size_t written_size;
    written_size = qemu_write_full(fd, &mdh, sizeof(mdh));
    if (written_size != sizeof(mdh)) {
        return -1;
    }

    return 0;
}

static int write_buffer(int fd, off_t offset, const void *buf, size_t size)
{
    size_t written_size;
    MakedumpfileDataHeader mdh;

    mdh.offset = cpu_to_be64(offset);
    mdh.buf_size = cpu_to_be64(size);

    written_size = qemu_write_full(fd, &mdh, sizeof(mdh));
    if (written_size != sizeof(mdh)) {
        return -1;
    }

    written_size = qemu_write_full(fd, buf, size);
    if (written_size != size) {
        return -1;
    }

    return 0;
}
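
/*
 * Note (added commentary): write_start_flat_header, write_buffer and
 * write_end_flat_header together emit the makedumpfile "flat" framing:
 * every payload is preceded by an (offset, size) MakedumpfileDataHeader,
 * and a header with both fields set to END_FLAG_FLAT_HEADER terminates the
 * stream. Such a stream is typically rearranged into a regular
 * kdump-compressed file with makedumpfile -R on the receiving side.
 */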

static int buf_write_note(const void *buf, size_t size, void *opaque)
{
    DumpState *s = opaque;

    /* note_buf is not large enough */
    if (s->note_buf_offset + size > s->note_size) {
        return -1;
    }

    memcpy(s->note_buf + s->note_buf_offset, buf, size);

    s->note_buf_offset += size;

    return 0;
}

/*
 * This function retrieves various sizes from an ELF note header.
 *
 * @note has to be a valid ELF note. The returned sizes are unmodified
 * (not padded or rounded up to a multiple of 4).
 */
static void get_note_sizes(DumpState *s, const void *note,
                           uint64_t *note_head_size,
                           uint64_t *name_size,
                           uint64_t *desc_size)
{
    uint64_t note_head_sz;
    uint64_t name_sz;
    uint64_t desc_sz;

    if (s->dump_info.d_class == ELFCLASS64) {
        const Elf64_Nhdr *hdr = note;
        note_head_sz = sizeof(Elf64_Nhdr);
        name_sz = tswap64(hdr->n_namesz);
        desc_sz = tswap64(hdr->n_descsz);
    } else {
        const Elf32_Nhdr *hdr = note;
        note_head_sz = sizeof(Elf32_Nhdr);
        name_sz = tswap32(hdr->n_namesz);
        desc_sz = tswap32(hdr->n_descsz);
    }

    if (note_head_size) {
        *note_head_size = note_head_sz;
    }
    if (name_size) {
        *name_size = name_sz;
    }
    if (desc_size) {
        *desc_size = desc_sz;
    }
}

static bool note_name_equal(DumpState *s,
                            const uint8_t *note, const char *name)
{
    int len = strlen(name) + 1;
    uint64_t head_size, name_size;

    get_note_sizes(s, note, &head_size, &name_size, NULL);
    head_size = ROUND_UP(head_size, 4);

    return name_size == len && memcmp(note + head_size, name, len) == 0;
}
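
/*
 * Layout sketch (added commentary, not in the original source): an ELF note
 * consists of the note header (Elf{32,64}_Nhdr) followed by the name and the
 * descriptor, each padded to a 4-byte boundary:
 *
 *   +--------------+----------------+----------------+
 *   | Nhdr         | name (padded)  | desc (padded)  |
 *   +--------------+----------------+----------------+
 *
 * note_name_equal() therefore skips ROUND_UP(head_size, 4) bytes before
 * comparing the name, including its trailing NUL, against the header's
 * n_namesz.
 */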

/* write common header, sub header and elf note to vmcore */
static void create_header32(DumpState *s, Error **errp)
{
    DiskDumpHeader32 *dh = NULL;
    KdumpSubHeader32 *kh = NULL;
    size_t size;
    uint32_t block_size;
    uint32_t sub_hdr_size;
    uint32_t bitmap_blocks;
    uint32_t status = 0;
    uint64_t offset_note;
    Error *local_err = NULL;

    /* write the common header; version 6 of the kdump-compressed format */
    size = sizeof(DiskDumpHeader32);
    dh = g_malloc0(size);

    memcpy(dh->signature, KDUMP_SIGNATURE, SIG_LEN);
    dh->header_version = cpu_to_dump32(s, 6);
    block_size = s->dump_info.page_size;
    dh->block_size = cpu_to_dump32(s, block_size);
    sub_hdr_size = sizeof(struct KdumpSubHeader32) + s->note_size;
    sub_hdr_size = DIV_ROUND_UP(sub_hdr_size, block_size);
    dh->sub_hdr_size = cpu_to_dump32(s, sub_hdr_size);
    /* dh->max_mapnr may be truncated, full 64bit is in kh.max_mapnr_64 */
    dh->max_mapnr = cpu_to_dump32(s, MIN(s->max_mapnr, UINT_MAX));
    dh->nr_cpus = cpu_to_dump32(s, s->nr_cpus);
    bitmap_blocks = DIV_ROUND_UP(s->len_dump_bitmap, block_size) * 2;
    dh->bitmap_blocks = cpu_to_dump32(s, bitmap_blocks);
    strncpy(dh->utsname.machine, ELF_MACHINE_UNAME, sizeof(dh->utsname.machine));

    if (s->flag_compress & DUMP_DH_COMPRESSED_ZLIB) {
        status |= DUMP_DH_COMPRESSED_ZLIB;
    }
#ifdef CONFIG_LZO
    if (s->flag_compress & DUMP_DH_COMPRESSED_LZO) {
        status |= DUMP_DH_COMPRESSED_LZO;
    }
#endif
#ifdef CONFIG_SNAPPY
    if (s->flag_compress & DUMP_DH_COMPRESSED_SNAPPY) {
        status |= DUMP_DH_COMPRESSED_SNAPPY;
    }
#endif
    dh->status = cpu_to_dump32(s, status);

    if (write_buffer(s->fd, 0, dh, size) < 0) {
        error_setg(errp, "dump: failed to write disk dump header");
        goto out;
    }

    /* write sub header */
    size = sizeof(KdumpSubHeader32);
    kh = g_malloc0(size);

    /* 64bit max_mapnr_64 */
    kh->max_mapnr_64 = cpu_to_dump64(s, s->max_mapnr);
    kh->phys_base = cpu_to_dump32(s, s->dump_info.phys_base);
    kh->dump_level = cpu_to_dump32(s, DUMP_LEVEL);

    offset_note = DISKDUMP_HEADER_BLOCKS * block_size + size;
    if (s->guest_note &&
        note_name_equal(s, s->guest_note, "VMCOREINFO")) {
        uint64_t hsize, name_size, size_vmcoreinfo_desc, offset_vmcoreinfo;

        get_note_sizes(s, s->guest_note,
                       &hsize, &name_size, &size_vmcoreinfo_desc);
        offset_vmcoreinfo = offset_note + s->note_size - s->guest_note_size +
            (DIV_ROUND_UP(hsize, 4) + DIV_ROUND_UP(name_size, 4)) * 4;
        kh->offset_vmcoreinfo = cpu_to_dump64(s, offset_vmcoreinfo);
        kh->size_vmcoreinfo = cpu_to_dump32(s, size_vmcoreinfo_desc);
    }

    kh->offset_note = cpu_to_dump64(s, offset_note);
    kh->note_size = cpu_to_dump32(s, s->note_size);

    if (write_buffer(s->fd, DISKDUMP_HEADER_BLOCKS *
                     block_size, kh, size) < 0) {
        error_setg(errp, "dump: failed to write kdump sub header");
        goto out;
    }

    /* write note */
    s->note_buf = g_malloc0(s->note_size);
    s->note_buf_offset = 0;

    /* use s->note_buf to store notes temporarily */
    write_elf32_notes(buf_write_note, s, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        goto out;
    }
    if (write_buffer(s->fd, offset_note, s->note_buf,
                     s->note_size) < 0) {
        error_setg(errp, "dump: failed to write notes");
        goto out;
    }

    /* get offset of dump_bitmap */
    s->offset_dump_bitmap = (DISKDUMP_HEADER_BLOCKS + sub_hdr_size) *
                             block_size;

    /* get offset of page */
    s->offset_page = (DISKDUMP_HEADER_BLOCKS + sub_hdr_size + bitmap_blocks) *
                     block_size;

out:
    g_free(dh);
    g_free(kh);
    g_free(s->note_buf);
}

/* write common header, sub header and elf note to vmcore */
static void create_header64(DumpState *s, Error **errp)
{
    DiskDumpHeader64 *dh = NULL;
    KdumpSubHeader64 *kh = NULL;
    size_t size;
    uint32_t block_size;
    uint32_t sub_hdr_size;
    uint32_t bitmap_blocks;
    uint32_t status = 0;
    uint64_t offset_note;
    Error *local_err = NULL;

    /* write the common header; version 6 of the kdump-compressed format */
    size = sizeof(DiskDumpHeader64);
    dh = g_malloc0(size);

    memcpy(dh->signature, KDUMP_SIGNATURE, SIG_LEN);
    dh->header_version = cpu_to_dump32(s, 6);
    block_size = s->dump_info.page_size;
    dh->block_size = cpu_to_dump32(s, block_size);
    sub_hdr_size = sizeof(struct KdumpSubHeader64) + s->note_size;
    sub_hdr_size = DIV_ROUND_UP(sub_hdr_size, block_size);
    dh->sub_hdr_size = cpu_to_dump32(s, sub_hdr_size);
    /* dh->max_mapnr may be truncated, full 64bit is in kh.max_mapnr_64 */
    dh->max_mapnr = cpu_to_dump32(s, MIN(s->max_mapnr, UINT_MAX));
    dh->nr_cpus = cpu_to_dump32(s, s->nr_cpus);
    bitmap_blocks = DIV_ROUND_UP(s->len_dump_bitmap, block_size) * 2;
    dh->bitmap_blocks = cpu_to_dump32(s, bitmap_blocks);
    strncpy(dh->utsname.machine, ELF_MACHINE_UNAME, sizeof(dh->utsname.machine));

    if (s->flag_compress & DUMP_DH_COMPRESSED_ZLIB) {
        status |= DUMP_DH_COMPRESSED_ZLIB;
    }
#ifdef CONFIG_LZO
    if (s->flag_compress & DUMP_DH_COMPRESSED_LZO) {
        status |= DUMP_DH_COMPRESSED_LZO;
    }
#endif
#ifdef CONFIG_SNAPPY
    if (s->flag_compress & DUMP_DH_COMPRESSED_SNAPPY) {
        status |= DUMP_DH_COMPRESSED_SNAPPY;
    }
#endif
    dh->status = cpu_to_dump32(s, status);

    if (write_buffer(s->fd, 0, dh, size) < 0) {
        error_setg(errp, "dump: failed to write disk dump header");
        goto out;
    }

    /* write sub header */
    size = sizeof(KdumpSubHeader64);
    kh = g_malloc0(size);

    /* 64bit max_mapnr_64 */
    kh->max_mapnr_64 = cpu_to_dump64(s, s->max_mapnr);
    kh->phys_base = cpu_to_dump64(s, s->dump_info.phys_base);
    kh->dump_level = cpu_to_dump32(s, DUMP_LEVEL);

    offset_note = DISKDUMP_HEADER_BLOCKS * block_size + size;
    if (s->guest_note &&
        note_name_equal(s, s->guest_note, "VMCOREINFO")) {
        uint64_t hsize, name_size, size_vmcoreinfo_desc, offset_vmcoreinfo;

        get_note_sizes(s, s->guest_note,
                       &hsize, &name_size, &size_vmcoreinfo_desc);
        offset_vmcoreinfo = offset_note + s->note_size - s->guest_note_size +
            (DIV_ROUND_UP(hsize, 4) + DIV_ROUND_UP(name_size, 4)) * 4;
        kh->offset_vmcoreinfo = cpu_to_dump64(s, offset_vmcoreinfo);
        kh->size_vmcoreinfo = cpu_to_dump64(s, size_vmcoreinfo_desc);
    }

    kh->offset_note = cpu_to_dump64(s, offset_note);
    kh->note_size = cpu_to_dump64(s, s->note_size);

    if (write_buffer(s->fd, DISKDUMP_HEADER_BLOCKS *
                     block_size, kh, size) < 0) {
        error_setg(errp, "dump: failed to write kdump sub header");
        goto out;
    }

    /* write note */
    s->note_buf = g_malloc0(s->note_size);
    s->note_buf_offset = 0;

    /* use s->note_buf to store notes temporarily */
    write_elf64_notes(buf_write_note, s, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        goto out;
    }

    if (write_buffer(s->fd, offset_note, s->note_buf,
                     s->note_size) < 0) {
        error_setg(errp, "dump: failed to write notes");
        goto out;
    }

    /* get offset of dump_bitmap */
    s->offset_dump_bitmap = (DISKDUMP_HEADER_BLOCKS + sub_hdr_size) *
                             block_size;

    /* get offset of page */
    s->offset_page = (DISKDUMP_HEADER_BLOCKS + sub_hdr_size + bitmap_blocks) *
                     block_size;

out:
    g_free(dh);
    g_free(kh);
    g_free(s->note_buf);
}

static void write_dump_header(DumpState *s, Error **errp)
{
    if (s->dump_info.d_class == ELFCLASS32) {
        create_header32(s, errp);
    } else {
        create_header64(s, errp);
    }
}

static size_t dump_bitmap_get_bufsize(DumpState *s)
{
    return s->dump_info.page_size;
}

/*
 * Set dump_bitmap sequentially. Bits before last_pfn must not be rewritten,
 * so to set the very first bit, pass 0 for both last_pfn and pfn.
 * set_dump_bitmap always leaves the most recently set bit un-synced; setting
 * a bit at (last_pfn + sizeof(buf) * 8) to 0 flushes the content of buf into
 * the vmcore, i.e. it synchronizes the un-synced bits into the vmcore.
 */
static int set_dump_bitmap(uint64_t last_pfn, uint64_t pfn, bool value,
                           uint8_t *buf, DumpState *s)
{
    off_t old_offset, new_offset;
    off_t offset_bitmap1, offset_bitmap2;
    uint32_t byte, bit;
    size_t bitmap_bufsize = dump_bitmap_get_bufsize(s);
    size_t bits_per_buf = bitmap_bufsize * CHAR_BIT;

    /* bits before last_pfn must not be set again */
    assert(last_pfn <= pfn);

    /*
     * If the bit that needs to be set is not cached in buf, first flush the
     * data in buf to the vmcore. Passing a pfn that makes new_offset larger
     * than old_offset also syncs the remaining data into the vmcore.
     */
    old_offset = bitmap_bufsize * (last_pfn / bits_per_buf);
    new_offset = bitmap_bufsize * (pfn / bits_per_buf);

    while (old_offset < new_offset) {
        /* calculate the offset and write dump_bitmap */
        offset_bitmap1 = s->offset_dump_bitmap + old_offset;
        if (write_buffer(s->fd, offset_bitmap1, buf,
                         bitmap_bufsize) < 0) {
            return -1;
        }

        /* dump level 1 is chosen, so 1st and 2nd bitmap are same */
        offset_bitmap2 = s->offset_dump_bitmap + s->len_dump_bitmap +
                         old_offset;
        if (write_buffer(s->fd, offset_bitmap2, buf,
                         bitmap_bufsize) < 0) {
            return -1;
        }

        memset(buf, 0, bitmap_bufsize);
        old_offset += bitmap_bufsize;
    }

    /* get the exact place of the bit in the buf, and set it */
    byte = (pfn % bits_per_buf) / CHAR_BIT;
    bit = (pfn % bits_per_buf) % CHAR_BIT;
    if (value) {
        buf[byte] |= 1u << bit;
    } else {
        buf[byte] &= ~(1u << bit);
    }

    return 0;
}

static uint64_t dump_paddr_to_pfn(DumpState *s, uint64_t addr)
{
    int target_page_shift = ctz32(s->dump_info.page_size);

    return (addr >> target_page_shift) - ARCH_PFN_OFFSET;
}

static uint64_t dump_pfn_to_paddr(DumpState *s, uint64_t pfn)
{
    int target_page_shift = ctz32(s->dump_info.page_size);

    return (pfn + ARCH_PFN_OFFSET) << target_page_shift;
}
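
/*
 * Worked example (added commentary, not in the original source): with a
 * target page size of 4096 bytes, ctz32(4096) = 12, so for
 * ARCH_PFN_OFFSET == 0 the physical address 0x5000 maps to pfn 5, and
 * pfn 5 maps back to 0x5000.
 */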

/*
 * Examine every page and return the page frame number and the address of the
 * page. bufptr may be NULL. Note: the blocks here are supposed to reflect
 * guest-phys blocks, so block->target_start and block->target_end should be
 * integral multiples of the target page size.
 */
static bool get_next_page(GuestPhysBlock **blockptr, uint64_t *pfnptr,
                          uint8_t **bufptr, DumpState *s)
{
    GuestPhysBlock *block = *blockptr;
    hwaddr addr, target_page_mask = ~((hwaddr)s->dump_info.page_size - 1);
    uint8_t *buf;

    /* block == NULL means the start of the iteration */
    if (!block) {
        block = QTAILQ_FIRST(&s->guest_phys_blocks.head);
        *blockptr = block;
        assert((block->target_start & ~target_page_mask) == 0);
        assert((block->target_end & ~target_page_mask) == 0);
        *pfnptr = dump_paddr_to_pfn(s, block->target_start);
        if (bufptr) {
            *bufptr = block->host_addr;
        }
        return true;
    }

    *pfnptr = *pfnptr + 1;
    addr = dump_pfn_to_paddr(s, *pfnptr);

    if ((addr >= block->target_start) &&
        (addr + s->dump_info.page_size <= block->target_end)) {
        buf = block->host_addr + (addr - block->target_start);
    } else {
        /* the next page is in the next block */
        block = QTAILQ_NEXT(block, next);
        *blockptr = block;
        if (!block) {
            return false;
        }
        assert((block->target_start & ~target_page_mask) == 0);
        assert((block->target_end & ~target_page_mask) == 0);
        *pfnptr = dump_paddr_to_pfn(s, block->target_start);
        buf = block->host_addr;
    }

    if (bufptr) {
        *bufptr = buf;
    }

    return true;
}
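
/*
 * Usage sketch (added commentary): start the iteration with *blockptr set to
 * NULL; each successful call advances *pfnptr by one page, crossing into the
 * next GuestPhysBlock as needed, and the function returns false once all
 * blocks are exhausted. See write_dump_bitmap() below for a typical caller.
 */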

static void write_dump_bitmap(DumpState *s, Error **errp)
{
    int ret = 0;
    uint64_t last_pfn, pfn;
    void *dump_bitmap_buf;
    size_t num_dumpable;
    GuestPhysBlock *block_iter = NULL;
    size_t bitmap_bufsize = dump_bitmap_get_bufsize(s);
    size_t bits_per_buf = bitmap_bufsize * CHAR_BIT;

    /* dump_bitmap_buf is used to store dump_bitmap temporarily */
    dump_bitmap_buf = g_malloc0(bitmap_bufsize);

    num_dumpable = 0;
    last_pfn = 0;

    /*
     * Examine memory page by page, and set the bit in dump_bitmap that
     * corresponds to each existing page.
     */
    while (get_next_page(&block_iter, &pfn, NULL, s)) {
        ret = set_dump_bitmap(last_pfn, pfn, true, dump_bitmap_buf, s);
        if (ret < 0) {
            error_setg(errp, "dump: failed to set dump_bitmap");
            goto out;
        }

        last_pfn = pfn;
        num_dumpable++;
    }

    /*
     * set_dump_bitmap will always leave the most recently set bit un-synced.
     * Here we set the remaining bits from last_pfn to the end of the bitmap
     * buffer to 0. With those set, the un-synced bit is synchronized into the
     * vmcore.
     */
    if (num_dumpable > 0) {
        ret = set_dump_bitmap(last_pfn, last_pfn + bits_per_buf, false,
                              dump_bitmap_buf, s);
        if (ret < 0) {
            error_setg(errp, "dump: failed to sync dump_bitmap");
            goto out;
        }
    }

    /* number of dumpable pages that will be dumped later */
    s->num_dumpable = num_dumpable;

out:
    g_free(dump_bitmap_buf);
}

static void prepare_data_cache(DataCache *data_cache, DumpState *s,
                               off_t offset)
{
    data_cache->fd = s->fd;
    data_cache->data_size = 0;
    data_cache->buf_size = 4 * dump_bitmap_get_bufsize(s);
    data_cache->buf = g_malloc0(data_cache->buf_size);
    data_cache->offset = offset;
}

static int write_cache(DataCache *dc, const void *buf, size_t size,
                       bool flag_sync)
{
    /*
     * dc->buf_size must not be less than size; otherwise dc will never be
     * large enough
     */
    assert(size <= dc->buf_size);

    /*
     * If flag_sync is set, synchronize the data in dc->buf into the vmcore.
     * Otherwise check whether there is enough space to cache the data in buf;
     * if not, write the data in dc->buf to dc->fd and reset dc->buf.
     */
    if ((!flag_sync && dc->data_size + size > dc->buf_size) ||
        (flag_sync && dc->data_size > 0)) {
        if (write_buffer(dc->fd, dc->offset, dc->buf, dc->data_size) < 0) {
            return -1;
        }

        dc->offset += dc->data_size;
        dc->data_size = 0;
    }

    if (!flag_sync) {
        memcpy(dc->buf + dc->data_size, buf, size);
        dc->data_size += size;
    }

    return 0;
}

static void free_data_cache(DataCache *data_cache)
{
    g_free(data_cache->buf);
}

static size_t get_len_buf_out(size_t page_size, uint32_t flag_compress)
{
    switch (flag_compress) {
    case DUMP_DH_COMPRESSED_ZLIB:
        return compressBound(page_size);

    case DUMP_DH_COMPRESSED_LZO:
        /*
         * LZO will expand incompressible data by a little amount. Please check
         * the following URL to see the expansion calculation:
         * http://www.oberhumer.com/opensource/lzo/lzofaq.php
         */
        return page_size + page_size / 16 + 64 + 3;

#ifdef CONFIG_SNAPPY
    case DUMP_DH_COMPRESSED_SNAPPY:
        return snappy_max_compressed_length(page_size);
#endif
    }
    return 0;
}
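
/*
 * Illustrative bound (added commentary, not in the original source): for a
 * 4 KiB page the LZO case above yields 4096 + 4096 / 16 + 64 + 3 = 4419
 * bytes, the worst-case output size for incompressible input per the LZO
 * FAQ linked above.
 */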

/*
 * check if the page is all 0
 */
static inline bool is_zero_page(const uint8_t *buf, size_t page_size)
{
    return buffer_is_zero(buf, page_size);
}

static void write_dump_pages(DumpState *s, Error **errp)
{
    int ret = 0;
    DataCache page_desc, page_data;
    size_t len_buf_out, size_out;
#ifdef CONFIG_LZO
    lzo_bytep wrkmem = NULL;
#endif
    uint8_t *buf_out = NULL;
    off_t offset_desc, offset_data;
    PageDescriptor pd, pd_zero;
    uint8_t *buf;
    GuestPhysBlock *block_iter = NULL;
    uint64_t pfn_iter;

    /* get offset of page_desc and page_data in dump file */
    offset_desc = s->offset_page;
    offset_data = offset_desc + sizeof(PageDescriptor) * s->num_dumpable;

    prepare_data_cache(&page_desc, s, offset_desc);
    prepare_data_cache(&page_data, s, offset_data);

    /* prepare buffer to store compressed data */
    len_buf_out = get_len_buf_out(s->dump_info.page_size, s->flag_compress);
    assert(len_buf_out != 0);

#ifdef CONFIG_LZO
    wrkmem = g_malloc(LZO1X_1_MEM_COMPRESS);
#endif

    buf_out = g_malloc(len_buf_out);

    /*
     * Initialize the zero page's page_desc and page_data, because every zero
     * page uses the same page_data.
     */
    pd_zero.size = cpu_to_dump32(s, s->dump_info.page_size);
    pd_zero.flags = cpu_to_dump32(s, 0);
    pd_zero.offset = cpu_to_dump64(s, offset_data);
    pd_zero.page_flags = cpu_to_dump64(s, 0);
    buf = g_malloc0(s->dump_info.page_size);
    ret = write_cache(&page_data, buf, s->dump_info.page_size, false);
    g_free(buf);
    if (ret < 0) {
        error_setg(errp, "dump: failed to write page data (zero page)");
        goto out;
    }

    offset_data += s->dump_info.page_size;

    /*
     * Dump memory to the vmcore page by page. All zero pages share the single
     * zero page stored at the start of the page section.
     */
    while (get_next_page(&block_iter, &pfn_iter, &buf, s)) {
        /* check zero page */
        if (is_zero_page(buf, s->dump_info.page_size)) {
            ret = write_cache(&page_desc, &pd_zero, sizeof(PageDescriptor),
                              false);
            if (ret < 0) {
                error_setg(errp, "dump: failed to write page desc");
                goto out;
            }
        } else {
            /*
             * not a zero page, then:
             * 1. compress the page
             * 2. write the compressed page into the cache of page_data
             * 3. get page desc of the compressed page and write it into the
             *    cache of page_desc
             *
             * Only one compression format is used here, since
             * s->flag_compress is set. When compression fails, we fall back
             * to saving the page uncompressed.
             */
             size_out = len_buf_out;
             if ((s->flag_compress & DUMP_DH_COMPRESSED_ZLIB) &&
                    (compress2(buf_out, (uLongf *)&size_out, buf,
                               s->dump_info.page_size, Z_BEST_SPEED) == Z_OK) &&
                    (size_out < s->dump_info.page_size)) {
                pd.flags = cpu_to_dump32(s, DUMP_DH_COMPRESSED_ZLIB);
                pd.size  = cpu_to_dump32(s, size_out);

                ret = write_cache(&page_data, buf_out, size_out, false);
                if (ret < 0) {
                    error_setg(errp, "dump: failed to write page data");
                    goto out;
                }
#ifdef CONFIG_LZO
            } else if ((s->flag_compress & DUMP_DH_COMPRESSED_LZO) &&
                    (lzo1x_1_compress(buf, s->dump_info.page_size, buf_out,
                    (lzo_uint *)&size_out, wrkmem) == LZO_E_OK) &&
                    (size_out < s->dump_info.page_size)) {
                pd.flags = cpu_to_dump32(s, DUMP_DH_COMPRESSED_LZO);
                pd.size  = cpu_to_dump32(s, size_out);

                ret = write_cache(&page_data, buf_out, size_out, false);
                if (ret < 0) {
                    error_setg(errp, "dump: failed to write page data");
                    goto out;
                }
#endif
#ifdef CONFIG_SNAPPY
            } else if ((s->flag_compress & DUMP_DH_COMPRESSED_SNAPPY) &&
                    (snappy_compress((char *)buf, s->dump_info.page_size,
                    (char *)buf_out, &size_out) == SNAPPY_OK) &&
                    (size_out < s->dump_info.page_size)) {
                pd.flags = cpu_to_dump32(s, DUMP_DH_COMPRESSED_SNAPPY);
                pd.size  = cpu_to_dump32(s, size_out);

                ret = write_cache(&page_data, buf_out, size_out, false);
                if (ret < 0) {
                    error_setg(errp, "dump: failed to write page data");
                    goto out;
                }
#endif
            } else {
                /*
                 * Fall back to saving the page uncompressed; size_out is set
                 * to the target's page size.
                 */
                pd.flags = cpu_to_dump32(s, 0);
                size_out = s->dump_info.page_size;
                pd.size = cpu_to_dump32(s, size_out);

                ret = write_cache(&page_data, buf,
                                  s->dump_info.page_size, false);
                if (ret < 0) {
                    error_setg(errp, "dump: failed to write page data");
                    goto out;
                }
            }

            /* get and write page desc here */
            pd.page_flags = cpu_to_dump64(s, 0);
            pd.offset = cpu_to_dump64(s, offset_data);
            offset_data += size_out;

            ret = write_cache(&page_desc, &pd, sizeof(PageDescriptor), false);
            if (ret < 0) {
                error_setg(errp, "dump: failed to write page desc");
                goto out;
            }
        }
        s->written_size += s->dump_info.page_size;
    }

    ret = write_cache(&page_desc, NULL, 0, true);
    if (ret < 0) {
        error_setg(errp, "dump: failed to sync cache for page_desc");
        goto out;
    }
    ret = write_cache(&page_data, NULL, 0, true);
    if (ret < 0) {
        error_setg(errp, "dump: failed to sync cache for page_data");
        goto out;
    }

out:
    free_data_cache(&page_desc);
    free_data_cache(&page_data);

#ifdef CONFIG_LZO
    g_free(wrkmem);
#endif

    g_free(buf_out);
}
   1468
   1469static void create_kdump_vmcore(DumpState *s, Error **errp)
   1470{
   1471    int ret;
   1472    Error *local_err = NULL;
   1473
   1474    /*
   1475     * the kdump-compressed format is:
   1476     *                                               File offset
   1477     *  +------------------------------------------+ 0x0
   1478     *  |    main header (struct disk_dump_header) |
   1479     *  |------------------------------------------+ block 1
   1480     *  |    sub header (struct kdump_sub_header)  |
   1481     *  |------------------------------------------+ block 2
   1482     *  |            1st-dump_bitmap               |
   1483     *  |------------------------------------------+ block 2 + X blocks
   1484     *  |            2nd-dump_bitmap               | (aligned by block)
   1485     *  |------------------------------------------+ block 2 + 2 * X blocks
   1486     *  |  page desc for pfn 0 (struct page_desc)  | (aligned by block)
   1487     *  |  page desc for pfn 1 (struct page_desc)  |
   1488     *  |                    :                     |
   1489     *  |------------------------------------------| (not aligned by block)
   1490     *  |         page data (pfn 0)                |
   1491     *  |         page data (pfn 1)                |
   1492     *  |                    :                     |
   1493     *  +------------------------------------------+
   1494     */
   1495
   1496    ret = write_start_flat_header(s->fd);
   1497    if (ret < 0) {
   1498        error_setg(errp, "dump: failed to write start flat header");
   1499        return;
   1500    }
   1501
   1502    write_dump_header(s, &local_err);
   1503    if (local_err) {
   1504        error_propagate(errp, local_err);
   1505        return;
   1506    }
   1507
   1508    write_dump_bitmap(s, &local_err);
   1509    if (local_err) {
   1510        error_propagate(errp, local_err);
   1511        return;
   1512    }
   1513
   1514    write_dump_pages(s, &local_err);
   1515    if (local_err) {
   1516        error_propagate(errp, local_err);
   1517        return;
   1518    }
   1519
   1520    ret = write_end_flat_header(s->fd);
   1521    if (ret < 0) {
   1522        error_setg(errp, "dump: failed to write end flat header");
   1523        return;
   1524    }
   1525}
   1526
   1527static ram_addr_t get_start_block(DumpState *s)
   1528{
   1529    GuestPhysBlock *block;
   1530
   1531    if (!s->has_filter) {
   1532        s->next_block = QTAILQ_FIRST(&s->guest_phys_blocks.head);
   1533        return 0;
   1534    }
   1535
   1536    QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
   1537        if (block->target_start >= s->begin + s->length ||
   1538            block->target_end <= s->begin) {
    1539            /* This block is outside the filtered range */
   1540            continue;
   1541        }
   1542
   1543        s->next_block = block;
   1544        if (s->begin > block->target_start) {
   1545            s->start = s->begin - block->target_start;
   1546        } else {
   1547            s->start = 0;
   1548        }
   1549        return s->start;
   1550    }
   1551
   1552    return -1;
   1553}
   1554
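        /* The highest page frame number of the dump is derived from the end
         * address of the last guest-physical block. */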
   1555static void get_max_mapnr(DumpState *s)
   1556{
   1557    GuestPhysBlock *last_block;
   1558
   1559    last_block = QTAILQ_LAST(&s->guest_phys_blocks.head);
   1560    s->max_mapnr = dump_paddr_to_pfn(s, last_block->target_end);
   1561}
   1562
   1563static DumpState dump_state_global = { .status = DUMP_STATUS_NONE };
   1564
   1565static void dump_state_prepare(DumpState *s)
   1566{
   1567    /* zero the struct, setting status to active */
   1568    *s = (DumpState) { .status = DUMP_STATUS_ACTIVE };
   1569}
   1570
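        /* Returns true while the global dump state is active; used to
         * serialize dump requests against an already running dump. */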
   1571bool dump_in_progress(void)
   1572{
   1573    DumpState *state = &dump_state_global;
   1574    return (qatomic_read(&state->status) == DUMP_STATUS_ACTIVE);
   1575}
   1576
    1577/* calculate the total size of the memory to be dumped (taking the
    1578 * filter into account) */
   1579static int64_t dump_calculate_size(DumpState *s)
   1580{
   1581    GuestPhysBlock *block;
   1582    int64_t size = 0, total = 0, left = 0, right = 0;
   1583
   1584    QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
   1585        if (s->has_filter) {
   1586            /* calculate the overlapped region. */
   1587            left = MAX(s->begin, block->target_start);
   1588            right = MIN(s->begin + s->length, block->target_end);
   1589            size = right - left;
   1590            size = size > 0 ? size : 0;
   1591        } else {
   1592            /* count the whole region in */
   1593            size = (block->target_end - block->target_start);
   1594        }
   1595        total += size;
   1596    }
   1597
   1598    return total;
   1599}
   1600
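        /*
         * The guest's VMCOREINFO note is a list of "KEY=value" lines; look
         * for the physical base the guest kernel reported
         * ("NUMBER(phys_base)=" on x86-64, "NUMBER(PHYS_OFFSET)=" on
         * AArch64) and use it to correct the previously guessed
         * s->dump_info.phys_base.
         */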
   1601static void vmcoreinfo_update_phys_base(DumpState *s)
   1602{
   1603    uint64_t size, note_head_size, name_size, phys_base;
   1604    char **lines;
   1605    uint8_t *vmci;
   1606    size_t i;
   1607
   1608    if (!note_name_equal(s, s->guest_note, "VMCOREINFO")) {
   1609        return;
   1610    }
   1611
   1612    get_note_sizes(s, s->guest_note, &note_head_size, &name_size, &size);
   1613    note_head_size = ROUND_UP(note_head_size, 4);
   1614
   1615    vmci = s->guest_note + note_head_size + ROUND_UP(name_size, 4);
   1616    *(vmci + size) = '\0';
   1617
   1618    lines = g_strsplit((char *)vmci, "\n", -1);
   1619    for (i = 0; lines[i]; i++) {
   1620        const char *prefix = NULL;
   1621
   1622        if (s->dump_info.d_machine == EM_X86_64) {
   1623            prefix = "NUMBER(phys_base)=";
   1624        } else if (s->dump_info.d_machine == EM_AARCH64) {
   1625            prefix = "NUMBER(PHYS_OFFSET)=";
   1626        }
   1627
   1628        if (prefix && g_str_has_prefix(lines[i], prefix)) {
   1629            if (qemu_strtou64(lines[i] + strlen(prefix), NULL, 16,
   1630                              &phys_base) < 0) {
   1631                warn_report("Failed to read %s", prefix);
   1632            } else {
   1633                s->dump_info.phys_base = phys_base;
   1634            }
   1635            break;
   1636        }
   1637    }
   1638
   1639    g_strfreev(lines);
   1640}
   1641
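        /*
         * Gather everything the writers need: stop the VM (remembering
         * whether to resume it), sync CPU state, collect the guest-physical
         * blocks and memory mappings, pull in the guest's VMCOREINFO note,
         * and precompute sizes and offsets for the chosen output format.
         */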
   1642static void dump_init(DumpState *s, int fd, bool has_format,
   1643                      DumpGuestMemoryFormat format, bool paging, bool has_filter,
   1644                      int64_t begin, int64_t length, Error **errp)
   1645{
   1646    VMCoreInfoState *vmci = vmcoreinfo_find();
   1647    CPUState *cpu;
   1648    int nr_cpus;
   1649    Error *err = NULL;
   1650    int ret;
   1651
   1652    s->has_format = has_format;
   1653    s->format = format;
   1654    s->written_size = 0;
   1655
    1656    /* kdump-compressed conflicts with paging and filtering */
   1657    if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) {
   1658        assert(!paging && !has_filter);
   1659    }
   1660
   1661    if (runstate_is_running()) {
   1662        vm_stop(RUN_STATE_SAVE_VM);
   1663        s->resume = true;
   1664    } else {
   1665        s->resume = false;
   1666    }
   1667
   1668    /* If we use KVM, we should synchronize the registers before we get dump
   1669     * info or physmap info.
   1670     */
   1671    cpu_synchronize_all_states();
   1672    nr_cpus = 0;
   1673    CPU_FOREACH(cpu) {
   1674        nr_cpus++;
   1675    }
   1676
   1677    s->fd = fd;
   1678    s->has_filter = has_filter;
   1679    s->begin = begin;
   1680    s->length = length;
   1681
   1682    memory_mapping_list_init(&s->list);
   1683
   1684    guest_phys_blocks_init(&s->guest_phys_blocks);
   1685    guest_phys_blocks_append(&s->guest_phys_blocks);
   1686    s->total_size = dump_calculate_size(s);
   1687#ifdef DEBUG_DUMP_GUEST_MEMORY
    1688    fprintf(stderr, "DUMP: total memory to dump: %" PRIu64 "\n", s->total_size);
   1689#endif
   1690
   1691    /* it does not make sense to dump non-existent memory */
   1692    if (!s->total_size) {
   1693        error_setg(errp, "dump: no guest memory to dump");
   1694        goto cleanup;
   1695    }
   1696
   1697    s->start = get_start_block(s);
   1698    if (s->start == -1) {
   1699        error_setg(errp, QERR_INVALID_PARAMETER, "begin");
   1700        goto cleanup;
   1701    }
   1702
   1703    /* get dump info: endian, class and architecture.
   1704     * If the target architecture is not supported, cpu_get_dump_info() will
   1705     * return -1.
   1706     */
   1707    ret = cpu_get_dump_info(&s->dump_info, &s->guest_phys_blocks);
   1708    if (ret < 0) {
   1709        error_setg(errp, QERR_UNSUPPORTED);
   1710        goto cleanup;
   1711    }
   1712
   1713    if (!s->dump_info.page_size) {
   1714        s->dump_info.page_size = TARGET_PAGE_SIZE;
   1715    }
   1716
   1717    s->note_size = cpu_get_note_size(s->dump_info.d_class,
   1718                                     s->dump_info.d_machine, nr_cpus);
   1719    if (s->note_size < 0) {
   1720        error_setg(errp, QERR_UNSUPPORTED);
   1721        goto cleanup;
   1722    }
   1723
   1724    /*
   1725     * The goal of this block is to (a) update the previously guessed
   1726     * phys_base, (b) copy the guest note out of the guest.
   1727     * Failure to do so is not fatal for dumping.
   1728     */
   1729    if (vmci) {
   1730        uint64_t addr, note_head_size, name_size, desc_size;
   1731        uint32_t size;
    1732        uint16_t guest_format; /* avoid shadowing the "format" parameter */
   1733
   1734        note_head_size = s->dump_info.d_class == ELFCLASS32 ?
   1735            sizeof(Elf32_Nhdr) : sizeof(Elf64_Nhdr);
   1736
    1737        guest_format = le16_to_cpu(vmci->vmcoreinfo.guest_format);
   1738        size = le32_to_cpu(vmci->vmcoreinfo.size);
   1739        addr = le64_to_cpu(vmci->vmcoreinfo.paddr);
   1740        if (!vmci->has_vmcoreinfo) {
   1741            warn_report("guest note is not present");
   1742        } else if (size < note_head_size || size > MAX_GUEST_NOTE_SIZE) {
   1743            warn_report("guest note size is invalid: %" PRIu32, size);
    1744        } else if (guest_format != FW_CFG_VMCOREINFO_FORMAT_ELF) {
    1745            warn_report("guest note format is unsupported: %" PRIu16, guest_format);
   1746        } else {
   1747            s->guest_note = g_malloc(size + 1); /* +1 for adding \0 */
   1748            cpu_physical_memory_read(addr, s->guest_note, size);
   1749
   1750            get_note_sizes(s, s->guest_note, NULL, &name_size, &desc_size);
   1751            s->guest_note_size = ELF_NOTE_SIZE(note_head_size, name_size,
   1752                                               desc_size);
   1753            if (name_size > MAX_GUEST_NOTE_SIZE ||
   1754                desc_size > MAX_GUEST_NOTE_SIZE ||
   1755                s->guest_note_size > size) {
   1756                warn_report("Invalid guest note header");
   1757                g_free(s->guest_note);
   1758                s->guest_note = NULL;
   1759            } else {
   1760                vmcoreinfo_update_phys_base(s);
   1761                s->note_size += s->guest_note_size;
   1762            }
   1763        }
   1764    }
   1765
   1766    /* get memory mapping */
   1767    if (paging) {
   1768        qemu_get_guest_memory_mapping(&s->list, &s->guest_phys_blocks, &err);
   1769        if (err != NULL) {
   1770            error_propagate(errp, err);
   1771            goto cleanup;
   1772        }
   1773    } else {
   1774        qemu_get_guest_simple_memory_mapping(&s->list, &s->guest_phys_blocks);
   1775    }
   1776
   1777    s->nr_cpus = nr_cpus;
   1778
   1779    get_max_mapnr(s);
   1780
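            /*
             * The dump bitmap holds one bit per page frame; round its byte
             * size up to a whole multiple of the page size so both bitmaps
             * stay block-aligned in the output file.
             */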
   1781    uint64_t tmp;
   1782    tmp = DIV_ROUND_UP(DIV_ROUND_UP(s->max_mapnr, CHAR_BIT),
   1783                       s->dump_info.page_size);
   1784    s->len_dump_bitmap = tmp * s->dump_info.page_size;
   1785
   1786    /* init for kdump-compressed format */
   1787    if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) {
   1788        switch (format) {
   1789        case DUMP_GUEST_MEMORY_FORMAT_KDUMP_ZLIB:
   1790            s->flag_compress = DUMP_DH_COMPRESSED_ZLIB;
   1791            break;
   1792
   1793        case DUMP_GUEST_MEMORY_FORMAT_KDUMP_LZO:
   1794#ifdef CONFIG_LZO
   1795            if (lzo_init() != LZO_E_OK) {
   1796                error_setg(errp, "failed to initialize the LZO library");
   1797                goto cleanup;
   1798            }
   1799#endif
   1800            s->flag_compress = DUMP_DH_COMPRESSED_LZO;
   1801            break;
   1802
   1803        case DUMP_GUEST_MEMORY_FORMAT_KDUMP_SNAPPY:
   1804            s->flag_compress = DUMP_DH_COMPRESSED_SNAPPY;
   1805            break;
   1806
   1807        default:
   1808            s->flag_compress = 0;
   1809        }
   1810
   1811        return;
   1812    }
   1813
   1814    if (s->has_filter) {
   1815        memory_mapping_filter(&s->list, s->begin, s->length);
   1816    }
   1817
   1818    /*
   1819     * calculate phdr_num
   1820     *
   1821     * the type of ehdr->e_phnum is uint16_t, so we should avoid overflow
   1822     */
   1823    s->phdr_num = 1; /* PT_NOTE */
   1824    if (s->list.num < UINT16_MAX - 2) {
   1825        s->phdr_num += s->list.num;
   1826        s->have_section = false;
   1827    } else {
   1828        s->have_section = true;
   1829        s->phdr_num = PN_XNUM;
   1830        s->sh_info = 1; /* PT_NOTE */
   1831
   1832        /* the type of shdr->sh_info is uint32_t, so we should avoid overflow */
   1833        if (s->list.num <= UINT32_MAX - 1) {
   1834            s->sh_info += s->list.num;
   1835        } else {
   1836            s->sh_info = UINT32_MAX;
   1837        }
   1838    }
   1839
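            /*
             * Memory contents follow the ELF header, the program header
             * table, an optional section header (PN_XNUM case only) and the
             * note segment; precompute that file offset for the writers.
             */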
   1840    if (s->dump_info.d_class == ELFCLASS64) {
   1841        if (s->have_section) {
   1842            s->memory_offset = sizeof(Elf64_Ehdr) +
   1843                               sizeof(Elf64_Phdr) * s->sh_info +
   1844                               sizeof(Elf64_Shdr) + s->note_size;
   1845        } else {
   1846            s->memory_offset = sizeof(Elf64_Ehdr) +
   1847                               sizeof(Elf64_Phdr) * s->phdr_num + s->note_size;
   1848        }
   1849    } else {
   1850        if (s->have_section) {
   1851            s->memory_offset = sizeof(Elf32_Ehdr) +
   1852                               sizeof(Elf32_Phdr) * s->sh_info +
   1853                               sizeof(Elf32_Shdr) + s->note_size;
   1854        } else {
   1855            s->memory_offset = sizeof(Elf32_Ehdr) +
   1856                               sizeof(Elf32_Phdr) * s->phdr_num + s->note_size;
   1857        }
   1858    }
   1859
   1860    return;
   1861
   1862cleanup:
   1863    dump_cleanup(s);
   1864}
   1865
    1866/* this operation might be time-consuming. */
   1867static void dump_process(DumpState *s, Error **errp)
   1868{
   1869    Error *local_err = NULL;
   1870    DumpQueryResult *result = NULL;
   1871
   1872    if (s->has_format && s->format == DUMP_GUEST_MEMORY_FORMAT_WIN_DMP) {
   1873#ifdef TARGET_X86_64
   1874        create_win_dump(s, &local_err);
   1875#endif
   1876    } else if (s->has_format && s->format != DUMP_GUEST_MEMORY_FORMAT_ELF) {
   1877        create_kdump_vmcore(s, &local_err);
   1878    } else {
   1879        create_vmcore(s, &local_err);
   1880    }
   1881
   1882    /* make sure status is written after written_size updates */
   1883    smp_wmb();
   1884    qatomic_set(&s->status,
   1885               (local_err ? DUMP_STATUS_FAILED : DUMP_STATUS_COMPLETED));
   1886
   1887    /* send DUMP_COMPLETED message (unconditionally) */
   1888    result = qmp_query_dump(NULL);
   1889    /* should never fail */
   1890    assert(result);
   1891    qapi_event_send_dump_completed(result, !!local_err, (local_err ?
   1892                                   error_get_pretty(local_err) : NULL));
   1893    qapi_free_DumpQueryResult(result);
   1894
   1895    error_propagate(errp, local_err);
   1896    dump_cleanup(s);
   1897}
   1898
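        /* Entry point of the detached dump thread: errors are not propagated
         * to a caller but reported through the DUMP_COMPLETED event. */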
   1899static void *dump_thread(void *data)
   1900{
   1901    DumpState *s = (DumpState *)data;
   1902    dump_process(s, NULL);
   1903    return NULL;
   1904}
   1905
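        /* QMP query-dump: snapshot the global dump status together with the
         * written/total byte counts so progress can be polled. */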
   1906DumpQueryResult *qmp_query_dump(Error **errp)
   1907{
   1908    DumpQueryResult *result = g_new(DumpQueryResult, 1);
   1909    DumpState *state = &dump_state_global;
   1910    result->status = qatomic_read(&state->status);
   1911    /* make sure we are reading status and written_size in order */
   1912    smp_rmb();
   1913    result->completed = state->written_size;
   1914    result->total = state->total_size;
   1915    return result;
   1916}
   1917
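        /*
         * QMP dump-guest-memory: validate the argument combination, resolve
         * the "fd:" or "file:" protocol into a file descriptor, then run the
         * dump synchronously or, with detach=true, on a separate thread.
         */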
   1918void qmp_dump_guest_memory(bool paging, const char *file,
   1919                           bool has_detach, bool detach,
   1920                           bool has_begin, int64_t begin, bool has_length,
   1921                           int64_t length, bool has_format,
   1922                           DumpGuestMemoryFormat format, Error **errp)
   1923{
   1924    const char *p;
   1925    int fd = -1;
   1926    DumpState *s;
   1927    Error *local_err = NULL;
   1928    bool detach_p = false;
   1929
   1930    if (runstate_check(RUN_STATE_INMIGRATE)) {
   1931        error_setg(errp, "Dump not allowed during incoming migration.");
   1932        return;
   1933    }
   1934
    1935    /* if there is a dump running in the background, we must wait until it
    1936     * has finished */
   1937    if (dump_in_progress()) {
    1938        error_setg(errp, "There is a dump in progress, please wait.");
   1939        return;
   1940    }
   1941
    1942    /*
    1943     * The kdump-compressed format needs the whole memory dumped, so paging
    1944     * and filtering are not supported here.
    1945     */
   1946    if ((has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) &&
   1947        (paging || has_begin || has_length)) {
   1948        error_setg(errp, "kdump-compressed format doesn't support paging or "
   1949                         "filter");
   1950        return;
   1951    }
   1952    if (has_begin && !has_length) {
   1953        error_setg(errp, QERR_MISSING_PARAMETER, "length");
   1954        return;
   1955    }
   1956    if (!has_begin && has_length) {
   1957        error_setg(errp, QERR_MISSING_PARAMETER, "begin");
   1958        return;
   1959    }
   1960    if (has_detach) {
   1961        detach_p = detach;
   1962    }
   1963
   1964    /* check whether lzo/snappy is supported */
   1965#ifndef CONFIG_LZO
   1966    if (has_format && format == DUMP_GUEST_MEMORY_FORMAT_KDUMP_LZO) {
   1967        error_setg(errp, "kdump-lzo is not available now");
   1968        return;
   1969    }
   1970#endif
   1971
   1972#ifndef CONFIG_SNAPPY
   1973    if (has_format && format == DUMP_GUEST_MEMORY_FORMAT_KDUMP_SNAPPY) {
   1974        error_setg(errp, "kdump-snappy is not available now");
   1975        return;
   1976    }
   1977#endif
   1978
   1979#ifndef TARGET_X86_64
   1980    if (has_format && format == DUMP_GUEST_MEMORY_FORMAT_WIN_DMP) {
   1981        error_setg(errp, "Windows dump is only available for x86-64");
   1982        return;
   1983    }
   1984#endif
   1985
   1986#if !defined(WIN32)
   1987    if (strstart(file, "fd:", &p)) {
   1988        fd = monitor_get_fd(monitor_cur(), p, errp);
   1989        if (fd == -1) {
   1990            return;
   1991        }
   1992    }
   1993#endif
   1994
    1995    if (strstart(file, "file:", &p)) {
   1996        fd = qemu_open_old(p, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR);
   1997        if (fd < 0) {
   1998            error_setg_file_open(errp, errno, p);
   1999            return;
   2000        }
   2001    }
   2002
   2003    if (fd == -1) {
   2004        error_setg(errp, QERR_INVALID_PARAMETER, "protocol");
   2005        return;
   2006    }
   2007
   2008    s = &dump_state_global;
   2009    dump_state_prepare(s);
   2010
   2011    dump_init(s, fd, has_format, format, paging, has_begin,
   2012              begin, length, &local_err);
   2013    if (local_err) {
   2014        error_propagate(errp, local_err);
   2015        qatomic_set(&s->status, DUMP_STATUS_FAILED);
   2016        return;
   2017    }
   2018
   2019    if (detach_p) {
   2020        /* detached dump */
   2021        s->detached = true;
   2022        qemu_thread_create(&s->dump_thread, "dump_thread", dump_thread,
   2023                           s, QEMU_THREAD_DETACHED);
   2024    } else {
   2025        /* sync dump */
   2026        dump_process(s, errp);
   2027    }
   2028}
   2029
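        /* QMP query-dump-guest-memory-capability: list the dump formats this
         * binary was built with support for. */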
   2030DumpGuestMemoryCapability *qmp_query_dump_guest_memory_capability(Error **errp)
   2031{
   2032    DumpGuestMemoryCapability *cap =
   2033                                  g_malloc0(sizeof(DumpGuestMemoryCapability));
   2034    DumpGuestMemoryFormatList **tail = &cap->formats;
   2035
   2036    /* elf is always available */
   2037    QAPI_LIST_APPEND(tail, DUMP_GUEST_MEMORY_FORMAT_ELF);
   2038
   2039    /* kdump-zlib is always available */
   2040    QAPI_LIST_APPEND(tail, DUMP_GUEST_MEMORY_FORMAT_KDUMP_ZLIB);
   2041
   2042    /* add new item if kdump-lzo is available */
   2043#ifdef CONFIG_LZO
   2044    QAPI_LIST_APPEND(tail, DUMP_GUEST_MEMORY_FORMAT_KDUMP_LZO);
   2045#endif
   2046
   2047    /* add new item if kdump-snappy is available */
   2048#ifdef CONFIG_SNAPPY
   2049    QAPI_LIST_APPEND(tail, DUMP_GUEST_MEMORY_FORMAT_KDUMP_SNAPPY);
   2050#endif
   2051
   2052    /* Windows dump is available only if target is x86_64 */
   2053#ifdef TARGET_X86_64
   2054    QAPI_LIST_APPEND(tail, DUMP_GUEST_MEMORY_FORMAT_WIN_DMP);
   2055#endif
   2056
   2057    return cap;
   2058}