cachepc-qemu

Fork of AMDESE/qemu with changes for cachepc side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu
Log | Files | Refs | Submodules | LICENSE | sfeed.txt

dump-guest-memory.py (20723B)


      1"""
      2This python script adds a new gdb command, "dump-guest-memory". It
      3should be loaded with "source dump-guest-memory.py" at the (gdb)
      4prompt.
      5
      6Copyright (C) 2013, Red Hat, Inc.
      7
      8Authors:
      9   Laszlo Ersek <lersek@redhat.com>
     10   Janosch Frank <frankja@linux.vnet.ibm.com>
     11
     12This work is licensed under the terms of the GNU GPL, version 2 or later. See
     13the COPYING file in the top-level directory.
     14"""
     15
     16import ctypes
     17import struct
     18
# No import of "gdb" is visible in this file; presumably gdb itself
# provides the module when the script is sourced at the (gdb) prompt.
# The lookup below doubles as a check that debug symbols are available.
try:
    # Type used to print host pointers as plain integers.
    UINTPTR_T = gdb.lookup_type("uintptr_t")
except Exception as inst:
    raise gdb.GdbError("Symbols must be loaded prior to sourcing dump-guest-memory.\n"
                       "Symbols may be loaded by 'attach'ing a QEMU process id or by "
                       "'load'ing a QEMU binary.")

# Guest memory is copied to the vmcore in chunks of at most
# TARGET_PAGE_SIZE bytes.
TARGET_PAGE_SIZE = 0x1000
TARGET_PAGE_MASK = 0xFFFFFFFFFFFFF000

# Special value for e_phnum. This indicates that the real number of
# program headers is too large to fit into e_phnum. Instead the real
# value is in the field sh_info of section 0.
PN_XNUM = 0xFFFF

# Stored in both e_ident.ei_version and e_version.
EV_CURRENT = 1

# Values for e_ident.ei_class; they select the 32 or 64 bit header layouts.
ELFCLASS32 = 1
ELFCLASS64 = 2

# Values for e_ident.ei_data; they select little or big endian structures.
ELFDATA2LSB = 1
ELFDATA2MSB = 2

# Value for e_type: every ELF header built by this script is a core file.
ET_CORE = 4

# Values for p_type: guest RAM segments and the note segment.
PT_LOAD = 1
PT_NOTE = 4

# Values for e_machine, chosen from the ARCH argument of the command.
EM_386 = 3
EM_PPC = 20
EM_PPC64 = 21
EM_S390 = 22
EM_AARCH = 183
EM_X86_64 = 62

# The only vmcoreinfo guest_format value that is copied into the dump.
VMCOREINFO_FORMAT_ELF = 1
     55
     56def le16_to_cpu(val):
     57    return struct.unpack("<H", struct.pack("=H", val))[0]
     58
     59def le32_to_cpu(val):
     60    return struct.unpack("<I", struct.pack("=I", val))[0]
     61
     62def le64_to_cpu(val):
     63    return struct.unpack("<Q", struct.pack("=Q", val))[0]
     64
     65class ELF(object):
     66    """Representation of a ELF file."""
     67
     68    def __init__(self, arch):
     69        self.ehdr = None
     70        self.notes = []
     71        self.segments = []
     72        self.notes_size = 0
     73        self.endianness = None
     74        self.elfclass = ELFCLASS64
     75
     76        if arch == 'aarch64-le':
     77            self.endianness = ELFDATA2LSB
     78            self.elfclass = ELFCLASS64
     79            self.ehdr = get_arch_ehdr(self.endianness, self.elfclass)
     80            self.ehdr.e_machine = EM_AARCH
     81
     82        elif arch == 'aarch64-be':
     83            self.endianness = ELFDATA2MSB
     84            self.ehdr = get_arch_ehdr(self.endianness, self.elfclass)
     85            self.ehdr.e_machine = EM_AARCH
     86
     87        elif arch == 'X86_64':
     88            self.endianness = ELFDATA2LSB
     89            self.ehdr = get_arch_ehdr(self.endianness, self.elfclass)
     90            self.ehdr.e_machine = EM_X86_64
     91
     92        elif arch == '386':
     93            self.endianness = ELFDATA2LSB
     94            self.elfclass = ELFCLASS32
     95            self.ehdr = get_arch_ehdr(self.endianness, self.elfclass)
     96            self.ehdr.e_machine = EM_386
     97
     98        elif arch == 's390':
     99            self.endianness = ELFDATA2MSB
    100            self.ehdr = get_arch_ehdr(self.endianness, self.elfclass)
    101            self.ehdr.e_machine = EM_S390
    102
    103        elif arch == 'ppc64-le':
    104            self.endianness = ELFDATA2LSB
    105            self.ehdr = get_arch_ehdr(self.endianness, self.elfclass)
    106            self.ehdr.e_machine = EM_PPC64
    107
    108        elif arch == 'ppc64-be':
    109            self.endianness = ELFDATA2MSB
    110            self.ehdr = get_arch_ehdr(self.endianness, self.elfclass)
    111            self.ehdr.e_machine = EM_PPC64
    112
    113        else:
    114            raise gdb.GdbError("No valid arch type specified.\n"
    115                               "Currently supported types:\n"
    116                               "aarch64-be, aarch64-le, X86_64, 386, s390, "
    117                               "ppc64-be, ppc64-le")
    118
    119        self.add_segment(PT_NOTE, 0, 0)
    120
    121    def add_note(self, n_name, n_desc, n_type):
    122        """Adds a note to the ELF."""
    123
    124        note = get_arch_note(self.endianness, len(n_name), len(n_desc))
    125        note.n_namesz = len(n_name) + 1
    126        note.n_descsz = len(n_desc)
    127        note.n_name = n_name.encode()
    128        note.n_type = n_type
    129
    130        # Desc needs to be 4 byte aligned (although the 64bit spec
    131        # specifies 8 byte). When defining n_desc as uint32 it will be
    132        # automatically aligned but we need the memmove to copy the
    133        # string into it.
    134        ctypes.memmove(note.n_desc, n_desc.encode(), len(n_desc))
    135
    136        self.notes.append(note)
    137        self.segments[0].p_filesz += ctypes.sizeof(note)
    138        self.segments[0].p_memsz += ctypes.sizeof(note)
    139
    140
    141    def add_vmcoreinfo_note(self, vmcoreinfo):
    142        """Adds a vmcoreinfo note to the ELF dump."""
    143        # compute the header size, and copy that many bytes from the note
    144        header = get_arch_note(self.endianness, 0, 0)
    145        ctypes.memmove(ctypes.pointer(header),
    146                       vmcoreinfo, ctypes.sizeof(header))
    147        if header.n_descsz > 1 << 20:
    148            print('warning: invalid vmcoreinfo size')
    149            return
    150        # now get the full note
    151        note = get_arch_note(self.endianness,
    152                             header.n_namesz - 1, header.n_descsz)
    153        ctypes.memmove(ctypes.pointer(note), vmcoreinfo, ctypes.sizeof(note))
    154
    155        self.notes.append(note)
    156        self.segments[0].p_filesz += ctypes.sizeof(note)
    157        self.segments[0].p_memsz += ctypes.sizeof(note)
    158
    159    def add_segment(self, p_type, p_paddr, p_size):
    160        """Adds a segment to the elf."""
    161
    162        phdr = get_arch_phdr(self.endianness, self.elfclass)
    163        phdr.p_type = p_type
    164        phdr.p_paddr = p_paddr
    165        phdr.p_vaddr = p_paddr
    166        phdr.p_filesz = p_size
    167        phdr.p_memsz = p_size
    168        self.segments.append(phdr)
    169        self.ehdr.e_phnum += 1
    170
    171    def to_file(self, elf_file):
    172        """Writes all ELF structures to the passed file.
    173
    174        Structure:
    175        Ehdr
    176        Segment 0:PT_NOTE
    177        Segment 1:PT_LOAD
    178        Segment N:PT_LOAD
    179        Note    0..N
    180        Dump contents
    181        """
    182        elf_file.write(self.ehdr)
    183        off = ctypes.sizeof(self.ehdr) + \
    184              len(self.segments) * ctypes.sizeof(self.segments[0])
    185
    186        for phdr in self.segments:
    187            phdr.p_offset = off
    188            elf_file.write(phdr)
    189            off += phdr.p_filesz
    190
    191        for note in self.notes:
    192            elf_file.write(note)
    193
    194
    195def get_arch_note(endianness, len_name, len_desc):
    196    """Returns a Note class with the specified endianness."""
    197
    198    if endianness == ELFDATA2LSB:
    199        superclass = ctypes.LittleEndianStructure
    200    else:
    201        superclass = ctypes.BigEndianStructure
    202
    203    len_name = len_name + 1
    204
    205    class Note(superclass):
    206        """Represents an ELF note, includes the content."""
    207
    208        _fields_ = [("n_namesz", ctypes.c_uint32),
    209                    ("n_descsz", ctypes.c_uint32),
    210                    ("n_type", ctypes.c_uint32),
    211                    ("n_name", ctypes.c_char * len_name),
    212                    ("n_desc", ctypes.c_uint32 * ((len_desc + 3) // 4))]
    213    return Note()
    214
    215
    216class Ident(ctypes.Structure):
    217    """Represents the ELF ident array in the ehdr structure."""
    218
    219    _fields_ = [('ei_mag0', ctypes.c_ubyte),
    220                ('ei_mag1', ctypes.c_ubyte),
    221                ('ei_mag2', ctypes.c_ubyte),
    222                ('ei_mag3', ctypes.c_ubyte),
    223                ('ei_class', ctypes.c_ubyte),
    224                ('ei_data', ctypes.c_ubyte),
    225                ('ei_version', ctypes.c_ubyte),
    226                ('ei_osabi', ctypes.c_ubyte),
    227                ('ei_abiversion', ctypes.c_ubyte),
    228                ('ei_pad', ctypes.c_ubyte * 7)]
    229
    230    def __init__(self, endianness, elfclass):
    231        self.ei_mag0 = 0x7F
    232        self.ei_mag1 = ord('E')
    233        self.ei_mag2 = ord('L')
    234        self.ei_mag3 = ord('F')
    235        self.ei_class = elfclass
    236        self.ei_data = endianness
    237        self.ei_version = EV_CURRENT
    238
    239
    240def get_arch_ehdr(endianness, elfclass):
    241    """Returns a EHDR64 class with the specified endianness."""
    242
    243    if endianness == ELFDATA2LSB:
    244        superclass = ctypes.LittleEndianStructure
    245    else:
    246        superclass = ctypes.BigEndianStructure
    247
    248    class EHDR64(superclass):
    249        """Represents the 64 bit ELF header struct."""
    250
    251        _fields_ = [('e_ident', Ident),
    252                    ('e_type', ctypes.c_uint16),
    253                    ('e_machine', ctypes.c_uint16),
    254                    ('e_version', ctypes.c_uint32),
    255                    ('e_entry', ctypes.c_uint64),
    256                    ('e_phoff', ctypes.c_uint64),
    257                    ('e_shoff', ctypes.c_uint64),
    258                    ('e_flags', ctypes.c_uint32),
    259                    ('e_ehsize', ctypes.c_uint16),
    260                    ('e_phentsize', ctypes.c_uint16),
    261                    ('e_phnum', ctypes.c_uint16),
    262                    ('e_shentsize', ctypes.c_uint16),
    263                    ('e_shnum', ctypes.c_uint16),
    264                    ('e_shstrndx', ctypes.c_uint16)]
    265
    266        def __init__(self):
    267            super(superclass, self).__init__()
    268            self.e_ident = Ident(endianness, elfclass)
    269            self.e_type = ET_CORE
    270            self.e_version = EV_CURRENT
    271            self.e_ehsize = ctypes.sizeof(self)
    272            self.e_phoff = ctypes.sizeof(self)
    273            self.e_phentsize = ctypes.sizeof(get_arch_phdr(endianness, elfclass))
    274            self.e_phnum = 0
    275
    276
    277    class EHDR32(superclass):
    278        """Represents the 32 bit ELF header struct."""
    279
    280        _fields_ = [('e_ident', Ident),
    281                    ('e_type', ctypes.c_uint16),
    282                    ('e_machine', ctypes.c_uint16),
    283                    ('e_version', ctypes.c_uint32),
    284                    ('e_entry', ctypes.c_uint32),
    285                    ('e_phoff', ctypes.c_uint32),
    286                    ('e_shoff', ctypes.c_uint32),
    287                    ('e_flags', ctypes.c_uint32),
    288                    ('e_ehsize', ctypes.c_uint16),
    289                    ('e_phentsize', ctypes.c_uint16),
    290                    ('e_phnum', ctypes.c_uint16),
    291                    ('e_shentsize', ctypes.c_uint16),
    292                    ('e_shnum', ctypes.c_uint16),
    293                    ('e_shstrndx', ctypes.c_uint16)]
    294
    295        def __init__(self):
    296            super(superclass, self).__init__()
    297            self.e_ident = Ident(endianness, elfclass)
    298            self.e_type = ET_CORE
    299            self.e_version = EV_CURRENT
    300            self.e_ehsize = ctypes.sizeof(self)
    301            self.e_phoff = ctypes.sizeof(self)
    302            self.e_phentsize = ctypes.sizeof(get_arch_phdr(endianness, elfclass))
    303            self.e_phnum = 0
    304
    305    # End get_arch_ehdr
    306    if elfclass == ELFCLASS64:
    307        return EHDR64()
    308    else:
    309        return EHDR32()
    310
    311
    312def get_arch_phdr(endianness, elfclass):
    313    """Returns a 32 or 64 bit PHDR class with the specified endianness."""
    314
    315    if endianness == ELFDATA2LSB:
    316        superclass = ctypes.LittleEndianStructure
    317    else:
    318        superclass = ctypes.BigEndianStructure
    319
    320    class PHDR64(superclass):
    321        """Represents the 64 bit ELF program header struct."""
    322
    323        _fields_ = [('p_type', ctypes.c_uint32),
    324                    ('p_flags', ctypes.c_uint32),
    325                    ('p_offset', ctypes.c_uint64),
    326                    ('p_vaddr', ctypes.c_uint64),
    327                    ('p_paddr', ctypes.c_uint64),
    328                    ('p_filesz', ctypes.c_uint64),
    329                    ('p_memsz', ctypes.c_uint64),
    330                    ('p_align', ctypes.c_uint64)]
    331
    332    class PHDR32(superclass):
    333        """Represents the 32 bit ELF program header struct."""
    334
    335        _fields_ = [('p_type', ctypes.c_uint32),
    336                    ('p_offset', ctypes.c_uint32),
    337                    ('p_vaddr', ctypes.c_uint32),
    338                    ('p_paddr', ctypes.c_uint32),
    339                    ('p_filesz', ctypes.c_uint32),
    340                    ('p_memsz', ctypes.c_uint32),
    341                    ('p_flags', ctypes.c_uint32),
    342                    ('p_align', ctypes.c_uint32)]
    343
    344    # End get_arch_phdr
    345    if elfclass == ELFCLASS64:
    346        return PHDR64()
    347    else:
    348        return PHDR32()
    349
    350
    351def int128_get64(val):
    352    """Returns low 64bit part of Int128 struct."""
    353
    354    try:
    355        assert val["hi"] == 0
    356        return val["lo"]
    357    except gdb.error:
    358        u64t = gdb.lookup_type('uint64_t').array(2)
    359        u64 = val.cast(u64t)
    360        if sys.byteorder == 'little':
    361            assert u64[1] == 0
    362            return u64[0]
    363        else:
    364            assert u64[0] == 0
    365            return u64[1]
    366
    367
    368def qlist_foreach(head, field_str):
    369    """Generator for qlists."""
    370
    371    var_p = head["lh_first"]
    372    while var_p != 0:
    373        var = var_p.dereference()
    374        var_p = var[field_str]["le_next"]
    375        yield var
    376
    377
    378def qemu_map_ram_ptr(block, offset):
    379    """Returns qemu vaddr for given guest physical address."""
    380
    381    return block["host"] + offset
    382
    383
    384def memory_region_get_ram_ptr(memory_region):
    385    if memory_region["alias"] != 0:
    386        return (memory_region_get_ram_ptr(memory_region["alias"].dereference())
    387                + memory_region["alias_offset"])
    388
    389    return qemu_map_ram_ptr(memory_region["ram_block"], 0)
    390
    391
    392def get_guest_phys_blocks():
    393    """Returns a list of ram blocks.
    394
    395    Each block entry contains:
    396    'target_start': guest block phys start address
    397    'target_end':   guest block phys end address
    398    'host_addr':    qemu vaddr of the block's start
    399    """
    400
    401    guest_phys_blocks = []
    402
    403    print("guest RAM blocks:")
    404    print("target_start     target_end       host_addr        message "
    405          "count")
    406    print("---------------- ---------------- ---------------- ------- "
    407          "-----")
    408
    409    current_map_p = gdb.parse_and_eval("address_space_memory.current_map")
    410    current_map = current_map_p.dereference()
    411
    412    # Conversion to int is needed for python 3
    413    # compatibility. Otherwise range doesn't cast the value itself and
    414    # breaks.
    415    for cur in range(int(current_map["nr"])):
    416        flat_range = (current_map["ranges"] + cur).dereference()
    417        memory_region = flat_range["mr"].dereference()
    418
    419        # we only care about RAM
    420        if (not memory_region["ram"] or
    421            memory_region["ram_device"] or
    422            memory_region["nonvolatile"]):
    423            continue
    424
    425        section_size = int128_get64(flat_range["addr"]["size"])
    426        target_start = int128_get64(flat_range["addr"]["start"])
    427        target_end = target_start + section_size
    428        host_addr = (memory_region_get_ram_ptr(memory_region)
    429                     + flat_range["offset_in_region"])
    430        predecessor = None
    431
    432        # find continuity in guest physical address space
    433        if len(guest_phys_blocks) > 0:
    434            predecessor = guest_phys_blocks[-1]
    435            predecessor_size = (predecessor["target_end"] -
    436                                predecessor["target_start"])
    437
    438            # the memory API guarantees monotonically increasing
    439            # traversal
    440            assert predecessor["target_end"] <= target_start
    441
    442            # we want continuity in both guest-physical and
    443            # host-virtual memory
    444            if (predecessor["target_end"] < target_start or
    445                predecessor["host_addr"] + predecessor_size != host_addr):
    446                predecessor = None
    447
    448        if predecessor is None:
    449            # isolated mapping, add it to the list
    450            guest_phys_blocks.append({"target_start": target_start,
    451                                      "target_end":   target_end,
    452                                      "host_addr":    host_addr})
    453            message = "added"
    454        else:
    455            # expand predecessor until @target_end; predecessor's
    456            # start doesn't change
    457            predecessor["target_end"] = target_end
    458            message = "joined"
    459
    460        print("%016x %016x %016x %-7s %5u" %
    461              (target_start, target_end, host_addr.cast(UINTPTR_T),
    462               message, len(guest_phys_blocks)))
    463
    464    return guest_phys_blocks
    465
    466
    467# The leading docstring doesn't have idiomatic Python formatting. It is
    468# printed by gdb's "help" command (the first line is printed in the
    469# "help data" summary), and it should match how other help texts look in
    470# gdb.
    471class DumpGuestMemory(gdb.Command):
    472    """Extract guest vmcore from qemu process coredump.
    473
    474The two required arguments are FILE and ARCH:
    475FILE identifies the target file to write the guest vmcore to.
    476ARCH specifies the architecture for which the core will be generated.
    477
    478This GDB command reimplements the dump-guest-memory QMP command in
    479python, using the representation of guest memory as captured in the qemu
    480coredump. The qemu process that has been dumped must have had the
    481command line option "-machine dump-guest-core=on" which is the default.
    482
    483For simplicity, the "paging", "begin" and "end" parameters of the QMP
    484command are not supported -- no attempt is made to get the guest's
    485internal paging structures (ie. paging=false is hard-wired), and guest
    486memory is always fully dumped.
    487
    488Currently aarch64-be, aarch64-le, X86_64, 386, s390, ppc64-be,
    489ppc64-le guests are supported.
    490
    491The CORE/NT_PRSTATUS and QEMU notes (that is, the VCPUs' statuses) are
    492not written to the vmcore. Preparing these would require context that is
    493only present in the KVM host kernel module when the guest is alive. A
    494fake ELF note is written instead, only to keep the ELF parser of "crash"
    495happy.
    496
    497Dependent on how busted the qemu process was at the time of the
    498coredump, this command might produce unpredictable results. If qemu
    499deliberately called abort(), or it was dumped in response to a signal at
    500a halfway fortunate point, then its coredump should be in reasonable
    501shape and this command should mostly work."""
    502
    503    def __init__(self):
    504        super(DumpGuestMemory, self).__init__("dump-guest-memory",
    505                                              gdb.COMMAND_DATA,
    506                                              gdb.COMPLETE_FILENAME)
    507        self.elf = None
    508        self.guest_phys_blocks = None
    509
    510    def dump_init(self, vmcore):
    511        """Prepares and writes ELF structures to core file."""
    512
    513        # Needed to make crash happy, data for more useful notes is
    514        # not available in a qemu core.
    515        self.elf.add_note("NONE", "EMPTY", 0)
    516
    517        # We should never reach PN_XNUM for paging=false dumps,
    518        # there's just a handful of discontiguous ranges after
    519        # merging.
    520        # The constant is needed to account for the PT_NOTE segment.
    521        phdr_num = len(self.guest_phys_blocks) + 1
    522        assert phdr_num < PN_XNUM
    523
    524        for block in self.guest_phys_blocks:
    525            block_size = block["target_end"] - block["target_start"]
    526            self.elf.add_segment(PT_LOAD, block["target_start"], block_size)
    527
    528        self.elf.to_file(vmcore)
    529
    530    def dump_iterate(self, vmcore):
    531        """Writes guest core to file."""
    532
    533        qemu_core = gdb.inferiors()[0]
    534        for block in self.guest_phys_blocks:
    535            cur = block["host_addr"]
    536            left = block["target_end"] - block["target_start"]
    537            print("dumping range at %016x for length %016x" %
    538                  (cur.cast(UINTPTR_T), left))
    539
    540            while left > 0:
    541                chunk_size = min(TARGET_PAGE_SIZE, left)
    542                chunk = qemu_core.read_memory(cur, chunk_size)
    543                vmcore.write(chunk)
    544                cur += chunk_size
    545                left -= chunk_size
    546
    547    def phys_memory_read(self, addr, size):
    548        qemu_core = gdb.inferiors()[0]
    549        for block in self.guest_phys_blocks:
    550            if block["target_start"] <= addr \
    551               and addr + size <= block["target_end"]:
    552                haddr = block["host_addr"] + (addr - block["target_start"])
    553                return qemu_core.read_memory(haddr, size)
    554        return None
    555
    556    def add_vmcoreinfo(self):
    557        if gdb.lookup_symbol("vmcoreinfo_realize")[0] is None:
    558            return
    559        vmci = 'vmcoreinfo_realize::vmcoreinfo_state'
    560        if not gdb.parse_and_eval("%s" % vmci) \
    561           or not gdb.parse_and_eval("(%s)->has_vmcoreinfo" % vmci):
    562            return
    563
    564        fmt = gdb.parse_and_eval("(%s)->vmcoreinfo.guest_format" % vmci)
    565        addr = gdb.parse_and_eval("(%s)->vmcoreinfo.paddr" % vmci)
    566        size = gdb.parse_and_eval("(%s)->vmcoreinfo.size" % vmci)
    567
    568        fmt = le16_to_cpu(fmt)
    569        addr = le64_to_cpu(addr)
    570        size = le32_to_cpu(size)
    571
    572        if fmt != VMCOREINFO_FORMAT_ELF:
    573            return
    574
    575        vmcoreinfo = self.phys_memory_read(addr, size)
    576        if vmcoreinfo:
    577            self.elf.add_vmcoreinfo_note(bytes(vmcoreinfo))
    578
    579    def invoke(self, args, from_tty):
    580        """Handles command invocation from gdb."""
    581
    582        # Unwittingly pressing the Enter key after the command should
    583        # not dump the same multi-gig coredump to the same file.
    584        self.dont_repeat()
    585
    586        argv = gdb.string_to_argv(args)
    587        if len(argv) != 2:
    588            raise gdb.GdbError("usage: dump-guest-memory FILE ARCH")
    589
    590        self.elf = ELF(argv[1])
    591        self.guest_phys_blocks = get_guest_phys_blocks()
    592        self.add_vmcoreinfo()
    593
    594        with open(argv[0], "wb") as vmcore:
    595            self.dump_init(vmcore)
    596            self.dump_iterate(vmcore)
    597
    598DumpGuestMemory()