cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

decompress_unxz.c (11196B)


/*
 * Wrapper for decompressing XZ-compressed kernel, initramfs, and initrd
 *
 * Author: Lasse Collin <lasse.collin@tukaani.org>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */

/*
 * Important notes about in-place decompression
 *
 * At least on x86, the kernel is decompressed in place: the compressed data
 * is placed at the end of the output buffer, and the decompressor overwrites
 * most of the compressed data. There must be enough safety margin to
 * guarantee that the write position is always behind the read position.
 *
 * The safety margin for XZ with LZMA2 or BCJ+LZMA2 is calculated below.
 * Note that the margin with XZ is bigger than with Deflate (gzip)!
 *
 * The worst case for in-place decompression is that the beginning of
 * the file is compressed extremely well, and the rest of the file is
 * incompressible. Thus, we must look for worst-case expansion when the
 * compressor is encoding incompressible data.
 *
 * The structure of the .xz file in case of a compressed kernel is as follows.
 * Sizes (in bytes) of the fields are given in parentheses.
 *
 *    Stream Header (12)
 *    Block Header:
 *      Block Header (8-12)
 *      Compressed Data (N)
 *      Block Padding (0-3)
 *      CRC32 (4)
 *    Index (8-20)
 *    Stream Footer (12)
 *
 * Normally there is exactly one Block, but let's assume that there are
 * 2-4 Blocks just in case. Because the Stream Header and the Block Header
 * of the first Block don't make the decompressor produce any uncompressed
 * data, we can leave them out of our calculations. Block Headers of possible
 * additional Blocks still have to be taken into account. With these
 * assumptions, it is safe to assume that the total header overhead is
 * less than 128 bytes.
 *
 * Compressed Data contains LZMA2 or BCJ+LZMA2 encoded data. Since BCJ
 * doesn't change the size of the data, it is enough to calculate the
 * safety margin for LZMA2.
 *
 * LZMA2 stores the data in chunks. Each chunk has a header whose size is
 * a maximum of 6 bytes, but to round up to a power of two, let's assume that
 * the maximum chunk header size is 8 bytes. After the chunk header, there
 * may be up to 64 KiB of actual payload in the chunk. Often the payload is
 * quite a bit smaller though; to be safe, let's assume that an average
 * chunk has only 32 KiB of payload.
 *
 * The maximum uncompressed size of the payload is 2 MiB. The minimum
 * uncompressed size of the payload is in practice never less than the
 * payload size itself. The LZMA2 format would allow uncompressed size
 * to be less than the payload size, but no sane compressor creates such
 * files. LZMA2 supports storing incompressible data in uncompressed form,
 * so there's never a need to create payloads whose uncompressed size is
 * smaller than the compressed size.
 *
 * The assumption that the uncompressed size of the payload is never
 * smaller than the payload itself is valid only when talking about
 * the payload as a whole. It is possible that the payload has parts where
 * the decompressor consumes more input than it produces output. Calculating
 * the worst case for this would be tricky. Instead of trying to do that,
 * let's simply make sure that the decompressor never overwrites any bytes
 * of the payload which it is currently reading.
 *
 * Now we have enough information to calculate the safety margin. We need
 *   - 128 bytes for the .xz file format headers;
 *   - 8 bytes per every 32 KiB of uncompressed size (one LZMA2 chunk header
 *     per chunk, each chunk having average payload size of 32 KiB); and
 *   - 64 KiB (biggest possible LZMA2 chunk payload size) to make sure that
 *     the decompressor never overwrites anything from the LZMA2 chunk
 *     payload it is currently reading.
 *
 * We get the following formula:
 *
 *    safety_margin = 128 + uncompressed_size * 8 / 32768 + 65536
 *                  = 128 + (uncompressed_size >> 12) + 65536
 *
 * For comparison, according to arch/x86/boot/compressed/misc.c, the
 * equivalent formula for Deflate is this:
 *
 *    safety_margin = 18 + (uncompressed_size >> 12) + 32768
 *
 * Thus, when updating a Deflate-only in-place kernel decompressor to
 * support XZ, the fixed overhead has to be increased from 18+32768 bytes
 * to 128+65536 bytes.
 */
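
/*
 * As a worked example of the formula above, assume a hypothetical 32 MiB
 * (33554432-byte) uncompressed kernel image:
 *
 *    safety_margin = 128 + (33554432 >> 12) + 65536
 *                  = 128 + 8192 + 65536
 *                  = 73856 bytes (roughly 72 KiB)
 *
 * That is, the in-place buffer needs roughly 72 KiB of room beyond the
 * uncompressed size to keep the write position behind the read position.
 */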

/*
 * STATIC is defined to "static" if we are being built for kernel
 * decompression (pre-boot code). <linux/decompress/mm.h> will define
 * STATIC to empty if it wasn't already defined. Since we will need to
 * know later if we are being used for kernel decompression, we define
 * XZ_PREBOOT here.
 */
#ifdef STATIC
#	define XZ_PREBOOT
#endif
#ifdef __KERNEL__
#	include <linux/decompress/mm.h>
#endif
#define XZ_EXTERN STATIC

#ifndef XZ_PREBOOT
#	include <linux/slab.h>
#	include <linux/xz.h>
#else
/*
 * Use the internal CRC32 code instead of the kernel's CRC32 module, which
 * is not available in the early phase of booting.
 */
#define XZ_INTERNAL_CRC32 1

/*
 * For boot time use, we enable only the BCJ filter of the current
 * architecture or none if no BCJ filter is available for the architecture.
 */
#ifdef CONFIG_X86
#	define XZ_DEC_X86
#endif
#ifdef CONFIG_PPC
#	define XZ_DEC_POWERPC
#endif
#ifdef CONFIG_ARM
#	define XZ_DEC_ARM
#endif
#ifdef CONFIG_IA64
#	define XZ_DEC_IA64
#endif
#ifdef CONFIG_SPARC
#	define XZ_DEC_SPARC
#endif

/*
 * This will get the basic headers so that memeq() and others
 * can be defined.
 */
#include "xz/xz_private.h"

/*
 * Replace the normal allocation functions with the versions from
 * <linux/decompress/mm.h>. vfree() needs to support vfree(NULL)
 * when XZ_DYNALLOC is used, but the pre-boot free() doesn't support it.
 * Work around it here because the other decompressors don't need it.
 */
#undef kmalloc
#undef kfree
#undef vmalloc
#undef vfree
#define kmalloc(size, flags) malloc(size)
#define kfree(ptr) free(ptr)
#define vmalloc(size) malloc(size)
#define vfree(ptr) do { if (ptr != NULL) free(ptr); } while (0)

/*
 * FIXME: Not all basic memory functions are provided in architecture-specific
 * files (yet). We define our own versions here for now, but this should be
 * only a temporary solution.
 *
 * memeq and memzero are not used much and any remotely sane implementation
 * is fast enough. memcpy/memmove speed matters in multi-call mode, but
 * the kernel image is decompressed in single-call mode, in which only
 * memmove speed can matter and only if there is a lot of incompressible data
 * (LZMA2 stores incompressible chunks in uncompressed form). Thus, the
 * functions below should just be kept small; it's probably not worth
 * optimizing for speed.
 */

#ifndef memeq
static bool memeq(const void *a, const void *b, size_t size)
{
	const uint8_t *x = a;
	const uint8_t *y = b;
	size_t i;

	for (i = 0; i < size; ++i)
		if (x[i] != y[i])
			return false;

	return true;
}
#endif

#ifndef memzero
static void memzero(void *buf, size_t size)
{
	uint8_t *b = buf;
	uint8_t *e = b + size;

	while (b != e)
		*b++ = '\0';
}
#endif

#ifndef memmove
/* Not static to avoid a conflict with the prototype in the Linux headers. */
void *memmove(void *dest, const void *src, size_t size)
{
	uint8_t *d = dest;
	const uint8_t *s = src;
	size_t i;

	if (d < s) {
		for (i = 0; i < size; ++i)
			d[i] = s[i];
	} else if (d > s) {
		i = size;
		while (i-- > 0)
			d[i] = s[i];
	}

	return dest;
}
#endif

/*
 * Since we need memmove anyway, we could use it as memcpy too.
 * This is commented out for now to avoid breaking things.
 */
/*
#ifndef memcpy
#	define memcpy memmove
#endif
*/

#include "xz/xz_crc32.c"
#include "xz/xz_dec_stream.c"
#include "xz/xz_dec_lzma2.c"
#include "xz/xz_dec_bcj.c"

#endif /* XZ_PREBOOT */

/* Size of the input and output buffers in multi-call mode */
#define XZ_IOBUF_SIZE 4096

/*
 * This function implements the API defined in <linux/decompress/generic.h>.
 *
 * This wrapper will automatically choose single-call or multi-call mode
 * of the native XZ decoder API. The single-call mode can be used only when
 * both input and output buffers are available as a single chunk, i.e. when
 * fill() and flush() won't be used.
 */
STATIC int INIT unxz(unsigned char *in, long in_size,
		     long (*fill)(void *dest, unsigned long size),
		     long (*flush)(void *src, unsigned long size),
		     unsigned char *out, long *in_used,
		     void (*error)(char *x))
{
	struct xz_buf b;
	struct xz_dec *s;
	enum xz_ret ret;
	bool must_free_in = false;

#if XZ_INTERNAL_CRC32
	xz_crc32_init();
#endif

	if (in_used != NULL)
		*in_used = 0;

	if (fill == NULL && flush == NULL)
		s = xz_dec_init(XZ_SINGLE, 0);
	else
		s = xz_dec_init(XZ_DYNALLOC, (uint32_t)-1);

	if (s == NULL)
		goto error_alloc_state;

	if (flush == NULL) {
		b.out = out;
		b.out_size = (size_t)-1;
	} else {
		b.out_size = XZ_IOBUF_SIZE;
		b.out = malloc(XZ_IOBUF_SIZE);
		if (b.out == NULL)
			goto error_alloc_out;
	}

	if (in == NULL) {
		must_free_in = true;
		in = malloc(XZ_IOBUF_SIZE);
		if (in == NULL)
			goto error_alloc_in;
	}

	b.in = in;
	b.in_pos = 0;
	b.in_size = in_size;
	b.out_pos = 0;

	if (fill == NULL && flush == NULL) {
		ret = xz_dec_run(s, &b);
	} else {
		do {
			if (b.in_pos == b.in_size && fill != NULL) {
				if (in_used != NULL)
					*in_used += b.in_pos;

				b.in_pos = 0;

				in_size = fill(in, XZ_IOBUF_SIZE);
				if (in_size < 0) {
					/*
					 * This isn't an optimal error code
					 * but it probably isn't worth making
					 * a new one either.
					 */
					ret = XZ_BUF_ERROR;
					break;
				}

				b.in_size = in_size;
			}

			ret = xz_dec_run(s, &b);

			if (flush != NULL && (b.out_pos == b.out_size
					|| (ret != XZ_OK && b.out_pos > 0))) {
				/*
				 * Setting ret here may hide an error
				 * returned by xz_dec_run(), but probably
				 * it's not too bad.
				 */
				if (flush(b.out, b.out_pos) != (long)b.out_pos)
					ret = XZ_BUF_ERROR;

				b.out_pos = 0;
			}
		} while (ret == XZ_OK);

		if (must_free_in)
			free(in);

		if (flush != NULL)
			free(b.out);
	}

	if (in_used != NULL)
		*in_used += b.in_pos;

	xz_dec_end(s);

	switch (ret) {
	case XZ_STREAM_END:
		return 0;

	case XZ_MEM_ERROR:
		/* This can occur only in multi-call mode. */
		error("XZ decompressor ran out of memory");
		break;

	case XZ_FORMAT_ERROR:
		error("Input is not in the XZ format (wrong magic bytes)");
		break;

	case XZ_OPTIONS_ERROR:
		error("Input was encoded with settings that are not "
				"supported by this XZ decoder");
		break;

	case XZ_DATA_ERROR:
	case XZ_BUF_ERROR:
		error("XZ-compressed data is corrupt");
		break;

	default:
		error("Bug in the XZ decompressor");
		break;
	}

	return -1;

error_alloc_in:
	if (flush != NULL)
		free(b.out);

error_alloc_out:
	xz_dec_end(s);

error_alloc_state:
	error("XZ decompressor ran out of memory");
	return -1;
}
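
/*
 * A minimal usage sketch of the wrapper above, assuming single-call mode:
 * the caller passes NULL for both fill() and flush(), the whole compressed
 * image in one input buffer, and an output buffer large enough for the
 * uncompressed image. The buffer and callback names are hypothetical and
 * only illustrate the calling convention, so the sketch is kept under
 * #if 0.
 */
#if 0
static int INIT unxz_single_call_example(unsigned char *compressed_buf,
					 long compressed_size,
					 unsigned char *output_buf,
					 void (*report_error)(char *x))
{
	long in_used;

	/* fill == NULL && flush == NULL selects XZ_SINGLE mode above. */
	return unxz(compressed_buf, compressed_size, NULL, NULL,
		    output_buf, &in_used, report_error);
}
#endif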

/*
 * This function is used by architecture-specific files to decompress
 * the kernel image.
 */
#ifdef XZ_PREBOOT
STATIC int INIT __decompress(unsigned char *buf, long len,
			   long (*fill)(void*, unsigned long),
			   long (*flush)(void*, unsigned long),
			   unsigned char *out_buf, long olen,
			   long *pos,
			   void (*error)(char *x))
{
	return unxz(buf, len, fill, flush, out_buf, pos, error);
}
#endif
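
/*
 * For context, a sketch of how the pre-boot build is assumed to pull in
 * this wrapper: the architecture-specific decompressor (for example
 * arch/x86/boot/compressed/misc.c, referenced in the comments above)
 * defines STATIC and then includes this file directly, roughly like:
 *
 *	#ifdef CONFIG_KERNEL_XZ
 *	#include "../../../../lib/decompress_unxz.c"
 *	#endif
 *
 * after which it calls the __decompress() defined above. The config
 * symbol and include path shown here are illustrative assumptions.
 */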