cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

lzo1x_compress.c (9637B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 *  LZO1X Compressor from LZO
      4 *
      5 *  Copyright (C) 1996-2012 Markus F.X.J. Oberhumer <markus@oberhumer.com>
      6 *
      7 *  The full LZO package can be found at:
      8 *  http://www.oberhumer.com/opensource/lzo/
      9 *
     10 *  Changed for Linux kernel use by:
     11 *  Nitin Gupta <nitingupta910@gmail.com>
     12 *  Richard Purdie <rpurdie@openedhand.com>
     13 */
     14
     15#include <linux/module.h>
     16#include <linux/kernel.h>
     17#include <asm/unaligned.h>
     18#include <linux/lzo.h>
     19#include "lzodefs.h"
     20
     /*
      * lzo1x_1_do_compress - compress one chunk of input into LZO1X instructions
      * @in:                start of the chunk to compress
      * @in_len:            number of bytes in the chunk
      * @out:               where the compressed instructions are written
      * @out_len:           set on return to the number of bytes written to @out
      * @ti:                number of literal bytes located immediately before @in
      *                     that have not been emitted yet; they are prepended to
      *                     the first literal run emitted here (see "ii -= ti")
      * @wrkmem:            work memory, used as an array of lzo_dict_t indexed by a
      *                     hash of the next four input bytes
      * @state_offset:      in/out; negative offset from the current output
      *                     position to the byte whose low bits receive a literal
      *                     count of 1..3. Set to -3 after a zero-run instruction
      *                     and to -2 after a match instruction.
      * @bitstream_version: non-zero enables zero-run detection and the
      *                     length adjustment for ambiguous M4 encodings
      *
      * Returns the number of bytes at the end of the chunk (plus any still
      * pending @ti bytes) that were not emitted to @out. The caller in this
      * file passes that value back as @ti for the next chunk, or emits the
      * bytes itself as the final literal run.
      */
     21static noinline size_t
     22lzo1x_1_do_compress(const unsigned char *in, size_t in_len,
     23		    unsigned char *out, size_t *out_len,
     24		    size_t ti, void *wrkmem, signed char *state_offset,
     25		    const unsigned char bitstream_version)
     26{
     27	const unsigned char *ip;
     28	unsigned char *op;
     29	const unsigned char * const in_end = in + in_len;
       	/* Searching stops once ip reaches the last 20 bytes of the chunk. */
     30	const unsigned char * const ip_end = in + in_len - 20;
       	/* ii marks the first input byte that has not been emitted yet. */
     31	const unsigned char *ii;
     32	lzo_dict_t * const dict = (lzo_dict_t *) wrkmem;
     33
     34	op = out;
     35	ip = in;
     36	ii = ip;
       	/*
       	 * With fewer than 4 carried-over literal bytes, begin searching
       	 * 4 - ti bytes into the chunk instead of at its first byte.
       	 */
     37	ip += ti < 4 ? 4 - ti : 0;
     38
     39	for (;;) {
     40		const unsigned char *m_pos = NULL;
     41		size_t t, m_len, m_off;
     42		u32 dv;
     43		u32 run_length = 0;
     44literal:
       		/*
       		 * No match at ip: step forward by one byte plus 1/32 of the
       		 * length of the current unmatched stretch (ip - ii).
       		 */
     45		ip += 1 + ((ip - ii) >> 5);
     46next:
     47		if (unlikely(ip >= ip_end))
     48			break;
       		/* The next four input bytes, read as a little-endian word. */
     49		dv = get_unaligned_le32(ip);
     50
     51		if (dv == 0 && bitstream_version) {
       			/*
       			 * Four zero bytes and run-length encoding is enabled:
       			 * measure how far the zero run extends, looking no
       			 * further than min(ip_end, ip + MAX_ZERO_RUN_LENGTH + 1).
       			 */
     52			const unsigned char *ir = ip + 4;
     53			const unsigned char *limit = ip_end
     54				< (ip + MAX_ZERO_RUN_LENGTH + 1)
     55				? ip_end : ip + MAX_ZERO_RUN_LENGTH + 1;
     56#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && \
     57	defined(LZO_FAST_64BIT_MEMORY_ACCESS)
     58			u64 dv64;
     59
       			/* Test 32 bytes per step by OR-ing four 64-bit words. */
     60			for (; (ir + 32) <= limit; ir += 32) {
     61				dv64 = get_unaligned((u64 *)ir);
     62				dv64 |= get_unaligned((u64 *)ir + 1);
     63				dv64 |= get_unaligned((u64 *)ir + 2);
     64				dv64 |= get_unaligned((u64 *)ir + 3);
     65				if (dv64)
     66					break;
     67			}
       			/*
       			 * Then 8 bytes per step; on a non-zero word advance ir
       			 * to its first non-zero byte in memory order.
       			 */
     68			for (; (ir + 8) <= limit; ir += 8) {
     69				dv64 = get_unaligned((u64 *)ir);
     70				if (dv64) {
     71#  if defined(__LITTLE_ENDIAN)
     72					ir += __builtin_ctzll(dv64) >> 3;
     73#  elif defined(__BIG_ENDIAN)
     74					ir += __builtin_clzll(dv64) >> 3;
     75#  else
     76#    error "missing endian definition"
     77#  endif
     78					break;
     79				}
     80			}
     81#else
       			/* Step bytewise until ir is 4-byte aligned ... */
     82			while ((ir < (const unsigned char *)
     83					ALIGN((uintptr_t)ir, 4)) &&
     84					(ir < limit) && (*ir == 0))
     85				ir++;
       			/*
       			 * ... then use aligned 32-bit loads. dv is reused as
       			 * scratch here; it is not read again on this path.
       			 */
     86			if (IS_ALIGNED((uintptr_t)ir, 4)) {
     87				for (; (ir + 4) <= limit; ir += 4) {
     88					dv = *((u32 *)ir);
     89					if (dv) {
     90#  if defined(__LITTLE_ENDIAN)
     91						ir += __builtin_ctz(dv) >> 3;
     92#  elif defined(__BIG_ENDIAN)
     93						ir += __builtin_clz(dv) >> 3;
     94#  else
     95#    error "missing endian definition"
     96#  endif
     97						break;
     98					}
     99				}
    100			}
    101#endif
       			/* Bytewise tail for whatever the word loops left over. */
    102			while (likely(ir < limit) && unlikely(*ir == 0))
    103				ir++;
    104			run_length = ir - ip;
    105			if (run_length > MAX_ZERO_RUN_LENGTH)
    106				run_length = MAX_ZERO_RUN_LENGTH;
    107		} else {
       			/*
       			 * Hash the four bytes into a dictionary slot. The slot
       			 * holds the offset (from in) of an earlier position
       			 * with the same hash; record the current position and
       			 * treat ip as a literal unless the four bytes at the
       			 * earlier position are really equal.
       			 */
    108			t = ((dv * 0x1824429d) >> (32 - D_BITS)) & D_MASK;
    109			m_pos = in + dict[t];
    110			dict[t] = (lzo_dict_t) (ip - in);
    111			if (unlikely(dv != get_unaligned_le32(m_pos)))
    112				goto literal;
    113		}
    114
       		/*
       		 * A zero run or a match was found at ip. First flush the
       		 * literal bytes in [ii, ip), including any bytes carried
       		 * over from before the chunk (ti is consumed only once).
       		 */
    115		ii -= ti;
    116		ti = 0;
    117		t = ip - ii;
    118		if (t != 0) {
    119			if (t <= 3) {
       				/*
       				 * 1..3 literals: the count is OR-ed into the low
       				 * bits of an already written instruction byte.
       				 * NOTE(review): COPY4 is defined elsewhere and
       				 * presumably copies 4 bytes; only t of them are
       				 * kept because op advances by t - confirm.
       				 */
    120				op[*state_offset] |= t;
    121				COPY4(op, ii);
    122				op += t;
    123			} else if (t <= 16) {
       				/*
       				 * 4..16 literals: one length byte (t - 3), two
       				 * COPY8 calls, and op advances by t only.
       				 */
    124				*op++ = (t - 3);
    125				COPY8(op, ii);
    126				COPY8(op + 8, ii + 8);
    127				op += t;
    128			} else {
    129				if (t <= 18) {
    130					*op++ = (t - 3);
    131				} else {
       					/*
       					 * More than 18 literals: a 0 byte, then
       					 * one 0 byte per 255 of (t - 18), then
       					 * the remainder.
       					 */
    132					size_t tt = t - 18;
    133					*op++ = 0;
    134					while (unlikely(tt > 255)) {
    135						tt -= 255;
    136						*op++ = 0;
    137					}
    138					*op++ = tt;
    139				}
       				/* Copy in 16-byte steps, then the last bytes singly. */
    140				do {
    141					COPY8(op, ii);
    142					COPY8(op + 8, ii + 8);
    143					op += 16;
    144					ii += 16;
    145					t -= 16;
    146				} while (t >= 16);
    147				if (t > 0) do {
    148					*op++ = *ii++;
    149				} while (--t > 0);
    150			}
    151		}
    152
    153		if (unlikely(run_length)) {
       			/*
       			 * Emit the zero run as a 4-byte little-endian word:
       			 * the constant 0xfffc18, the low three bits of the
       			 * adjusted length in bits 0..2 and the adjusted length
       			 * shifted up to bit 21. state_offset -3 then refers to
       			 * the second byte of this word.
       			 */
    154			ip += run_length;
    155			run_length -= MIN_ZERO_RUN_LENGTH;
    156			put_unaligned_le32((run_length << 21) | 0xfffc18
    157					   | (run_length & 0x7), op);
    158			op += 4;
    159			run_length = 0;
    160			*state_offset = -3;
    161			goto finished_writing_instruction;
    162		}
    163
       		/*
       		 * The first four bytes are already known to match; extend
       		 * the match for as long as the bytes at ip and m_pos agree,
       		 * giving up once ip + m_len reaches ip_end.
       		 */
    164		m_len = 4;
    165		{
    166#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ64)
       		/* Compare 8 bytes per step; XOR is zero while they agree. */
    167		u64 v;
    168		v = get_unaligned((const u64 *) (ip + m_len)) ^
    169		    get_unaligned((const u64 *) (m_pos + m_len));
    170		if (unlikely(v == 0)) {
    171			do {
    172				m_len += 8;
    173				v = get_unaligned((const u64 *) (ip + m_len)) ^
    174				    get_unaligned((const u64 *) (m_pos + m_len));
    175				if (unlikely(ip + m_len >= ip_end))
    176					goto m_len_done;
    177			} while (v == 0);
    178		}
       		/* Add the number of leading equal bytes in the differing word. */
    179#  if defined(__LITTLE_ENDIAN)
    180		m_len += (unsigned) __builtin_ctzll(v) / 8;
    181#  elif defined(__BIG_ENDIAN)
    182		m_len += (unsigned) __builtin_clzll(v) / 8;
    183#  else
    184#    error "missing endian definition"
    185#  endif
    186#elif defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ32)
       		/* Same scheme with 4-byte words, two words per loop pass. */
    187		u32 v;
    188		v = get_unaligned((const u32 *) (ip + m_len)) ^
    189		    get_unaligned((const u32 *) (m_pos + m_len));
    190		if (unlikely(v == 0)) {
    191			do {
    192				m_len += 4;
    193				v = get_unaligned((const u32 *) (ip + m_len)) ^
    194				    get_unaligned((const u32 *) (m_pos + m_len));
    195				if (v != 0)
    196					break;
    197				m_len += 4;
    198				v = get_unaligned((const u32 *) (ip + m_len)) ^
    199				    get_unaligned((const u32 *) (m_pos + m_len));
    200				if (unlikely(ip + m_len >= ip_end))
    201					goto m_len_done;
    202			} while (v == 0);
    203		}
    204#  if defined(__LITTLE_ENDIAN)
    205		m_len += (unsigned) __builtin_ctz(v) / 8;
    206#  elif defined(__BIG_ENDIAN)
    207		m_len += (unsigned) __builtin_clz(v) / 8;
    208#  else
    209#    error "missing endian definition"
    210#  endif
    211#else
       		/* Bytewise comparison, unrolled eight bytes per loop pass. */
    212		if (unlikely(ip[m_len] == m_pos[m_len])) {
    213			do {
    214				m_len += 1;
    215				if (ip[m_len] != m_pos[m_len])
    216					break;
    217				m_len += 1;
    218				if (ip[m_len] != m_pos[m_len])
    219					break;
    220				m_len += 1;
    221				if (ip[m_len] != m_pos[m_len])
    222					break;
    223				m_len += 1;
    224				if (ip[m_len] != m_pos[m_len])
    225					break;
    226				m_len += 1;
    227				if (ip[m_len] != m_pos[m_len])
    228					break;
    229				m_len += 1;
    230				if (ip[m_len] != m_pos[m_len])
    231					break;
    232				m_len += 1;
    233				if (ip[m_len] != m_pos[m_len])
    234					break;
    235				m_len += 1;
    236				if (unlikely(ip + m_len >= ip_end))
    237					goto m_len_done;
    238			} while (ip[m_len] == m_pos[m_len]);
    239		}
    240#endif
    241		}
    242m_len_done:
    243
       		/*
       		 * m_off is the distance back to the match. Consume the matched
       		 * bytes and pick one of three encodings from the length and
       		 * distance limits (M2, M3, otherwise M4).
       		 */
    244		m_off = ip - m_pos;
    245		ip += m_len;
    246		if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) {
       			/* Two bytes: length and low offset bits, then the rest. */
    247			m_off -= 1;
    248			*op++ = (((m_len - 1) << 5) | ((m_off & 7) << 2));
    249			*op++ = (m_off >> 3);
    250		} else if (m_off <= M3_MAX_OFFSET) {
    251			m_off -= 1;
    252			if (m_len <= M3_MAX_LEN)
    253				*op++ = (M3_MARKER | (m_len - 2));
    254			else {
       				/*
       				 * Long match: marker with a zero length field,
       				 * one 0 byte per 255 of the excess length, then
       				 * the remainder.
       				 */
    255				m_len -= M3_MAX_LEN;
    256				*op++ = M3_MARKER | 0;
    257				while (unlikely(m_len > 255)) {
    258					m_len -= 255;
    259					*op++ = 0;
    260				}
    261				*op++ = (m_len);
    262			}
       			/* Offset in two bytes; the low two bits of the first stay 0. */
    263			*op++ = (m_off << 2);
    264			*op++ = (m_off >> 6);
    265		} else {
       			/* M4: offset is stored relative to 0x4000. */
    266			m_off -= 0x4000;
    267			if (m_len <= M4_MAX_LEN)
    268				*op++ = (M4_MARKER | ((m_off >> 11) & 8)
    269						| (m_len - 2));
    270			else {
    271				if (unlikely(((m_off & 0x403f) == 0x403f)
    272						&& (m_len >= 261)
    273						&& (m_len <= 264))
    274						&& likely(bitstream_version)) {
    275					// Under lzo-rle, block copies
    276					// for 261 <= length <= 264 and
    277					// (distance & 0x80f3) == 0x80f3
    278					// can result in ambiguous
    279					// output. Adjust length
    280					// to 260 to prevent ambiguity.
    281					ip -= m_len - 260;
    282					m_len = 260;
    283				}
    284				m_len -= M4_MAX_LEN;
    285				*op++ = (M4_MARKER | ((m_off >> 11) & 8));
    286				while (unlikely(m_len > 255)) {
    287					m_len -= 255;
    288					*op++ = 0;
    289				}
    290				*op++ = (m_len);
    291			}
    292			*op++ = (m_off << 2);
    293			*op++ = (m_off >> 6);
    294		}
       		/*
       		 * After a match, a following 1..3 byte literal count is OR-ed
       		 * into the second-to-last byte written (the low offset byte).
       		 */
    295		*state_offset = -2;
    296finished_writing_instruction:
       		/* Everything before ip has been emitted; continue scanning. */
    297		ii = ip;
    298		goto next;
    299	}
       	/* Report the bytes produced and how much input is still unemitted. */
    300	*out_len = op - out;
    301	return in_end - (ii - ti);
    302}
    303
    304static int lzogeneric1x_1_compress(const unsigned char *in, size_t in_len,
    305		     unsigned char *out, size_t *out_len,
    306		     void *wrkmem, const unsigned char bitstream_version)
    307{
    308	const unsigned char *ip = in;
    309	unsigned char *op = out;
    310	unsigned char *data_start;
    311	size_t l = in_len;
    312	size_t t = 0;
    313	signed char state_offset = -2;
    314	unsigned int m4_max_offset;
    315
    316	// LZO v0 will never write 17 as first byte (except for zero-length
    317	// input), so this is used to version the bitstream
    318	if (bitstream_version > 0) {
    319		*op++ = 17;
    320		*op++ = bitstream_version;
    321		m4_max_offset = M4_MAX_OFFSET_V1;
    322	} else {
    323		m4_max_offset = M4_MAX_OFFSET_V0;
    324	}
    325
    326	data_start = op;
    327
    328	while (l > 20) {
    329		size_t ll = l <= (m4_max_offset + 1) ? l : (m4_max_offset + 1);
    330		uintptr_t ll_end = (uintptr_t) ip + ll;
    331		if ((ll_end + ((t + ll) >> 5)) <= ll_end)
    332			break;
    333		BUILD_BUG_ON(D_SIZE * sizeof(lzo_dict_t) > LZO1X_1_MEM_COMPRESS);
    334		memset(wrkmem, 0, D_SIZE * sizeof(lzo_dict_t));
    335		t = lzo1x_1_do_compress(ip, ll, op, out_len, t, wrkmem,
    336					&state_offset, bitstream_version);
    337		ip += ll;
    338		op += *out_len;
    339		l  -= ll;
    340	}
    341	t += l;
    342
    343	if (t > 0) {
    344		const unsigned char *ii = in + in_len - t;
    345
    346		if (op == data_start && t <= 238) {
    347			*op++ = (17 + t);
    348		} else if (t <= 3) {
    349			op[state_offset] |= t;
    350		} else if (t <= 18) {
    351			*op++ = (t - 3);
    352		} else {
    353			size_t tt = t - 18;
    354			*op++ = 0;
    355			while (tt > 255) {
    356				tt -= 255;
    357				*op++ = 0;
    358			}
    359			*op++ = tt;
    360		}
    361		if (t >= 16) do {
    362			COPY8(op, ii);
    363			COPY8(op + 8, ii + 8);
    364			op += 16;
    365			ii += 16;
    366			t -= 16;
    367		} while (t >= 16);
    368		if (t > 0) do {
    369			*op++ = *ii++;
    370		} while (--t > 0);
    371	}
    372
    373	*op++ = M4_MARKER | 1;
    374	*op++ = 0;
    375	*op++ = 0;
    376
    377	*out_len = op - out;
    378	return LZO_E_OK;
    379}
    380
    /*
     * lzo1x_1_compress - compress @in_len bytes from @in into @out
     *
     * Forwards to lzogeneric1x_1_compress() with bitstream version 0, which
     * writes no version header. @out_len receives the compressed size and
     * @wrkmem is the compressor's work memory. Returns the callee's result.
     */
    int lzo1x_1_compress(const unsigned char *in, size_t in_len,
    		     unsigned char *out, size_t *out_len,
    		     void *wrkmem)
    {
    	const unsigned char classic_bitstream = 0;

    	return lzogeneric1x_1_compress(in, in_len, out, out_len, wrkmem,
    				       classic_bitstream);
    }
    387
    388int lzorle1x_1_compress(const unsigned char *in, size_t in_len,
    389		     unsigned char *out, size_t *out_len,
    390		     void *wrkmem)
    391{
    392	return lzogeneric1x_1_compress(in, in_len, out, out_len,
    393				       wrkmem, LZO_VERSION);
    394}
    395
    /* Export both public compressor entry points defined above. */
    396EXPORT_SYMBOL_GPL(lzo1x_1_compress);
    397EXPORT_SYMBOL_GPL(lzorle1x_1_compress);
    398
    /* Module metadata: license tag and a short description. */
    399MODULE_LICENSE("GPL");
    400MODULE_DESCRIPTION("LZO1X-1 Compressor");