cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

842_compress.c (14752B)


      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/*
      3 * 842 Software Compression
      4 *
      5 * Copyright (C) 2015 Dan Streetman, IBM Corp
      6 *
      7 * See 842.h for details of the 842 compressed format.
      8 */
      9
     10#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
     11#define MODULE_NAME "842_compress"
     12
     13#include <linux/hashtable.h>
     14
     15#include "842.h"
     16#include "842_debugfs.h"
     17
     18#define SW842_HASHTABLE8_BITS	(10)
     19#define SW842_HASHTABLE4_BITS	(11)
     20#define SW842_HASHTABLE2_BITS	(10)
     21
     22/* By default, we allow compressing input buffers of any length, but we must
     23 * use the non-standard "short data" template so the decompressor can correctly
     24 * reproduce the uncompressed data buffer at the right length.  However the
     25 * hardware 842 compressor will not recognize the "short data" template, and
     26 * will fail to decompress any compressed buffer containing it (I have no idea
     27 * why anyone would want to use software to compress and hardware to decompress
     28 * but that's beside the point).  This parameter forces the compression
     29 * function to simply reject any input buffer that isn't a multiple of 8 bytes
     30 * long, instead of using the "short data" template, so that all compressed
     31 * buffers produced by this function will be decompressable by the 842 hardware
     32 * decompressor.  Unless you have a specific need for that, leave this disabled
     33 * so that any length buffer can be compressed.
     34 */
     35static bool sw842_strict;
     36module_param_named(strict, sw842_strict, bool, 0644);
     37
     38static u8 comp_ops[OPS_MAX][5] = { /* params size in bits */
     39	{ I8, N0, N0, N0, 0x19 }, /* 8 */
     40	{ I4, I4, N0, N0, 0x18 }, /* 18 */
     41	{ I4, I2, I2, N0, 0x17 }, /* 25 */
     42	{ I2, I2, I4, N0, 0x13 }, /* 25 */
     43	{ I2, I2, I2, I2, 0x12 }, /* 32 */
     44	{ I4, I2, D2, N0, 0x16 }, /* 33 */
     45	{ I4, D2, I2, N0, 0x15 }, /* 33 */
     46	{ I2, D2, I4, N0, 0x0e }, /* 33 */
     47	{ D2, I2, I4, N0, 0x09 }, /* 33 */
     48	{ I2, I2, I2, D2, 0x11 }, /* 40 */
     49	{ I2, I2, D2, I2, 0x10 }, /* 40 */
     50	{ I2, D2, I2, I2, 0x0d }, /* 40 */
     51	{ D2, I2, I2, I2, 0x08 }, /* 40 */
     52	{ I4, D4, N0, N0, 0x14 }, /* 41 */
     53	{ D4, I4, N0, N0, 0x04 }, /* 41 */
     54	{ I2, I2, D4, N0, 0x0f }, /* 48 */
     55	{ I2, D2, I2, D2, 0x0c }, /* 48 */
     56	{ I2, D4, I2, N0, 0x0b }, /* 48 */
     57	{ D2, I2, I2, D2, 0x07 }, /* 48 */
     58	{ D2, I2, D2, I2, 0x06 }, /* 48 */
     59	{ D4, I2, I2, N0, 0x03 }, /* 48 */
     60	{ I2, D2, D4, N0, 0x0a }, /* 56 */
     61	{ D2, I2, D4, N0, 0x05 }, /* 56 */
     62	{ D4, I2, D2, N0, 0x02 }, /* 56 */
     63	{ D4, D2, I2, N0, 0x01 }, /* 56 */
     64	{ D8, N0, N0, N0, 0x00 }, /* 64 */
     65};
     66
     67struct sw842_hlist_node8 {
     68	struct hlist_node node;
     69	u64 data;
     70	u8 index;
     71};
     72
     73struct sw842_hlist_node4 {
     74	struct hlist_node node;
     75	u32 data;
     76	u16 index;
     77};
     78
     79struct sw842_hlist_node2 {
     80	struct hlist_node node;
     81	u16 data;
     82	u8 index;
     83};
     84
     85#define INDEX_NOT_FOUND		(-1)
     86#define INDEX_NOT_CHECKED	(-2)
     87
     88struct sw842_param {
     89	u8 *in;
     90	u8 *instart;
     91	u64 ilen;
     92	u8 *out;
     93	u64 olen;
     94	u8 bit;
     95	u64 data8[1];
     96	u32 data4[2];
     97	u16 data2[4];
     98	int index8[1];
     99	int index4[2];
    100	int index2[4];
    101	DECLARE_HASHTABLE(htable8, SW842_HASHTABLE8_BITS);
    102	DECLARE_HASHTABLE(htable4, SW842_HASHTABLE4_BITS);
    103	DECLARE_HASHTABLE(htable2, SW842_HASHTABLE2_BITS);
    104	struct sw842_hlist_node8 node8[1 << I8_BITS];
    105	struct sw842_hlist_node4 node4[1 << I4_BITS];
    106	struct sw842_hlist_node2 node2[1 << I2_BITS];
    107};
    108
    109#define get_input_data(p, o, b)						\
    110	be##b##_to_cpu(get_unaligned((__be##b *)((p)->in + (o))))
    111
    112#define init_hashtable_nodes(p, b)	do {			\
    113	int _i;							\
    114	hash_init((p)->htable##b);				\
    115	for (_i = 0; _i < ARRAY_SIZE((p)->node##b); _i++) {	\
    116		(p)->node##b[_i].index = _i;			\
    117		(p)->node##b[_i].data = 0;			\
    118		INIT_HLIST_NODE(&(p)->node##b[_i].node);	\
    119	}							\
    120} while (0)
    121
    122#define find_index(p, b, n)	({					\
    123	struct sw842_hlist_node##b *_n;					\
    124	p->index##b[n] = INDEX_NOT_FOUND;				\
    125	hash_for_each_possible(p->htable##b, _n, node, p->data##b[n]) {	\
    126		if (p->data##b[n] == _n->data) {			\
    127			p->index##b[n] = _n->index;			\
    128			break;						\
    129		}							\
    130	}								\
    131	p->index##b[n] >= 0;						\
    132})
    133
    134#define check_index(p, b, n)			\
    135	((p)->index##b[n] == INDEX_NOT_CHECKED	\
    136	 ? find_index(p, b, n)			\
    137	 : (p)->index##b[n] >= 0)
    138
    139#define replace_hash(p, b, i, d)	do {				\
    140	struct sw842_hlist_node##b *_n = &(p)->node##b[(i)+(d)];	\
    141	hash_del(&_n->node);						\
    142	_n->data = (p)->data##b[d];					\
    143	pr_debug("add hash index%x %x pos %x data %lx\n", b,		\
    144		 (unsigned int)_n->index,				\
    145		 (unsigned int)((p)->in - (p)->instart),		\
    146		 (unsigned long)_n->data);				\
    147	hash_add((p)->htable##b, &_n->node, _n->data);			\
    148} while (0)
    149
    150static u8 bmask[8] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
    151
    152static int add_bits(struct sw842_param *p, u64 d, u8 n);
    153
    154static int __split_add_bits(struct sw842_param *p, u64 d, u8 n, u8 s)
    155{
    156	int ret;
    157
    158	if (n <= s)
    159		return -EINVAL;
    160
    161	ret = add_bits(p, d >> s, n - s);
    162	if (ret)
    163		return ret;
    164	return add_bits(p, d & GENMASK_ULL(s - 1, 0), s);
    165}
    166
    167static int add_bits(struct sw842_param *p, u64 d, u8 n)
    168{
    169	int b = p->bit, bits = b + n, s = round_up(bits, 8) - bits;
    170	u64 o;
    171	u8 *out = p->out;
    172
    173	pr_debug("add %u bits %lx\n", (unsigned char)n, (unsigned long)d);
    174
    175	if (n > 64)
    176		return -EINVAL;
    177
    178	/* split this up if writing to > 8 bytes (i.e. n == 64 && p->bit > 0),
    179	 * or if we're at the end of the output buffer and would write past end
    180	 */
    181	if (bits > 64)
    182		return __split_add_bits(p, d, n, 32);
    183	else if (p->olen < 8 && bits > 32 && bits <= 56)
    184		return __split_add_bits(p, d, n, 16);
    185	else if (p->olen < 4 && bits > 16 && bits <= 24)
    186		return __split_add_bits(p, d, n, 8);
    187
    188	if (DIV_ROUND_UP(bits, 8) > p->olen)
    189		return -ENOSPC;
    190
    191	o = *out & bmask[b];
    192	d <<= s;
    193
    194	if (bits <= 8)
    195		*out = o | d;
    196	else if (bits <= 16)
    197		put_unaligned(cpu_to_be16(o << 8 | d), (__be16 *)out);
    198	else if (bits <= 24)
    199		put_unaligned(cpu_to_be32(o << 24 | d << 8), (__be32 *)out);
    200	else if (bits <= 32)
    201		put_unaligned(cpu_to_be32(o << 24 | d), (__be32 *)out);
    202	else if (bits <= 40)
    203		put_unaligned(cpu_to_be64(o << 56 | d << 24), (__be64 *)out);
    204	else if (bits <= 48)
    205		put_unaligned(cpu_to_be64(o << 56 | d << 16), (__be64 *)out);
    206	else if (bits <= 56)
    207		put_unaligned(cpu_to_be64(o << 56 | d << 8), (__be64 *)out);
    208	else
    209		put_unaligned(cpu_to_be64(o << 56 | d), (__be64 *)out);
    210
    211	p->bit += n;
    212
    213	if (p->bit > 7) {
    214		p->out += p->bit / 8;
    215		p->olen -= p->bit / 8;
    216		p->bit %= 8;
    217	}
    218
    219	return 0;
    220}
    221
    222static int add_template(struct sw842_param *p, u8 c)
    223{
    224	int ret, i, b = 0;
    225	u8 *t = comp_ops[c];
    226	bool inv = false;
    227
    228	if (c >= OPS_MAX)
    229		return -EINVAL;
    230
    231	pr_debug("template %x\n", t[4]);
    232
    233	ret = add_bits(p, t[4], OP_BITS);
    234	if (ret)
    235		return ret;
    236
    237	for (i = 0; i < 4; i++) {
    238		pr_debug("op %x\n", t[i]);
    239
    240		switch (t[i] & OP_AMOUNT) {
    241		case OP_AMOUNT_8:
    242			if (b)
    243				inv = true;
    244			else if (t[i] & OP_ACTION_INDEX)
    245				ret = add_bits(p, p->index8[0], I8_BITS);
    246			else if (t[i] & OP_ACTION_DATA)
    247				ret = add_bits(p, p->data8[0], 64);
    248			else
    249				inv = true;
    250			break;
    251		case OP_AMOUNT_4:
    252			if (b == 2 && t[i] & OP_ACTION_DATA)
    253				ret = add_bits(p, get_input_data(p, 2, 32), 32);
    254			else if (b != 0 && b != 4)
    255				inv = true;
    256			else if (t[i] & OP_ACTION_INDEX)
    257				ret = add_bits(p, p->index4[b >> 2], I4_BITS);
    258			else if (t[i] & OP_ACTION_DATA)
    259				ret = add_bits(p, p->data4[b >> 2], 32);
    260			else
    261				inv = true;
    262			break;
    263		case OP_AMOUNT_2:
    264			if (b != 0 && b != 2 && b != 4 && b != 6)
    265				inv = true;
    266			if (t[i] & OP_ACTION_INDEX)
    267				ret = add_bits(p, p->index2[b >> 1], I2_BITS);
    268			else if (t[i] & OP_ACTION_DATA)
    269				ret = add_bits(p, p->data2[b >> 1], 16);
    270			else
    271				inv = true;
    272			break;
    273		case OP_AMOUNT_0:
    274			inv = (b != 8) || !(t[i] & OP_ACTION_NOOP);
    275			break;
    276		default:
    277			inv = true;
    278			break;
    279		}
    280
    281		if (ret)
    282			return ret;
    283
    284		if (inv) {
    285			pr_err("Invalid templ %x op %d : %x %x %x %x\n",
    286			       c, i, t[0], t[1], t[2], t[3]);
    287			return -EINVAL;
    288		}
    289
    290		b += t[i] & OP_AMOUNT;
    291	}
    292
    293	if (b != 8) {
    294		pr_err("Invalid template %x len %x : %x %x %x %x\n",
    295		       c, b, t[0], t[1], t[2], t[3]);
    296		return -EINVAL;
    297	}
    298
    299	if (sw842_template_counts)
    300		atomic_inc(&template_count[t[4]]);
    301
    302	return 0;
    303}
    304
    305static int add_repeat_template(struct sw842_param *p, u8 r)
    306{
    307	int ret;
    308
    309	/* repeat param is 0-based */
    310	if (!r || --r > REPEAT_BITS_MAX)
    311		return -EINVAL;
    312
    313	ret = add_bits(p, OP_REPEAT, OP_BITS);
    314	if (ret)
    315		return ret;
    316
    317	ret = add_bits(p, r, REPEAT_BITS);
    318	if (ret)
    319		return ret;
    320
    321	if (sw842_template_counts)
    322		atomic_inc(&template_repeat_count);
    323
    324	return 0;
    325}
    326
    327static int add_short_data_template(struct sw842_param *p, u8 b)
    328{
    329	int ret, i;
    330
    331	if (!b || b > SHORT_DATA_BITS_MAX)
    332		return -EINVAL;
    333
    334	ret = add_bits(p, OP_SHORT_DATA, OP_BITS);
    335	if (ret)
    336		return ret;
    337
    338	ret = add_bits(p, b, SHORT_DATA_BITS);
    339	if (ret)
    340		return ret;
    341
    342	for (i = 0; i < b; i++) {
    343		ret = add_bits(p, p->in[i], 8);
    344		if (ret)
    345			return ret;
    346	}
    347
    348	if (sw842_template_counts)
    349		atomic_inc(&template_short_data_count);
    350
    351	return 0;
    352}
    353
    354static int add_zeros_template(struct sw842_param *p)
    355{
    356	int ret = add_bits(p, OP_ZEROS, OP_BITS);
    357
    358	if (ret)
    359		return ret;
    360
    361	if (sw842_template_counts)
    362		atomic_inc(&template_zeros_count);
    363
    364	return 0;
    365}
    366
    367static int add_end_template(struct sw842_param *p)
    368{
    369	int ret = add_bits(p, OP_END, OP_BITS);
    370
    371	if (ret)
    372		return ret;
    373
    374	if (sw842_template_counts)
    375		atomic_inc(&template_end_count);
    376
    377	return 0;
    378}
    379
    380static bool check_template(struct sw842_param *p, u8 c)
    381{
    382	u8 *t = comp_ops[c];
    383	int i, match, b = 0;
    384
    385	if (c >= OPS_MAX)
    386		return false;
    387
    388	for (i = 0; i < 4; i++) {
    389		if (t[i] & OP_ACTION_INDEX) {
    390			if (t[i] & OP_AMOUNT_2)
    391				match = check_index(p, 2, b >> 1);
    392			else if (t[i] & OP_AMOUNT_4)
    393				match = check_index(p, 4, b >> 2);
    394			else if (t[i] & OP_AMOUNT_8)
    395				match = check_index(p, 8, 0);
    396			else
    397				return false;
    398			if (!match)
    399				return false;
    400		}
    401
    402		b += t[i] & OP_AMOUNT;
    403	}
    404
    405	return true;
    406}
    407
    408static void get_next_data(struct sw842_param *p)
    409{
    410	p->data8[0] = get_input_data(p, 0, 64);
    411	p->data4[0] = get_input_data(p, 0, 32);
    412	p->data4[1] = get_input_data(p, 4, 32);
    413	p->data2[0] = get_input_data(p, 0, 16);
    414	p->data2[1] = get_input_data(p, 2, 16);
    415	p->data2[2] = get_input_data(p, 4, 16);
    416	p->data2[3] = get_input_data(p, 6, 16);
    417}
    418
    419/* update the hashtable entries.
    420 * only call this after finding/adding the current template
    421 * the dataN fields for the current 8 byte block must be already updated
    422 */
    423static void update_hashtables(struct sw842_param *p)
    424{
    425	u64 pos = p->in - p->instart;
    426	u64 n8 = (pos >> 3) % (1 << I8_BITS);
    427	u64 n4 = (pos >> 2) % (1 << I4_BITS);
    428	u64 n2 = (pos >> 1) % (1 << I2_BITS);
    429
    430	replace_hash(p, 8, n8, 0);
    431	replace_hash(p, 4, n4, 0);
    432	replace_hash(p, 4, n4, 1);
    433	replace_hash(p, 2, n2, 0);
    434	replace_hash(p, 2, n2, 1);
    435	replace_hash(p, 2, n2, 2);
    436	replace_hash(p, 2, n2, 3);
    437}
    438
    439/* find the next template to use, and add it
    440 * the p->dataN fields must already be set for the current 8 byte block
    441 */
    442static int process_next(struct sw842_param *p)
    443{
    444	int ret, i;
    445
    446	p->index8[0] = INDEX_NOT_CHECKED;
    447	p->index4[0] = INDEX_NOT_CHECKED;
    448	p->index4[1] = INDEX_NOT_CHECKED;
    449	p->index2[0] = INDEX_NOT_CHECKED;
    450	p->index2[1] = INDEX_NOT_CHECKED;
    451	p->index2[2] = INDEX_NOT_CHECKED;
    452	p->index2[3] = INDEX_NOT_CHECKED;
    453
    454	/* check up to OPS_MAX - 1; last op is our fallback */
    455	for (i = 0; i < OPS_MAX - 1; i++) {
    456		if (check_template(p, i))
    457			break;
    458	}
    459
    460	ret = add_template(p, i);
    461	if (ret)
    462		return ret;
    463
    464	return 0;
    465}
    466
    467/**
    468 * sw842_compress
    469 *
    470 * Compress the uncompressed buffer of length @ilen at @in to the output buffer
    471 * @out, using no more than @olen bytes, using the 842 compression format.
    472 *
    473 * Returns: 0 on success, error on failure.  The @olen parameter
    474 * will contain the number of output bytes written on success, or
    475 * 0 on error.
    476 */
    477int sw842_compress(const u8 *in, unsigned int ilen,
    478		   u8 *out, unsigned int *olen, void *wmem)
    479{
    480	struct sw842_param *p = (struct sw842_param *)wmem;
    481	int ret;
    482	u64 last, next, pad, total;
    483	u8 repeat_count = 0;
    484	u32 crc;
    485
    486	BUILD_BUG_ON(sizeof(*p) > SW842_MEM_COMPRESS);
    487
    488	init_hashtable_nodes(p, 8);
    489	init_hashtable_nodes(p, 4);
    490	init_hashtable_nodes(p, 2);
    491
    492	p->in = (u8 *)in;
    493	p->instart = p->in;
    494	p->ilen = ilen;
    495	p->out = out;
    496	p->olen = *olen;
    497	p->bit = 0;
    498
    499	total = p->olen;
    500
    501	*olen = 0;
    502
    503	/* if using strict mode, we can only compress a multiple of 8 */
    504	if (sw842_strict && (ilen % 8)) {
    505		pr_err("Using strict mode, can't compress len %d\n", ilen);
    506		return -EINVAL;
    507	}
    508
    509	/* let's compress at least 8 bytes, mkay? */
    510	if (unlikely(ilen < 8))
    511		goto skip_comp;
    512
    513	/* make initial 'last' different so we don't match the first time */
    514	last = ~get_unaligned((u64 *)p->in);
    515
    516	while (p->ilen > 7) {
    517		next = get_unaligned((u64 *)p->in);
    518
    519		/* must get the next data, as we need to update the hashtable
    520		 * entries with the new data every time
    521		 */
    522		get_next_data(p);
    523
    524		/* we don't care about endianness in last or next;
    525		 * we're just comparing 8 bytes to another 8 bytes,
    526		 * they're both the same endianness
    527		 */
    528		if (next == last) {
    529			/* repeat count bits are 0-based, so we stop at +1 */
    530			if (++repeat_count <= REPEAT_BITS_MAX)
    531				goto repeat;
    532		}
    533		if (repeat_count) {
    534			ret = add_repeat_template(p, repeat_count);
    535			repeat_count = 0;
    536			if (next == last) /* reached max repeat bits */
    537				goto repeat;
    538		}
    539
    540		if (next == 0)
    541			ret = add_zeros_template(p);
    542		else
    543			ret = process_next(p);
    544
    545		if (ret)
    546			return ret;
    547
    548repeat:
    549		last = next;
    550		update_hashtables(p);
    551		p->in += 8;
    552		p->ilen -= 8;
    553	}
    554
    555	if (repeat_count) {
    556		ret = add_repeat_template(p, repeat_count);
    557		if (ret)
    558			return ret;
    559	}
    560
    561skip_comp:
    562	if (p->ilen > 0) {
    563		ret = add_short_data_template(p, p->ilen);
    564		if (ret)
    565			return ret;
    566
    567		p->in += p->ilen;
    568		p->ilen = 0;
    569	}
    570
    571	ret = add_end_template(p);
    572	if (ret)
    573		return ret;
    574
    575	/*
    576	 * crc(0:31) is appended to target data starting with the next
    577	 * bit after End of stream template.
    578	 * nx842 calculates CRC for data in big-endian format. So doing
    579	 * same here so that sw842 decompression can be used for both
    580	 * compressed data.
    581	 */
    582	crc = crc32_be(0, in, ilen);
    583	ret = add_bits(p, crc, CRC_BITS);
    584	if (ret)
    585		return ret;
    586
    587	if (p->bit) {
    588		p->out++;
    589		p->olen--;
    590		p->bit = 0;
    591	}
    592
    593	/* pad compressed length to multiple of 8 */
    594	pad = (8 - ((total - p->olen) % 8)) % 8;
    595	if (pad) {
    596		if (pad > p->olen) /* we were so close! */
    597			return -ENOSPC;
    598		memset(p->out, 0, pad);
    599		p->out += pad;
    600		p->olen -= pad;
    601	}
    602
    603	if (unlikely((total - p->olen) > UINT_MAX))
    604		return -ENOSPC;
    605
    606	*olen = total - p->olen;
    607
    608	return 0;
    609}
    610EXPORT_SYMBOL_GPL(sw842_compress);
    611
    612static int __init sw842_init(void)
    613{
    614	if (sw842_template_counts)
    615		sw842_debugfs_create();
    616
    617	return 0;
    618}
    619module_init(sw842_init);
    620
    621static void __exit sw842_exit(void)
    622{
    623	if (sw842_template_counts)
    624		sw842_debugfs_remove();
    625}
    626module_exit(sw842_exit);
    627
    628MODULE_LICENSE("GPL");
    629MODULE_DESCRIPTION("Software 842 Compressor");
    630MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");