842_compress.c (14752B)
1// SPDX-License-Identifier: GPL-2.0-or-later 2/* 3 * 842 Software Compression 4 * 5 * Copyright (C) 2015 Dan Streetman, IBM Corp 6 * 7 * See 842.h for details of the 842 compressed format. 8 */ 9 10#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 11#define MODULE_NAME "842_compress" 12 13#include <linux/hashtable.h> 14 15#include "842.h" 16#include "842_debugfs.h" 17 18#define SW842_HASHTABLE8_BITS (10) 19#define SW842_HASHTABLE4_BITS (11) 20#define SW842_HASHTABLE2_BITS (10) 21 22/* By default, we allow compressing input buffers of any length, but we must 23 * use the non-standard "short data" template so the decompressor can correctly 24 * reproduce the uncompressed data buffer at the right length. However the 25 * hardware 842 compressor will not recognize the "short data" template, and 26 * will fail to decompress any compressed buffer containing it (I have no idea 27 * why anyone would want to use software to compress and hardware to decompress 28 * but that's beside the point). This parameter forces the compression 29 * function to simply reject any input buffer that isn't a multiple of 8 bytes 30 * long, instead of using the "short data" template, so that all compressed 31 * buffers produced by this function will be decompressable by the 842 hardware 32 * decompressor. Unless you have a specific need for that, leave this disabled 33 * so that any length buffer can be compressed. 34 */ 35static bool sw842_strict; 36module_param_named(strict, sw842_strict, bool, 0644); 37 38static u8 comp_ops[OPS_MAX][5] = { /* params size in bits */ 39 { I8, N0, N0, N0, 0x19 }, /* 8 */ 40 { I4, I4, N0, N0, 0x18 }, /* 18 */ 41 { I4, I2, I2, N0, 0x17 }, /* 25 */ 42 { I2, I2, I4, N0, 0x13 }, /* 25 */ 43 { I2, I2, I2, I2, 0x12 }, /* 32 */ 44 { I4, I2, D2, N0, 0x16 }, /* 33 */ 45 { I4, D2, I2, N0, 0x15 }, /* 33 */ 46 { I2, D2, I4, N0, 0x0e }, /* 33 */ 47 { D2, I2, I4, N0, 0x09 }, /* 33 */ 48 { I2, I2, I2, D2, 0x11 }, /* 40 */ 49 { I2, I2, D2, I2, 0x10 }, /* 40 */ 50 { I2, D2, I2, I2, 0x0d }, /* 40 */ 51 { D2, I2, I2, I2, 0x08 }, /* 40 */ 52 { I4, D4, N0, N0, 0x14 }, /* 41 */ 53 { D4, I4, N0, N0, 0x04 }, /* 41 */ 54 { I2, I2, D4, N0, 0x0f }, /* 48 */ 55 { I2, D2, I2, D2, 0x0c }, /* 48 */ 56 { I2, D4, I2, N0, 0x0b }, /* 48 */ 57 { D2, I2, I2, D2, 0x07 }, /* 48 */ 58 { D2, I2, D2, I2, 0x06 }, /* 48 */ 59 { D4, I2, I2, N0, 0x03 }, /* 48 */ 60 { I2, D2, D4, N0, 0x0a }, /* 56 */ 61 { D2, I2, D4, N0, 0x05 }, /* 56 */ 62 { D4, I2, D2, N0, 0x02 }, /* 56 */ 63 { D4, D2, I2, N0, 0x01 }, /* 56 */ 64 { D8, N0, N0, N0, 0x00 }, /* 64 */ 65}; 66 67struct sw842_hlist_node8 { 68 struct hlist_node node; 69 u64 data; 70 u8 index; 71}; 72 73struct sw842_hlist_node4 { 74 struct hlist_node node; 75 u32 data; 76 u16 index; 77}; 78 79struct sw842_hlist_node2 { 80 struct hlist_node node; 81 u16 data; 82 u8 index; 83}; 84 85#define INDEX_NOT_FOUND (-1) 86#define INDEX_NOT_CHECKED (-2) 87 88struct sw842_param { 89 u8 *in; 90 u8 *instart; 91 u64 ilen; 92 u8 *out; 93 u64 olen; 94 u8 bit; 95 u64 data8[1]; 96 u32 data4[2]; 97 u16 data2[4]; 98 int index8[1]; 99 int index4[2]; 100 int index2[4]; 101 DECLARE_HASHTABLE(htable8, SW842_HASHTABLE8_BITS); 102 DECLARE_HASHTABLE(htable4, SW842_HASHTABLE4_BITS); 103 DECLARE_HASHTABLE(htable2, SW842_HASHTABLE2_BITS); 104 struct sw842_hlist_node8 node8[1 << I8_BITS]; 105 struct sw842_hlist_node4 node4[1 << I4_BITS]; 106 struct sw842_hlist_node2 node2[1 << I2_BITS]; 107}; 108 109#define get_input_data(p, o, b) \ 110 be##b##_to_cpu(get_unaligned((__be##b *)((p)->in + (o)))) 111 112#define init_hashtable_nodes(p, b) do { \ 113 int _i; \ 114 hash_init((p)->htable##b); \ 115 for (_i = 0; _i < ARRAY_SIZE((p)->node##b); _i++) { \ 116 (p)->node##b[_i].index = _i; \ 117 (p)->node##b[_i].data = 0; \ 118 INIT_HLIST_NODE(&(p)->node##b[_i].node); \ 119 } \ 120} while (0) 121 122#define find_index(p, b, n) ({ \ 123 struct sw842_hlist_node##b *_n; \ 124 p->index##b[n] = INDEX_NOT_FOUND; \ 125 hash_for_each_possible(p->htable##b, _n, node, p->data##b[n]) { \ 126 if (p->data##b[n] == _n->data) { \ 127 p->index##b[n] = _n->index; \ 128 break; \ 129 } \ 130 } \ 131 p->index##b[n] >= 0; \ 132}) 133 134#define check_index(p, b, n) \ 135 ((p)->index##b[n] == INDEX_NOT_CHECKED \ 136 ? find_index(p, b, n) \ 137 : (p)->index##b[n] >= 0) 138 139#define replace_hash(p, b, i, d) do { \ 140 struct sw842_hlist_node##b *_n = &(p)->node##b[(i)+(d)]; \ 141 hash_del(&_n->node); \ 142 _n->data = (p)->data##b[d]; \ 143 pr_debug("add hash index%x %x pos %x data %lx\n", b, \ 144 (unsigned int)_n->index, \ 145 (unsigned int)((p)->in - (p)->instart), \ 146 (unsigned long)_n->data); \ 147 hash_add((p)->htable##b, &_n->node, _n->data); \ 148} while (0) 149 150static u8 bmask[8] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe }; 151 152static int add_bits(struct sw842_param *p, u64 d, u8 n); 153 154static int __split_add_bits(struct sw842_param *p, u64 d, u8 n, u8 s) 155{ 156 int ret; 157 158 if (n <= s) 159 return -EINVAL; 160 161 ret = add_bits(p, d >> s, n - s); 162 if (ret) 163 return ret; 164 return add_bits(p, d & GENMASK_ULL(s - 1, 0), s); 165} 166 167static int add_bits(struct sw842_param *p, u64 d, u8 n) 168{ 169 int b = p->bit, bits = b + n, s = round_up(bits, 8) - bits; 170 u64 o; 171 u8 *out = p->out; 172 173 pr_debug("add %u bits %lx\n", (unsigned char)n, (unsigned long)d); 174 175 if (n > 64) 176 return -EINVAL; 177 178 /* split this up if writing to > 8 bytes (i.e. n == 64 && p->bit > 0), 179 * or if we're at the end of the output buffer and would write past end 180 */ 181 if (bits > 64) 182 return __split_add_bits(p, d, n, 32); 183 else if (p->olen < 8 && bits > 32 && bits <= 56) 184 return __split_add_bits(p, d, n, 16); 185 else if (p->olen < 4 && bits > 16 && bits <= 24) 186 return __split_add_bits(p, d, n, 8); 187 188 if (DIV_ROUND_UP(bits, 8) > p->olen) 189 return -ENOSPC; 190 191 o = *out & bmask[b]; 192 d <<= s; 193 194 if (bits <= 8) 195 *out = o | d; 196 else if (bits <= 16) 197 put_unaligned(cpu_to_be16(o << 8 | d), (__be16 *)out); 198 else if (bits <= 24) 199 put_unaligned(cpu_to_be32(o << 24 | d << 8), (__be32 *)out); 200 else if (bits <= 32) 201 put_unaligned(cpu_to_be32(o << 24 | d), (__be32 *)out); 202 else if (bits <= 40) 203 put_unaligned(cpu_to_be64(o << 56 | d << 24), (__be64 *)out); 204 else if (bits <= 48) 205 put_unaligned(cpu_to_be64(o << 56 | d << 16), (__be64 *)out); 206 else if (bits <= 56) 207 put_unaligned(cpu_to_be64(o << 56 | d << 8), (__be64 *)out); 208 else 209 put_unaligned(cpu_to_be64(o << 56 | d), (__be64 *)out); 210 211 p->bit += n; 212 213 if (p->bit > 7) { 214 p->out += p->bit / 8; 215 p->olen -= p->bit / 8; 216 p->bit %= 8; 217 } 218 219 return 0; 220} 221 222static int add_template(struct sw842_param *p, u8 c) 223{ 224 int ret, i, b = 0; 225 u8 *t = comp_ops[c]; 226 bool inv = false; 227 228 if (c >= OPS_MAX) 229 return -EINVAL; 230 231 pr_debug("template %x\n", t[4]); 232 233 ret = add_bits(p, t[4], OP_BITS); 234 if (ret) 235 return ret; 236 237 for (i = 0; i < 4; i++) { 238 pr_debug("op %x\n", t[i]); 239 240 switch (t[i] & OP_AMOUNT) { 241 case OP_AMOUNT_8: 242 if (b) 243 inv = true; 244 else if (t[i] & OP_ACTION_INDEX) 245 ret = add_bits(p, p->index8[0], I8_BITS); 246 else if (t[i] & OP_ACTION_DATA) 247 ret = add_bits(p, p->data8[0], 64); 248 else 249 inv = true; 250 break; 251 case OP_AMOUNT_4: 252 if (b == 2 && t[i] & OP_ACTION_DATA) 253 ret = add_bits(p, get_input_data(p, 2, 32), 32); 254 else if (b != 0 && b != 4) 255 inv = true; 256 else if (t[i] & OP_ACTION_INDEX) 257 ret = add_bits(p, p->index4[b >> 2], I4_BITS); 258 else if (t[i] & OP_ACTION_DATA) 259 ret = add_bits(p, p->data4[b >> 2], 32); 260 else 261 inv = true; 262 break; 263 case OP_AMOUNT_2: 264 if (b != 0 && b != 2 && b != 4 && b != 6) 265 inv = true; 266 if (t[i] & OP_ACTION_INDEX) 267 ret = add_bits(p, p->index2[b >> 1], I2_BITS); 268 else if (t[i] & OP_ACTION_DATA) 269 ret = add_bits(p, p->data2[b >> 1], 16); 270 else 271 inv = true; 272 break; 273 case OP_AMOUNT_0: 274 inv = (b != 8) || !(t[i] & OP_ACTION_NOOP); 275 break; 276 default: 277 inv = true; 278 break; 279 } 280 281 if (ret) 282 return ret; 283 284 if (inv) { 285 pr_err("Invalid templ %x op %d : %x %x %x %x\n", 286 c, i, t[0], t[1], t[2], t[3]); 287 return -EINVAL; 288 } 289 290 b += t[i] & OP_AMOUNT; 291 } 292 293 if (b != 8) { 294 pr_err("Invalid template %x len %x : %x %x %x %x\n", 295 c, b, t[0], t[1], t[2], t[3]); 296 return -EINVAL; 297 } 298 299 if (sw842_template_counts) 300 atomic_inc(&template_count[t[4]]); 301 302 return 0; 303} 304 305static int add_repeat_template(struct sw842_param *p, u8 r) 306{ 307 int ret; 308 309 /* repeat param is 0-based */ 310 if (!r || --r > REPEAT_BITS_MAX) 311 return -EINVAL; 312 313 ret = add_bits(p, OP_REPEAT, OP_BITS); 314 if (ret) 315 return ret; 316 317 ret = add_bits(p, r, REPEAT_BITS); 318 if (ret) 319 return ret; 320 321 if (sw842_template_counts) 322 atomic_inc(&template_repeat_count); 323 324 return 0; 325} 326 327static int add_short_data_template(struct sw842_param *p, u8 b) 328{ 329 int ret, i; 330 331 if (!b || b > SHORT_DATA_BITS_MAX) 332 return -EINVAL; 333 334 ret = add_bits(p, OP_SHORT_DATA, OP_BITS); 335 if (ret) 336 return ret; 337 338 ret = add_bits(p, b, SHORT_DATA_BITS); 339 if (ret) 340 return ret; 341 342 for (i = 0; i < b; i++) { 343 ret = add_bits(p, p->in[i], 8); 344 if (ret) 345 return ret; 346 } 347 348 if (sw842_template_counts) 349 atomic_inc(&template_short_data_count); 350 351 return 0; 352} 353 354static int add_zeros_template(struct sw842_param *p) 355{ 356 int ret = add_bits(p, OP_ZEROS, OP_BITS); 357 358 if (ret) 359 return ret; 360 361 if (sw842_template_counts) 362 atomic_inc(&template_zeros_count); 363 364 return 0; 365} 366 367static int add_end_template(struct sw842_param *p) 368{ 369 int ret = add_bits(p, OP_END, OP_BITS); 370 371 if (ret) 372 return ret; 373 374 if (sw842_template_counts) 375 atomic_inc(&template_end_count); 376 377 return 0; 378} 379 380static bool check_template(struct sw842_param *p, u8 c) 381{ 382 u8 *t = comp_ops[c]; 383 int i, match, b = 0; 384 385 if (c >= OPS_MAX) 386 return false; 387 388 for (i = 0; i < 4; i++) { 389 if (t[i] & OP_ACTION_INDEX) { 390 if (t[i] & OP_AMOUNT_2) 391 match = check_index(p, 2, b >> 1); 392 else if (t[i] & OP_AMOUNT_4) 393 match = check_index(p, 4, b >> 2); 394 else if (t[i] & OP_AMOUNT_8) 395 match = check_index(p, 8, 0); 396 else 397 return false; 398 if (!match) 399 return false; 400 } 401 402 b += t[i] & OP_AMOUNT; 403 } 404 405 return true; 406} 407 408static void get_next_data(struct sw842_param *p) 409{ 410 p->data8[0] = get_input_data(p, 0, 64); 411 p->data4[0] = get_input_data(p, 0, 32); 412 p->data4[1] = get_input_data(p, 4, 32); 413 p->data2[0] = get_input_data(p, 0, 16); 414 p->data2[1] = get_input_data(p, 2, 16); 415 p->data2[2] = get_input_data(p, 4, 16); 416 p->data2[3] = get_input_data(p, 6, 16); 417} 418 419/* update the hashtable entries. 420 * only call this after finding/adding the current template 421 * the dataN fields for the current 8 byte block must be already updated 422 */ 423static void update_hashtables(struct sw842_param *p) 424{ 425 u64 pos = p->in - p->instart; 426 u64 n8 = (pos >> 3) % (1 << I8_BITS); 427 u64 n4 = (pos >> 2) % (1 << I4_BITS); 428 u64 n2 = (pos >> 1) % (1 << I2_BITS); 429 430 replace_hash(p, 8, n8, 0); 431 replace_hash(p, 4, n4, 0); 432 replace_hash(p, 4, n4, 1); 433 replace_hash(p, 2, n2, 0); 434 replace_hash(p, 2, n2, 1); 435 replace_hash(p, 2, n2, 2); 436 replace_hash(p, 2, n2, 3); 437} 438 439/* find the next template to use, and add it 440 * the p->dataN fields must already be set for the current 8 byte block 441 */ 442static int process_next(struct sw842_param *p) 443{ 444 int ret, i; 445 446 p->index8[0] = INDEX_NOT_CHECKED; 447 p->index4[0] = INDEX_NOT_CHECKED; 448 p->index4[1] = INDEX_NOT_CHECKED; 449 p->index2[0] = INDEX_NOT_CHECKED; 450 p->index2[1] = INDEX_NOT_CHECKED; 451 p->index2[2] = INDEX_NOT_CHECKED; 452 p->index2[3] = INDEX_NOT_CHECKED; 453 454 /* check up to OPS_MAX - 1; last op is our fallback */ 455 for (i = 0; i < OPS_MAX - 1; i++) { 456 if (check_template(p, i)) 457 break; 458 } 459 460 ret = add_template(p, i); 461 if (ret) 462 return ret; 463 464 return 0; 465} 466 467/** 468 * sw842_compress 469 * 470 * Compress the uncompressed buffer of length @ilen at @in to the output buffer 471 * @out, using no more than @olen bytes, using the 842 compression format. 472 * 473 * Returns: 0 on success, error on failure. The @olen parameter 474 * will contain the number of output bytes written on success, or 475 * 0 on error. 476 */ 477int sw842_compress(const u8 *in, unsigned int ilen, 478 u8 *out, unsigned int *olen, void *wmem) 479{ 480 struct sw842_param *p = (struct sw842_param *)wmem; 481 int ret; 482 u64 last, next, pad, total; 483 u8 repeat_count = 0; 484 u32 crc; 485 486 BUILD_BUG_ON(sizeof(*p) > SW842_MEM_COMPRESS); 487 488 init_hashtable_nodes(p, 8); 489 init_hashtable_nodes(p, 4); 490 init_hashtable_nodes(p, 2); 491 492 p->in = (u8 *)in; 493 p->instart = p->in; 494 p->ilen = ilen; 495 p->out = out; 496 p->olen = *olen; 497 p->bit = 0; 498 499 total = p->olen; 500 501 *olen = 0; 502 503 /* if using strict mode, we can only compress a multiple of 8 */ 504 if (sw842_strict && (ilen % 8)) { 505 pr_err("Using strict mode, can't compress len %d\n", ilen); 506 return -EINVAL; 507 } 508 509 /* let's compress at least 8 bytes, mkay? */ 510 if (unlikely(ilen < 8)) 511 goto skip_comp; 512 513 /* make initial 'last' different so we don't match the first time */ 514 last = ~get_unaligned((u64 *)p->in); 515 516 while (p->ilen > 7) { 517 next = get_unaligned((u64 *)p->in); 518 519 /* must get the next data, as we need to update the hashtable 520 * entries with the new data every time 521 */ 522 get_next_data(p); 523 524 /* we don't care about endianness in last or next; 525 * we're just comparing 8 bytes to another 8 bytes, 526 * they're both the same endianness 527 */ 528 if (next == last) { 529 /* repeat count bits are 0-based, so we stop at +1 */ 530 if (++repeat_count <= REPEAT_BITS_MAX) 531 goto repeat; 532 } 533 if (repeat_count) { 534 ret = add_repeat_template(p, repeat_count); 535 repeat_count = 0; 536 if (next == last) /* reached max repeat bits */ 537 goto repeat; 538 } 539 540 if (next == 0) 541 ret = add_zeros_template(p); 542 else 543 ret = process_next(p); 544 545 if (ret) 546 return ret; 547 548repeat: 549 last = next; 550 update_hashtables(p); 551 p->in += 8; 552 p->ilen -= 8; 553 } 554 555 if (repeat_count) { 556 ret = add_repeat_template(p, repeat_count); 557 if (ret) 558 return ret; 559 } 560 561skip_comp: 562 if (p->ilen > 0) { 563 ret = add_short_data_template(p, p->ilen); 564 if (ret) 565 return ret; 566 567 p->in += p->ilen; 568 p->ilen = 0; 569 } 570 571 ret = add_end_template(p); 572 if (ret) 573 return ret; 574 575 /* 576 * crc(0:31) is appended to target data starting with the next 577 * bit after End of stream template. 578 * nx842 calculates CRC for data in big-endian format. So doing 579 * same here so that sw842 decompression can be used for both 580 * compressed data. 581 */ 582 crc = crc32_be(0, in, ilen); 583 ret = add_bits(p, crc, CRC_BITS); 584 if (ret) 585 return ret; 586 587 if (p->bit) { 588 p->out++; 589 p->olen--; 590 p->bit = 0; 591 } 592 593 /* pad compressed length to multiple of 8 */ 594 pad = (8 - ((total - p->olen) % 8)) % 8; 595 if (pad) { 596 if (pad > p->olen) /* we were so close! */ 597 return -ENOSPC; 598 memset(p->out, 0, pad); 599 p->out += pad; 600 p->olen -= pad; 601 } 602 603 if (unlikely((total - p->olen) > UINT_MAX)) 604 return -ENOSPC; 605 606 *olen = total - p->olen; 607 608 return 0; 609} 610EXPORT_SYMBOL_GPL(sw842_compress); 611 612static int __init sw842_init(void) 613{ 614 if (sw842_template_counts) 615 sw842_debugfs_create(); 616 617 return 0; 618} 619module_init(sw842_init); 620 621static void __exit sw842_exit(void) 622{ 623 if (sw842_template_counts) 624 sw842_debugfs_remove(); 625} 626module_exit(sw842_exit); 627 628MODULE_LICENSE("GPL"); 629MODULE_DESCRIPTION("Software 842 Compressor"); 630MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");