decompress_unlzma.c (16208B)
1/* Lzma decompressor for Linux kernel. Shamelessly snarfed 2 *from busybox 1.1.1 3 * 4 *Linux kernel adaptation 5 *Copyright (C) 2006 Alain < alain@knaff.lu > 6 * 7 *Based on small lzma deflate implementation/Small range coder 8 *implementation for lzma. 9 *Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org > 10 * 11 *Based on LzmaDecode.c from the LZMA SDK 4.22 (https://www.7-zip.org/) 12 *Copyright (C) 1999-2005 Igor Pavlov 13 * 14 *Copyrights of the parts, see headers below. 15 * 16 * 17 *This program is free software; you can redistribute it and/or 18 *modify it under the terms of the GNU Lesser General Public 19 *License as published by the Free Software Foundation; either 20 *version 2.1 of the License, or (at your option) any later version. 21 * 22 *This program is distributed in the hope that it will be useful, 23 *but WITHOUT ANY WARRANTY; without even the implied warranty of 24 *MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 25 *Lesser General Public License for more details. 26 * 27 *You should have received a copy of the GNU Lesser General Public 28 *License along with this library; if not, write to the Free Software 29 *Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 30 */ 31 32#ifdef STATIC 33#define PREBOOT 34#else 35#include <linux/decompress/unlzma.h> 36#endif /* STATIC */ 37 38#include <linux/decompress/mm.h> 39 40#define MIN(a, b) (((a) < (b)) ? (a) : (b)) 41 42static long long INIT read_int(unsigned char *ptr, int size) 43{ 44 int i; 45 long long ret = 0; 46 47 for (i = 0; i < size; i++) 48 ret = (ret << 8) | ptr[size-i-1]; 49 return ret; 50} 51 52#define ENDIAN_CONVERT(x) \ 53 x = (typeof(x))read_int((unsigned char *)&x, sizeof(x)) 54 55 56/* Small range coder implementation for lzma. 57 *Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org > 58 * 59 *Based on LzmaDecode.c from the LZMA SDK 4.22 (https://www.7-zip.org/) 60 *Copyright (c) 1999-2005 Igor Pavlov 61 */ 62 63#include <linux/compiler.h> 64 65#define LZMA_IOBUF_SIZE 0x10000 66 67struct rc { 68 long (*fill)(void*, unsigned long); 69 uint8_t *ptr; 70 uint8_t *buffer; 71 uint8_t *buffer_end; 72 long buffer_size; 73 uint32_t code; 74 uint32_t range; 75 uint32_t bound; 76 void (*error)(char *); 77}; 78 79 80#define RC_TOP_BITS 24 81#define RC_MOVE_BITS 5 82#define RC_MODEL_TOTAL_BITS 11 83 84 85static long INIT nofill(void *buffer, unsigned long len) 86{ 87 return -1; 88} 89 90/* Called twice: once at startup and once in rc_normalize() */ 91static void INIT rc_read(struct rc *rc) 92{ 93 rc->buffer_size = rc->fill((char *)rc->buffer, LZMA_IOBUF_SIZE); 94 if (rc->buffer_size <= 0) 95 rc->error("unexpected EOF"); 96 rc->ptr = rc->buffer; 97 rc->buffer_end = rc->buffer + rc->buffer_size; 98} 99 100/* Called once */ 101static inline void INIT rc_init(struct rc *rc, 102 long (*fill)(void*, unsigned long), 103 char *buffer, long buffer_size) 104{ 105 if (fill) 106 rc->fill = fill; 107 else 108 rc->fill = nofill; 109 rc->buffer = (uint8_t *)buffer; 110 rc->buffer_size = buffer_size; 111 rc->buffer_end = rc->buffer + rc->buffer_size; 112 rc->ptr = rc->buffer; 113 114 rc->code = 0; 115 rc->range = 0xFFFFFFFF; 116} 117 118static inline void INIT rc_init_code(struct rc *rc) 119{ 120 int i; 121 122 for (i = 0; i < 5; i++) { 123 if (rc->ptr >= rc->buffer_end) 124 rc_read(rc); 125 rc->code = (rc->code << 8) | *rc->ptr++; 126 } 127} 128 129 130/* Called twice, but one callsite is in inline'd rc_is_bit_0_helper() */ 131static void INIT rc_do_normalize(struct rc *rc) 132{ 133 if (rc->ptr >= rc->buffer_end) 134 rc_read(rc); 135 rc->range <<= 8; 136 rc->code = (rc->code << 8) | *rc->ptr++; 137} 138static inline void INIT rc_normalize(struct rc *rc) 139{ 140 if (rc->range < (1 << RC_TOP_BITS)) 141 rc_do_normalize(rc); 142} 143 144/* Called 9 times */ 145/* Why rc_is_bit_0_helper exists? 146 *Because we want to always expose (rc->code < rc->bound) to optimizer 147 */ 148static inline uint32_t INIT rc_is_bit_0_helper(struct rc *rc, uint16_t *p) 149{ 150 rc_normalize(rc); 151 rc->bound = *p * (rc->range >> RC_MODEL_TOTAL_BITS); 152 return rc->bound; 153} 154static inline int INIT rc_is_bit_0(struct rc *rc, uint16_t *p) 155{ 156 uint32_t t = rc_is_bit_0_helper(rc, p); 157 return rc->code < t; 158} 159 160/* Called ~10 times, but very small, thus inlined */ 161static inline void INIT rc_update_bit_0(struct rc *rc, uint16_t *p) 162{ 163 rc->range = rc->bound; 164 *p += ((1 << RC_MODEL_TOTAL_BITS) - *p) >> RC_MOVE_BITS; 165} 166static inline void INIT rc_update_bit_1(struct rc *rc, uint16_t *p) 167{ 168 rc->range -= rc->bound; 169 rc->code -= rc->bound; 170 *p -= *p >> RC_MOVE_BITS; 171} 172 173/* Called 4 times in unlzma loop */ 174static int INIT rc_get_bit(struct rc *rc, uint16_t *p, int *symbol) 175{ 176 if (rc_is_bit_0(rc, p)) { 177 rc_update_bit_0(rc, p); 178 *symbol *= 2; 179 return 0; 180 } else { 181 rc_update_bit_1(rc, p); 182 *symbol = *symbol * 2 + 1; 183 return 1; 184 } 185} 186 187/* Called once */ 188static inline int INIT rc_direct_bit(struct rc *rc) 189{ 190 rc_normalize(rc); 191 rc->range >>= 1; 192 if (rc->code >= rc->range) { 193 rc->code -= rc->range; 194 return 1; 195 } 196 return 0; 197} 198 199/* Called twice */ 200static inline void INIT 201rc_bit_tree_decode(struct rc *rc, uint16_t *p, int num_levels, int *symbol) 202{ 203 int i = num_levels; 204 205 *symbol = 1; 206 while (i--) 207 rc_get_bit(rc, p + *symbol, symbol); 208 *symbol -= 1 << num_levels; 209} 210 211 212/* 213 * Small lzma deflate implementation. 214 * Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org > 215 * 216 * Based on LzmaDecode.c from the LZMA SDK 4.22 (https://www.7-zip.org/) 217 * Copyright (C) 1999-2005 Igor Pavlov 218 */ 219 220 221struct lzma_header { 222 uint8_t pos; 223 uint32_t dict_size; 224 uint64_t dst_size; 225} __attribute__ ((packed)) ; 226 227 228#define LZMA_BASE_SIZE 1846 229#define LZMA_LIT_SIZE 768 230 231#define LZMA_NUM_POS_BITS_MAX 4 232 233#define LZMA_LEN_NUM_LOW_BITS 3 234#define LZMA_LEN_NUM_MID_BITS 3 235#define LZMA_LEN_NUM_HIGH_BITS 8 236 237#define LZMA_LEN_CHOICE 0 238#define LZMA_LEN_CHOICE_2 (LZMA_LEN_CHOICE + 1) 239#define LZMA_LEN_LOW (LZMA_LEN_CHOICE_2 + 1) 240#define LZMA_LEN_MID (LZMA_LEN_LOW \ 241 + (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_LOW_BITS))) 242#define LZMA_LEN_HIGH (LZMA_LEN_MID \ 243 +(1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_MID_BITS))) 244#define LZMA_NUM_LEN_PROBS (LZMA_LEN_HIGH + (1 << LZMA_LEN_NUM_HIGH_BITS)) 245 246#define LZMA_NUM_STATES 12 247#define LZMA_NUM_LIT_STATES 7 248 249#define LZMA_START_POS_MODEL_INDEX 4 250#define LZMA_END_POS_MODEL_INDEX 14 251#define LZMA_NUM_FULL_DISTANCES (1 << (LZMA_END_POS_MODEL_INDEX >> 1)) 252 253#define LZMA_NUM_POS_SLOT_BITS 6 254#define LZMA_NUM_LEN_TO_POS_STATES 4 255 256#define LZMA_NUM_ALIGN_BITS 4 257 258#define LZMA_MATCH_MIN_LEN 2 259 260#define LZMA_IS_MATCH 0 261#define LZMA_IS_REP (LZMA_IS_MATCH + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)) 262#define LZMA_IS_REP_G0 (LZMA_IS_REP + LZMA_NUM_STATES) 263#define LZMA_IS_REP_G1 (LZMA_IS_REP_G0 + LZMA_NUM_STATES) 264#define LZMA_IS_REP_G2 (LZMA_IS_REP_G1 + LZMA_NUM_STATES) 265#define LZMA_IS_REP_0_LONG (LZMA_IS_REP_G2 + LZMA_NUM_STATES) 266#define LZMA_POS_SLOT (LZMA_IS_REP_0_LONG \ 267 + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)) 268#define LZMA_SPEC_POS (LZMA_POS_SLOT \ 269 +(LZMA_NUM_LEN_TO_POS_STATES << LZMA_NUM_POS_SLOT_BITS)) 270#define LZMA_ALIGN (LZMA_SPEC_POS \ 271 + LZMA_NUM_FULL_DISTANCES - LZMA_END_POS_MODEL_INDEX) 272#define LZMA_LEN_CODER (LZMA_ALIGN + (1 << LZMA_NUM_ALIGN_BITS)) 273#define LZMA_REP_LEN_CODER (LZMA_LEN_CODER + LZMA_NUM_LEN_PROBS) 274#define LZMA_LITERAL (LZMA_REP_LEN_CODER + LZMA_NUM_LEN_PROBS) 275 276 277struct writer { 278 uint8_t *buffer; 279 uint8_t previous_byte; 280 size_t buffer_pos; 281 int bufsize; 282 size_t global_pos; 283 long (*flush)(void*, unsigned long); 284 struct lzma_header *header; 285}; 286 287struct cstate { 288 int state; 289 uint32_t rep0, rep1, rep2, rep3; 290}; 291 292static inline size_t INIT get_pos(struct writer *wr) 293{ 294 return 295 wr->global_pos + wr->buffer_pos; 296} 297 298static inline uint8_t INIT peek_old_byte(struct writer *wr, 299 uint32_t offs) 300{ 301 if (!wr->flush) { 302 int32_t pos; 303 while (offs > wr->header->dict_size) 304 offs -= wr->header->dict_size; 305 pos = wr->buffer_pos - offs; 306 return wr->buffer[pos]; 307 } else { 308 uint32_t pos = wr->buffer_pos - offs; 309 while (pos >= wr->header->dict_size) 310 pos += wr->header->dict_size; 311 return wr->buffer[pos]; 312 } 313 314} 315 316static inline int INIT write_byte(struct writer *wr, uint8_t byte) 317{ 318 wr->buffer[wr->buffer_pos++] = wr->previous_byte = byte; 319 if (wr->flush && wr->buffer_pos == wr->header->dict_size) { 320 wr->buffer_pos = 0; 321 wr->global_pos += wr->header->dict_size; 322 if (wr->flush((char *)wr->buffer, wr->header->dict_size) 323 != wr->header->dict_size) 324 return -1; 325 } 326 return 0; 327} 328 329 330static inline int INIT copy_byte(struct writer *wr, uint32_t offs) 331{ 332 return write_byte(wr, peek_old_byte(wr, offs)); 333} 334 335static inline int INIT copy_bytes(struct writer *wr, 336 uint32_t rep0, int len) 337{ 338 do { 339 if (copy_byte(wr, rep0)) 340 return -1; 341 len--; 342 } while (len != 0 && wr->buffer_pos < wr->header->dst_size); 343 344 return len; 345} 346 347static inline int INIT process_bit0(struct writer *wr, struct rc *rc, 348 struct cstate *cst, uint16_t *p, 349 int pos_state, uint16_t *prob, 350 int lc, uint32_t literal_pos_mask) { 351 int mi = 1; 352 rc_update_bit_0(rc, prob); 353 prob = (p + LZMA_LITERAL + 354 (LZMA_LIT_SIZE 355 * (((get_pos(wr) & literal_pos_mask) << lc) 356 + (wr->previous_byte >> (8 - lc)))) 357 ); 358 359 if (cst->state >= LZMA_NUM_LIT_STATES) { 360 int match_byte = peek_old_byte(wr, cst->rep0); 361 do { 362 int bit; 363 uint16_t *prob_lit; 364 365 match_byte <<= 1; 366 bit = match_byte & 0x100; 367 prob_lit = prob + 0x100 + bit + mi; 368 if (rc_get_bit(rc, prob_lit, &mi)) { 369 if (!bit) 370 break; 371 } else { 372 if (bit) 373 break; 374 } 375 } while (mi < 0x100); 376 } 377 while (mi < 0x100) { 378 uint16_t *prob_lit = prob + mi; 379 rc_get_bit(rc, prob_lit, &mi); 380 } 381 if (cst->state < 4) 382 cst->state = 0; 383 else if (cst->state < 10) 384 cst->state -= 3; 385 else 386 cst->state -= 6; 387 388 return write_byte(wr, mi); 389} 390 391static inline int INIT process_bit1(struct writer *wr, struct rc *rc, 392 struct cstate *cst, uint16_t *p, 393 int pos_state, uint16_t *prob) { 394 int offset; 395 uint16_t *prob_len; 396 int num_bits; 397 int len; 398 399 rc_update_bit_1(rc, prob); 400 prob = p + LZMA_IS_REP + cst->state; 401 if (rc_is_bit_0(rc, prob)) { 402 rc_update_bit_0(rc, prob); 403 cst->rep3 = cst->rep2; 404 cst->rep2 = cst->rep1; 405 cst->rep1 = cst->rep0; 406 cst->state = cst->state < LZMA_NUM_LIT_STATES ? 0 : 3; 407 prob = p + LZMA_LEN_CODER; 408 } else { 409 rc_update_bit_1(rc, prob); 410 prob = p + LZMA_IS_REP_G0 + cst->state; 411 if (rc_is_bit_0(rc, prob)) { 412 rc_update_bit_0(rc, prob); 413 prob = (p + LZMA_IS_REP_0_LONG 414 + (cst->state << 415 LZMA_NUM_POS_BITS_MAX) + 416 pos_state); 417 if (rc_is_bit_0(rc, prob)) { 418 rc_update_bit_0(rc, prob); 419 420 cst->state = cst->state < LZMA_NUM_LIT_STATES ? 421 9 : 11; 422 return copy_byte(wr, cst->rep0); 423 } else { 424 rc_update_bit_1(rc, prob); 425 } 426 } else { 427 uint32_t distance; 428 429 rc_update_bit_1(rc, prob); 430 prob = p + LZMA_IS_REP_G1 + cst->state; 431 if (rc_is_bit_0(rc, prob)) { 432 rc_update_bit_0(rc, prob); 433 distance = cst->rep1; 434 } else { 435 rc_update_bit_1(rc, prob); 436 prob = p + LZMA_IS_REP_G2 + cst->state; 437 if (rc_is_bit_0(rc, prob)) { 438 rc_update_bit_0(rc, prob); 439 distance = cst->rep2; 440 } else { 441 rc_update_bit_1(rc, prob); 442 distance = cst->rep3; 443 cst->rep3 = cst->rep2; 444 } 445 cst->rep2 = cst->rep1; 446 } 447 cst->rep1 = cst->rep0; 448 cst->rep0 = distance; 449 } 450 cst->state = cst->state < LZMA_NUM_LIT_STATES ? 8 : 11; 451 prob = p + LZMA_REP_LEN_CODER; 452 } 453 454 prob_len = prob + LZMA_LEN_CHOICE; 455 if (rc_is_bit_0(rc, prob_len)) { 456 rc_update_bit_0(rc, prob_len); 457 prob_len = (prob + LZMA_LEN_LOW 458 + (pos_state << 459 LZMA_LEN_NUM_LOW_BITS)); 460 offset = 0; 461 num_bits = LZMA_LEN_NUM_LOW_BITS; 462 } else { 463 rc_update_bit_1(rc, prob_len); 464 prob_len = prob + LZMA_LEN_CHOICE_2; 465 if (rc_is_bit_0(rc, prob_len)) { 466 rc_update_bit_0(rc, prob_len); 467 prob_len = (prob + LZMA_LEN_MID 468 + (pos_state << 469 LZMA_LEN_NUM_MID_BITS)); 470 offset = 1 << LZMA_LEN_NUM_LOW_BITS; 471 num_bits = LZMA_LEN_NUM_MID_BITS; 472 } else { 473 rc_update_bit_1(rc, prob_len); 474 prob_len = prob + LZMA_LEN_HIGH; 475 offset = ((1 << LZMA_LEN_NUM_LOW_BITS) 476 + (1 << LZMA_LEN_NUM_MID_BITS)); 477 num_bits = LZMA_LEN_NUM_HIGH_BITS; 478 } 479 } 480 481 rc_bit_tree_decode(rc, prob_len, num_bits, &len); 482 len += offset; 483 484 if (cst->state < 4) { 485 int pos_slot; 486 487 cst->state += LZMA_NUM_LIT_STATES; 488 prob = 489 p + LZMA_POS_SLOT + 490 ((len < 491 LZMA_NUM_LEN_TO_POS_STATES ? len : 492 LZMA_NUM_LEN_TO_POS_STATES - 1) 493 << LZMA_NUM_POS_SLOT_BITS); 494 rc_bit_tree_decode(rc, prob, 495 LZMA_NUM_POS_SLOT_BITS, 496 &pos_slot); 497 if (pos_slot >= LZMA_START_POS_MODEL_INDEX) { 498 int i, mi; 499 num_bits = (pos_slot >> 1) - 1; 500 cst->rep0 = 2 | (pos_slot & 1); 501 if (pos_slot < LZMA_END_POS_MODEL_INDEX) { 502 cst->rep0 <<= num_bits; 503 prob = p + LZMA_SPEC_POS + 504 cst->rep0 - pos_slot - 1; 505 } else { 506 num_bits -= LZMA_NUM_ALIGN_BITS; 507 while (num_bits--) 508 cst->rep0 = (cst->rep0 << 1) | 509 rc_direct_bit(rc); 510 prob = p + LZMA_ALIGN; 511 cst->rep0 <<= LZMA_NUM_ALIGN_BITS; 512 num_bits = LZMA_NUM_ALIGN_BITS; 513 } 514 i = 1; 515 mi = 1; 516 while (num_bits--) { 517 if (rc_get_bit(rc, prob + mi, &mi)) 518 cst->rep0 |= i; 519 i <<= 1; 520 } 521 } else 522 cst->rep0 = pos_slot; 523 if (++(cst->rep0) == 0) 524 return 0; 525 if (cst->rep0 > wr->header->dict_size 526 || cst->rep0 > get_pos(wr)) 527 return -1; 528 } 529 530 len += LZMA_MATCH_MIN_LEN; 531 532 return copy_bytes(wr, cst->rep0, len); 533} 534 535 536 537STATIC inline int INIT unlzma(unsigned char *buf, long in_len, 538 long (*fill)(void*, unsigned long), 539 long (*flush)(void*, unsigned long), 540 unsigned char *output, 541 long *posp, 542 void(*error)(char *x) 543 ) 544{ 545 struct lzma_header header; 546 int lc, pb, lp; 547 uint32_t pos_state_mask; 548 uint32_t literal_pos_mask; 549 uint16_t *p; 550 int num_probs; 551 struct rc rc; 552 int i, mi; 553 struct writer wr; 554 struct cstate cst; 555 unsigned char *inbuf; 556 int ret = -1; 557 558 rc.error = error; 559 560 if (buf) 561 inbuf = buf; 562 else 563 inbuf = malloc(LZMA_IOBUF_SIZE); 564 if (!inbuf) { 565 error("Could not allocate input buffer"); 566 goto exit_0; 567 } 568 569 cst.state = 0; 570 cst.rep0 = cst.rep1 = cst.rep2 = cst.rep3 = 1; 571 572 wr.header = &header; 573 wr.flush = flush; 574 wr.global_pos = 0; 575 wr.previous_byte = 0; 576 wr.buffer_pos = 0; 577 578 rc_init(&rc, fill, inbuf, in_len); 579 580 for (i = 0; i < sizeof(header); i++) { 581 if (rc.ptr >= rc.buffer_end) 582 rc_read(&rc); 583 ((unsigned char *)&header)[i] = *rc.ptr++; 584 } 585 586 if (header.pos >= (9 * 5 * 5)) { 587 error("bad header"); 588 goto exit_1; 589 } 590 591 mi = 0; 592 lc = header.pos; 593 while (lc >= 9) { 594 mi++; 595 lc -= 9; 596 } 597 pb = 0; 598 lp = mi; 599 while (lp >= 5) { 600 pb++; 601 lp -= 5; 602 } 603 pos_state_mask = (1 << pb) - 1; 604 literal_pos_mask = (1 << lp) - 1; 605 606 ENDIAN_CONVERT(header.dict_size); 607 ENDIAN_CONVERT(header.dst_size); 608 609 if (header.dict_size == 0) 610 header.dict_size = 1; 611 612 if (output) 613 wr.buffer = output; 614 else { 615 wr.bufsize = MIN(header.dst_size, header.dict_size); 616 wr.buffer = large_malloc(wr.bufsize); 617 } 618 if (wr.buffer == NULL) 619 goto exit_1; 620 621 num_probs = LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp)); 622 p = (uint16_t *) large_malloc(num_probs * sizeof(*p)); 623 if (p == NULL) 624 goto exit_2; 625 num_probs = LZMA_LITERAL + (LZMA_LIT_SIZE << (lc + lp)); 626 for (i = 0; i < num_probs; i++) 627 p[i] = (1 << RC_MODEL_TOTAL_BITS) >> 1; 628 629 rc_init_code(&rc); 630 631 while (get_pos(&wr) < header.dst_size) { 632 int pos_state = get_pos(&wr) & pos_state_mask; 633 uint16_t *prob = p + LZMA_IS_MATCH + 634 (cst.state << LZMA_NUM_POS_BITS_MAX) + pos_state; 635 if (rc_is_bit_0(&rc, prob)) { 636 if (process_bit0(&wr, &rc, &cst, p, pos_state, prob, 637 lc, literal_pos_mask)) { 638 error("LZMA data is corrupt"); 639 goto exit_3; 640 } 641 } else { 642 if (process_bit1(&wr, &rc, &cst, p, pos_state, prob)) { 643 error("LZMA data is corrupt"); 644 goto exit_3; 645 } 646 if (cst.rep0 == 0) 647 break; 648 } 649 if (rc.buffer_size <= 0) 650 goto exit_3; 651 } 652 653 if (posp) 654 *posp = rc.ptr-rc.buffer; 655 if (!wr.flush || wr.flush(wr.buffer, wr.buffer_pos) == wr.buffer_pos) 656 ret = 0; 657exit_3: 658 large_free(p); 659exit_2: 660 if (!output) 661 large_free(wr.buffer); 662exit_1: 663 if (!buf) 664 free(inbuf); 665exit_0: 666 return ret; 667} 668 669#ifdef PREBOOT 670STATIC int INIT __decompress(unsigned char *buf, long in_len, 671 long (*fill)(void*, unsigned long), 672 long (*flush)(void*, unsigned long), 673 unsigned char *output, long out_len, 674 long *posp, 675 void (*error)(char *x)) 676{ 677 return unlzma(buf, in_len - 4, fill, flush, output, posp, error); 678} 679#endif