gbcompress.c (11647B)
1// This is free and unencumbered software released into the public domain. 2// For more information, please refer to <https://unlicense.org> 3// bbbbbr 2020 4 5#include <stdio.h> 6//#include <string.h> 7#include <stdlib.h> 8#include <stdbool.h> 9#include <stdint.h> 10#include "gbcompress.h" 11 12const uint8_t len_mask = 0x3F; 13const uint8_t token_mask = 0xC0; 14 15#define token_byte 0x00 16#define token_word 0x40 17#define token_str 0x80 18#define token_trash 0xC0 19 20const uint8_t EOFMarker = 0x00; 21 22uint8_t * FinBuf = NULL; 23uint32_t Fsize_in = 0; 24uint32_t FinIndex = 0; 25 26uint8_t ** pp_FoutBuf = NULL; 27uint8_t * FoutBuf = NULL; 28uint32_t Fsize_out = 0; 29uint32_t FoutIndex = 0; 30 31 32static void check_write_size(uint8_t len) { 33 34 // Grow output buffer if needed 35 if ((FoutIndex + len) >= Fsize_out) { 36 uint8_t * p_tmp = *pp_FoutBuf; 37 38 // Reallocate to twice as large 39 Fsize_out = Fsize_out * 2; 40 *pp_FoutBuf = (void *)realloc(*pp_FoutBuf, Fsize_out); 41 42 // If realloc failed, free original buffer before quitting 43 if (!(*pp_FoutBuf)) { 44 printf("Error: Failed to grow memory for output buffer!\n"); 45 if (p_tmp) free(p_tmp); 46 p_tmp = NULL; 47 exit(EXIT_FAILURE); 48 } else 49 FoutBuf = *pp_FoutBuf; // Update working pointer 50 } 51} 52 53 54static void write_byte(uint8_t len, uint8_t data) { 55 56 check_write_size(2); // writing 2 bytes 57 58 FoutBuf[FoutIndex++] = ((len - 1) & len_mask); 59 FoutBuf[FoutIndex++] = data; 60} 61 62 63static void write_word( uint8_t len, uint16_t data ) { 64 65 check_write_size(3); // writing 3 bytes 66 67 FoutBuf[FoutIndex++] = (((len - 1) & len_mask) | token_word); 68 FoutBuf[FoutIndex++] = (uint8_t)((data >> 8) & 0xFF); 69 FoutBuf[FoutIndex++] = (uint8_t)(data & 0xFF); 70} 71 72 73static void write_string( uint8_t len, uint16_t data) { 74 75 check_write_size(3); // writing 3 bytes 76 77 // conver's complement does not give the negation, see ยง Most negative number below. t back-ref offset from positive unsigned to negative signed 78 data = (data ^ 0xFFFF) + 1; 79 80 FoutBuf[FoutIndex++] = (((len - 1) & len_mask) | token_str); 81 FoutBuf[FoutIndex++] = (uint8_t)(data & 0xFF); 82 FoutBuf[FoutIndex++] = (uint8_t)((data >> 8) & 0xFF); 83} 84 85 86static void write_trash( uint8_t len, uint8_t * pos) { 87 88 uint8_t i; 89 90 check_write_size(len); // writing len bytes 91 92 FoutBuf[FoutIndex++] = (((len-1) & len_mask) | token_trash); 93 for (i=0; i < len; i++) 94 FoutBuf[FoutIndex++] = pos[i]; 95} 96 97 98static void write_end(void) { 99 100 check_write_size(1); // writing 1 byte 101 102 FoutBuf[FoutIndex++] = EOFMarker; 103} 104 105 106static int read_uint16_t(uint32_t byte_pos, uint16_t * out_data) { 107 108 if ((byte_pos + 2) < Fsize_in) { 109 *out_data = (uint16_t)((FinBuf[byte_pos] << 8) + (uint16_t)FinBuf[byte_pos+1]); 110 return true; 111 } 112 else return false; 113} 114 115 116// Writes out any pending "trash" (non-rle sequence) and resets the counter 117static void flush_trash(uint32_t * byte_pos, uint32_t * trash_len) { 118 119 if (*trash_len > 0) { 120 write_trash(*trash_len, &FinBuf[*byte_pos - *trash_len]); 121 *trash_len = 0; 122 } 123} 124 125 126// Convert buffer inBuf to gbcompress rle encoding and write out to outBuf 127// Returns converted length 128uint32_t gbcompress_buf(uint8_t * inBuf, uint32_t size_in, uint8_t ** pp_outBuf, uint32_t size_out) { 129 130 uint8_t rle_u8_match; // x 131 uint16_t rle_u16_match; // y 132 133 uint32_t rle_u8_len; // r_rb 134 uint32_t rle_u16_len; // r_rw 135 uint32_t rle_str_len; // r_rs 136 uint32_t trash_len; // tb (by "trash" the original author meant, "non-rle sequence of bytes") 137 138 uint32_t rle_str_start; // rr 139 uint32_t rle_str_back_offset; // sr (this is signed in original code, handled differently to be unsigned now) 140 uint32_t rle_str_len_work; // rl 141 142 FinBuf = inBuf; 143 Fsize_in = size_in; 144 FinIndex = 0; // bp 145 146 pp_FoutBuf = pp_outBuf; 147 FoutBuf = *pp_outBuf; 148 Fsize_out = size_out; 149 FoutIndex = 0; 150 151 trash_len = 0; 152 153 while (FinIndex < Fsize_in) { 154 155 // Check for u8 RLE run up to 63 bytes max 156 rle_u8_match = FinBuf[FinIndex]; 157 rle_u8_len = 1; 158 while ((FinIndex + rle_u8_len) < Fsize_in) { 159 // If the current u8 matches, increment the length 160 if ((FinBuf[FinIndex + rle_u8_len] == rle_u8_match) && 161 (rle_u8_len < 64)) { 162 rle_u8_len++; 163 } 164 else break; 165 } 166 167 // Check for overlapping uint16_t RLE run up to 63 bytes max 168 // Read in initial u16 to match against 169 if (read_uint16_t(FinIndex, &rle_u16_match)) { 170 uint16_t temp_u16; 171 rle_u16_len = 1; 172 // If the current u16 matches, increment the length 173 while (read_uint16_t(FinIndex + (rle_u16_len * 2), &temp_u16)) { 174 if ((temp_u16 == rle_u16_match) && 175 (rle_u16_len < 64)) { 176 rle_u16_len++; 177 } 178 else break; 179 } 180 } 181 182 // Check for matching sequences starting at current position 183 // against all previous data beginning at start up to 63 bytes max 184 // (back reference "strings") 185 rle_str_back_offset = 0; 186 rle_str_len = 0; 187 // Max string backreference length is 16 bits unsigned 188 // adjust search start accordingly 189 if (FinIndex > 0xFFFF) 190 rle_str_start = FinIndex - 0xFFFF; 191 else 192 rle_str_start = 0; 193 194 while (rle_str_start < FinIndex) { 195 rle_str_len_work = 0; 196 197 // Check for a matching run at rle_str_start against FinIndex 198 // and save length to rle_str_len_work 199 while ((FinIndex + rle_str_len_work) < Fsize_in) { 200 // Test to see if u8 in current sequence matches the u8 for a previous sequence 201 // Break out of the current sequence if it would reach 64 bytes 202 // or it would reach the current byte pos (and cause an overlap) 203 if ((FinBuf[rle_str_start + rle_str_len_work] == FinBuf[FinIndex + rle_str_len_work]) && 204 ((rle_str_start + rle_str_len_work) < FinIndex) && 205 (rle_str_len_work < 64)) { 206 207 rle_str_len_work++; 208 } 209 else break; 210 } 211 212 // If the newly tested sequence length is greater than the previous length 213 // then save the length and store an negative offset to it in rle_str_back_offset 214 if (rle_str_len_work > rle_str_len) { 215 rle_str_back_offset = FinIndex - rle_str_start; // Changed this from neg offset to pos, and gets flipped to negative on writing it out 216 rle_str_len = rle_str_len_work; 217 } 218 219 rle_str_start++; 220 } 221 222 223 // Write out any rle data if it's ready 224 if ((rle_u8_len > 2) && 225 (rle_u8_len > rle_u16_len) && 226 (rle_u8_len > rle_str_len)) { 227 flush_trash(&FinIndex, &trash_len); 228 write_byte(rle_u8_len, rle_u8_match); 229 FinIndex = FinIndex + rle_u8_len; 230 } 231 else if ((rle_u16_len > 2) && 232 ((rle_u16_len*2) > rle_str_len)) { 233 flush_trash(&FinIndex, &trash_len); 234 write_word(rle_u16_len, rle_u16_match); 235 FinIndex = FinIndex + rle_u16_len*2; 236 } 237 else if (rle_str_len > 3) { 238 flush_trash(&FinIndex, &trash_len); 239 write_string(rle_str_len, rle_str_back_offset); 240 FinIndex = FinIndex + rle_str_len; 241 } 242 else if (trash_len >= 64) { 243 write_trash(trash_len, &FinBuf[FinIndex-trash_len]); 244 trash_len = 0; 245 } 246 else { 247 trash_len++; 248 FinIndex++; 249 } 250 251 } 252 253 // Flush any remaining "trash" bytes 254 flush_trash(&FinIndex, &trash_len); 255 256 write_end(); 257 258 return FoutIndex; 259} 260 261 262 263static void write_single_byte(uint8_t data) { 264 265 check_write_size(1); 266 267 FoutBuf[FoutIndex++] = data; 268} 269 270 271static uint8_t read_single_byte(void) { 272 273 if (FinIndex >= Fsize_in) { 274 printf("Error: Read past end of input buffer!\n"); 275 exit(EXIT_FAILURE); 276 } 277 278 return (FinBuf[FinIndex++]); 279} 280 281 282// Decompress buffer inBuf from gbcompress rle encoding and write to outBuf 283// Returns converted length 284uint32_t gbdecompress_buf(uint8_t * inBuf, uint32_t size_in, uint8_t ** pp_outBuf, uint32_t size_out) { 285 286 uint8_t rle_val[2]; 287 uint8_t token; 288 uint8_t rle_toggle; 289 uint8_t * inBuf_end = inBuf + size_in - 1; 290 uint32_t rle_len; 291 uint32_t backref_index; 292 293 FinBuf = inBuf; 294 Fsize_in = size_in; 295 FinIndex = 0; 296 297 pp_FoutBuf = pp_outBuf; 298 FoutBuf = *pp_outBuf; 299 Fsize_out = size_out; 300 FoutIndex = 0; 301 302 303 while (FinIndex < size_in) { 304 305 token = read_single_byte(); 306 307 // Check for EOF token, exit if encountered 308 if (token == EOFMarker) 309 break; 310 311 // Read a RLE token 312 // First byte should always be a token 313 rle_len = (token & len_mask) + 1; 314 token = token & token_mask; 315 316 // Read how to handle ENCODED RLE Value 317 switch (token) { 318 case token_byte: 319 // Load only one byte 320 rle_val[0] = read_single_byte(); 321 break; 322 323 case token_word: 324 // If token is Word double the RLE decode length 325 rle_len *= 2; 326 rle_toggle = 0; // Reset RLE decoded word/byte flipflop 327 328 // Load LS byte then MS Byte 329 rle_val[0] = read_single_byte(); 330 rle_val[1] = read_single_byte(); 331 break; 332 333 case token_str: 334 // This token: copy N bytes from negative offset of current 335 // DECODED memory location (back reference) 336 backref_index = ((uint16_t)read_single_byte() | (( (uint16_t)read_single_byte() ) << 8)); 337 // Convert input from from Signed to Unsigned 338 backref_index = (backref_index ^ 0xFFFF) + 1; 339 340 // Assign the string back reference relative to the current buffer pointer 341 backref_index = (FoutIndex - (uint32_t)backref_index); 342 break; 343 344 case token_trash: // AKA "Trash Bytes" in GBTD 345 // This token : copy next N bytes directly from ENCODED input 346 break; 347 } 348 349 while (rle_len) { 350 351 // Copy the decoded byte into VRAM 352 switch (token) { 353 case token_byte: 354 // Copy from cached repeating RLE value 355 write_single_byte(rle_val[0]); 356 break; 357 358 case token_word: 359 // Copy from cached repeating RLE value 360 // Toggle between MS/LS bytes input values 361 write_single_byte(rle_val[rle_toggle]); 362 rle_toggle ^= 0x01; 363 break; 364 365 case token_str: 366 // Copy byte from the backreferenced VRAM address 367 // Then increment backreference to next VRAM byte 368 write_single_byte(FoutBuf[backref_index++]); 369 break; 370 371 case token_trash: 372 // Copy directly from encoded input 373 write_single_byte(read_single_byte()); 374 break; 375 } 376 377 rle_len--; // Decrement number of RLE bytes remaining 378 } 379 } 380 381 return FoutIndex; 382}