zstd_internal.h (15040B)
1/* 2 * Copyright (c) Yann Collet, Facebook, Inc. 3 * All rights reserved. 4 * 5 * This source code is licensed under both the BSD-style license (found in the 6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 * in the COPYING file in the root directory of this source tree). 8 * You may select, at your option, one of the above-listed licenses. 9 */ 10 11#ifndef ZSTD_CCOMMON_H_MODULE 12#define ZSTD_CCOMMON_H_MODULE 13 14/* this module contains definitions which must be identical 15 * across compression, decompression and dictBuilder. 16 * It also contains a few functions useful to at least 2 of them 17 * and which benefit from being inlined */ 18 19/*-************************************* 20* Dependencies 21***************************************/ 22#include "compiler.h" 23#include "mem.h" 24#include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */ 25#include "error_private.h" 26#define ZSTD_STATIC_LINKING_ONLY 27#include <linux/zstd.h> 28#define FSE_STATIC_LINKING_ONLY 29#include "fse.h" 30#define HUF_STATIC_LINKING_ONLY 31#include "huf.h" 32#include <linux/xxhash.h> /* XXH_reset, update, digest */ 33#define ZSTD_TRACE 0 34 35 36/* ---- static assert (debug) --- */ 37#define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) 38#define ZSTD_isError ERR_isError /* for inlining */ 39#define FSE_isError ERR_isError 40#define HUF_isError ERR_isError 41 42 43/*-************************************* 44* shared macros 45***************************************/ 46#undef MIN 47#undef MAX 48#define MIN(a,b) ((a)<(b) ? (a) : (b)) 49#define MAX(a,b) ((a)>(b) ? (a) : (b)) 50 51/* 52 * Ignore: this is an internal helper. 53 * 54 * This is a helper function to help force C99-correctness during compilation. 55 * Under strict compilation modes, variadic macro arguments can't be empty. 56 * However, variadic function arguments can be. Using a function therefore lets 57 * us statically check that at least one (string) argument was passed, 58 * independent of the compilation flags. 59 */ 60static INLINE_KEYWORD UNUSED_ATTR 61void _force_has_format_string(const char *format, ...) { 62 (void)format; 63} 64 65/* 66 * Ignore: this is an internal helper. 67 * 68 * We want to force this function invocation to be syntactically correct, but 69 * we don't want to force runtime evaluation of its arguments. 70 */ 71#define _FORCE_HAS_FORMAT_STRING(...) \ 72 if (0) { \ 73 _force_has_format_string(__VA_ARGS__); \ 74 } 75 76/* 77 * Return the specified error if the condition evaluates to true. 78 * 79 * In debug modes, prints additional information. 80 * In order to do that (particularly, printing the conditional that failed), 81 * this can't just wrap RETURN_ERROR(). 82 */ 83#define RETURN_ERROR_IF(cond, err, ...) \ 84 if (cond) { \ 85 RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \ 86 __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \ 87 _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ 88 RAWLOG(3, ": " __VA_ARGS__); \ 89 RAWLOG(3, "\n"); \ 90 return ERROR(err); \ 91 } 92 93/* 94 * Unconditionally return the specified error. 95 * 96 * In debug modes, prints additional information. 97 */ 98#define RETURN_ERROR(err, ...) \ 99 do { \ 100 RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \ 101 __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \ 102 _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ 103 RAWLOG(3, ": " __VA_ARGS__); \ 104 RAWLOG(3, "\n"); \ 105 return ERROR(err); \ 106 } while(0); 107 108/* 109 * If the provided expression evaluates to an error code, returns that error code. 110 * 111 * In debug modes, prints additional information. 112 */ 113#define FORWARD_IF_ERROR(err, ...) \ 114 do { \ 115 size_t const err_code = (err); \ 116 if (ERR_isError(err_code)) { \ 117 RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \ 118 __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \ 119 _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ 120 RAWLOG(3, ": " __VA_ARGS__); \ 121 RAWLOG(3, "\n"); \ 122 return err_code; \ 123 } \ 124 } while(0); 125 126 127/*-************************************* 128* Common constants 129***************************************/ 130#define ZSTD_OPT_NUM (1<<12) 131 132#define ZSTD_REP_NUM 3 /* number of repcodes */ 133#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1) 134static UNUSED_ATTR const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; 135 136#define KB *(1 <<10) 137#define MB *(1 <<20) 138#define GB *(1U<<30) 139 140#define BIT7 128 141#define BIT6 64 142#define BIT5 32 143#define BIT4 16 144#define BIT1 2 145#define BIT0 1 146 147#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10 148static UNUSED_ATTR const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 }; 149static UNUSED_ATTR const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 }; 150 151#define ZSTD_FRAMEIDSIZE 4 /* magic number size */ 152 153#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */ 154static UNUSED_ATTR const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; 155typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e; 156 157#define ZSTD_FRAMECHECKSUMSIZE 4 158 159#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ 160#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ 161 162#define HufLog 12 163typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e; 164 165#define LONGNBSEQ 0x7F00 166 167#define MINMATCH 3 168 169#define Litbits 8 170#define MaxLit ((1<<Litbits) - 1) 171#define MaxML 52 172#define MaxLL 35 173#define DefaultMaxOff 28 174#define MaxOff 31 175#define MaxSeq MAX(MaxLL, MaxML) /* Assumption : MaxOff < MaxLL,MaxML */ 176#define MLFSELog 9 177#define LLFSELog 9 178#define OffFSELog 8 179#define MaxFSELog MAX(MAX(MLFSELog, LLFSELog), OffFSELog) 180 181#define ZSTD_MAX_HUF_HEADER_SIZE 128 /* header + <= 127 byte tree description */ 182/* Each table cannot take more than #symbols * FSELog bits */ 183#define ZSTD_MAX_FSE_HEADERS_SIZE (((MaxML + 1) * MLFSELog + (MaxLL + 1) * LLFSELog + (MaxOff + 1) * OffFSELog + 7) / 8) 184 185static UNUSED_ATTR const U32 LL_bits[MaxLL+1] = { 186 0, 0, 0, 0, 0, 0, 0, 0, 187 0, 0, 0, 0, 0, 0, 0, 0, 188 1, 1, 1, 1, 2, 2, 3, 3, 189 4, 6, 7, 8, 9,10,11,12, 190 13,14,15,16 191}; 192static UNUSED_ATTR const S16 LL_defaultNorm[MaxLL+1] = { 193 4, 3, 2, 2, 2, 2, 2, 2, 194 2, 2, 2, 2, 2, 1, 1, 1, 195 2, 2, 2, 2, 2, 2, 2, 2, 196 2, 3, 2, 1, 1, 1, 1, 1, 197 -1,-1,-1,-1 198}; 199#define LL_DEFAULTNORMLOG 6 /* for static allocation */ 200static UNUSED_ATTR const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG; 201 202static UNUSED_ATTR const U32 ML_bits[MaxML+1] = { 203 0, 0, 0, 0, 0, 0, 0, 0, 204 0, 0, 0, 0, 0, 0, 0, 0, 205 0, 0, 0, 0, 0, 0, 0, 0, 206 0, 0, 0, 0, 0, 0, 0, 0, 207 1, 1, 1, 1, 2, 2, 3, 3, 208 4, 4, 5, 7, 8, 9,10,11, 209 12,13,14,15,16 210}; 211static UNUSED_ATTR const S16 ML_defaultNorm[MaxML+1] = { 212 1, 4, 3, 2, 2, 2, 2, 2, 213 2, 1, 1, 1, 1, 1, 1, 1, 214 1, 1, 1, 1, 1, 1, 1, 1, 215 1, 1, 1, 1, 1, 1, 1, 1, 216 1, 1, 1, 1, 1, 1, 1, 1, 217 1, 1, 1, 1, 1, 1,-1,-1, 218 -1,-1,-1,-1,-1 219}; 220#define ML_DEFAULTNORMLOG 6 /* for static allocation */ 221static UNUSED_ATTR const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG; 222 223static UNUSED_ATTR const S16 OF_defaultNorm[DefaultMaxOff+1] = { 224 1, 1, 1, 1, 1, 1, 2, 2, 225 2, 1, 1, 1, 1, 1, 1, 1, 226 1, 1, 1, 1, 1, 1, 1, 1, 227 -1,-1,-1,-1,-1 228}; 229#define OF_DEFAULTNORMLOG 5 /* for static allocation */ 230static UNUSED_ATTR const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG; 231 232 233/*-******************************************* 234* Shared functions to include for inlining 235*********************************************/ 236static void ZSTD_copy8(void* dst, const void* src) { 237 ZSTD_memcpy(dst, src, 8); 238} 239 240#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; } 241static void ZSTD_copy16(void* dst, const void* src) { 242 ZSTD_memcpy(dst, src, 16); 243} 244#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; } 245 246#define WILDCOPY_OVERLENGTH 32 247#define WILDCOPY_VECLEN 16 248 249typedef enum { 250 ZSTD_no_overlap, 251 ZSTD_overlap_src_before_dst 252 /* ZSTD_overlap_dst_before_src, */ 253} ZSTD_overlap_e; 254 255/*! ZSTD_wildcopy() : 256 * Custom version of ZSTD_memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0) 257 * @param ovtype controls the overlap detection 258 * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart. 259 * - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart. 260 * The src buffer must be before the dst buffer. 261 */ 262MEM_STATIC FORCE_INLINE_ATTR 263void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype) 264{ 265 ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src; 266 const BYTE* ip = (const BYTE*)src; 267 BYTE* op = (BYTE*)dst; 268 BYTE* const oend = op + length; 269 270 assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN)); 271 272 if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) { 273 /* Handle short offset copies. */ 274 do { 275 COPY8(op, ip) 276 } while (op < oend); 277 } else { 278 assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN); 279 /* Separate out the first COPY16() call because the copy length is 280 * almost certain to be short, so the branches have different 281 * probabilities. Since it is almost certain to be short, only do 282 * one COPY16() in the first call. Then, do two calls per loop since 283 * at that point it is more likely to have a high trip count. 284 */ 285#ifdef __aarch64__ 286 do { 287 COPY16(op, ip); 288 } 289 while (op < oend); 290#else 291 ZSTD_copy16(op, ip); 292 if (16 >= length) return; 293 op += 16; 294 ip += 16; 295 do { 296 COPY16(op, ip); 297 COPY16(op, ip); 298 } 299 while (op < oend); 300#endif 301 } 302} 303 304MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize) 305{ 306 size_t const length = MIN(dstCapacity, srcSize); 307 if (length > 0) { 308 ZSTD_memcpy(dst, src, length); 309 } 310 return length; 311} 312 313/* define "workspace is too large" as this number of times larger than needed */ 314#define ZSTD_WORKSPACETOOLARGE_FACTOR 3 315 316/* when workspace is continuously too large 317 * during at least this number of times, 318 * context's memory usage is considered wasteful, 319 * because it's sized to handle a worst case scenario which rarely happens. 320 * In which case, resize it down to free some memory */ 321#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 322 323/* Controls whether the input/output buffer is buffered or stable. */ 324typedef enum { 325 ZSTD_bm_buffered = 0, /* Buffer the input/output */ 326 ZSTD_bm_stable = 1 /* ZSTD_inBuffer/ZSTD_outBuffer is stable */ 327} ZSTD_bufferMode_e; 328 329 330/*-******************************************* 331* Private declarations 332*********************************************/ 333typedef struct seqDef_s { 334 U32 offset; /* Offset code of the sequence */ 335 U16 litLength; 336 U16 matchLength; 337} seqDef; 338 339typedef struct { 340 seqDef* sequencesStart; 341 seqDef* sequences; /* ptr to end of sequences */ 342 BYTE* litStart; 343 BYTE* lit; /* ptr to end of literals */ 344 BYTE* llCode; 345 BYTE* mlCode; 346 BYTE* ofCode; 347 size_t maxNbSeq; 348 size_t maxNbLit; 349 350 /* longLengthPos and longLengthID to allow us to represent either a single litLength or matchLength 351 * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment 352 * the existing value of the litLength or matchLength by 0x10000. 353 */ 354 U32 longLengthID; /* 0 == no longLength; 1 == Represent the long literal; 2 == Represent the long match; */ 355 U32 longLengthPos; /* Index of the sequence to apply long length modification to */ 356} seqStore_t; 357 358typedef struct { 359 U32 litLength; 360 U32 matchLength; 361} ZSTD_sequenceLength; 362 363/* 364 * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences 365 * indicated by longLengthPos and longLengthID, and adds MINMATCH back to matchLength. 366 */ 367MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq) 368{ 369 ZSTD_sequenceLength seqLen; 370 seqLen.litLength = seq->litLength; 371 seqLen.matchLength = seq->matchLength + MINMATCH; 372 if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) { 373 if (seqStore->longLengthID == 1) { 374 seqLen.litLength += 0xFFFF; 375 } 376 if (seqStore->longLengthID == 2) { 377 seqLen.matchLength += 0xFFFF; 378 } 379 } 380 return seqLen; 381} 382 383/* 384 * Contains the compressed frame size and an upper-bound for the decompressed frame size. 385 * Note: before using `compressedSize`, check for errors using ZSTD_isError(). 386 * similarly, before using `decompressedBound`, check for errors using: 387 * `decompressedBound != ZSTD_CONTENTSIZE_ERROR` 388 */ 389typedef struct { 390 size_t compressedSize; 391 unsigned long long decompressedBound; 392} ZSTD_frameSizeInfo; /* decompress & legacy */ 393 394const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */ 395void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ 396 397/* custom memory allocation functions */ 398void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem); 399void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem); 400void ZSTD_customFree(void* ptr, ZSTD_customMem customMem); 401 402 403MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */ 404{ 405 assert(val != 0); 406 { 407# if (__GNUC__ >= 3) /* GCC Intrinsic */ 408 return __builtin_clz (val) ^ 31; 409# else /* Software version */ 410 static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; 411 U32 v = val; 412 v |= v >> 1; 413 v |= v >> 2; 414 v |= v >> 4; 415 v |= v >> 8; 416 v |= v >> 16; 417 return DeBruijnClz[(v * 0x07C4ACDDU) >> 27]; 418# endif 419 } 420} 421 422 423/* ZSTD_invalidateRepCodes() : 424 * ensures next compression will not use repcodes from previous block. 425 * Note : only works with regular variant; 426 * do not use with extDict variant ! */ 427void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx); /* zstdmt, adaptive_compression (shouldn't get this definition from here) */ 428 429 430typedef struct { 431 blockType_e blockType; 432 U32 lastBlock; 433 U32 origSize; 434} blockProperties_t; /* declared here for decompress and fullbench */ 435 436/*! ZSTD_getcBlockSize() : 437 * Provides the size of compressed block from block header `src` */ 438/* Used by: decompress, fullbench (does not get its definition from here) */ 439size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, 440 blockProperties_t* bpPtr); 441 442/*! ZSTD_decodeSeqHeaders() : 443 * decode sequence header from src */ 444/* Used by: decompress, fullbench (does not get its definition from here) */ 445size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, 446 const void* src, size_t srcSize); 447 448 449 450#endif /* ZSTD_CCOMMON_H_MODULE */