/* huf.h (20231B) */
1/* ****************************************************************** 2 * huff0 huffman codec, 3 * part of Finite State Entropy library 4 * Copyright (c) Yann Collet, Facebook, Inc. 5 * 6 * You can contact the author at : 7 * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy 8 * 9 * This source code is licensed under both the BSD-style license (found in the 10 * LICENSE file in the root directory of this source tree) and the GPLv2 (found 11 * in the COPYING file in the root directory of this source tree). 12 * You may select, at your option, one of the above-listed licenses. 13****************************************************************** */ 14 15 16#ifndef HUF_H_298734234 17#define HUF_H_298734234 18 19/* *** Dependencies *** */ 20#include "zstd_deps.h" /* size_t */ 21 22 23/* *** library symbols visibility *** */ 24/* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual, 25 * HUF symbols remain "private" (internal symbols for library only). 26 * Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */ 27#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4) 28# define HUF_PUBLIC_API __attribute__ ((visibility ("default"))) 29#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */ 30# define HUF_PUBLIC_API __declspec(dllexport) 31#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1) 32# define HUF_PUBLIC_API __declspec(dllimport) /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */ 33#else 34# define HUF_PUBLIC_API 35#endif 36 37 38/* ========================== */ 39/* *** simple functions *** */ 40/* ========================== */ 41 42/* HUF_compress() : 43 * Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'. 44 * 'dst' buffer must be already allocated. 45 * Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize). 
46 * `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB. 47 * @return : size of compressed data (<= `dstCapacity`). 48 * Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! 49 * if HUF_isError(return), compression failed (more details using HUF_getErrorName()) 50 */ 51HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity, 52 const void* src, size_t srcSize); 53 54/* HUF_decompress() : 55 * Decompress HUF data from buffer 'cSrc', of size 'cSrcSize', 56 * into already allocated buffer 'dst', of minimum size 'dstSize'. 57 * `originalSize` : **must** be the ***exact*** size of original (uncompressed) data. 58 * Note : in contrast with FSE, HUF_decompress can regenerate 59 * RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data, 60 * because it knows size to regenerate (originalSize). 61 * @return : size of regenerated data (== originalSize), 62 * or an error code, which can be tested using HUF_isError() 63 */ 64HUF_PUBLIC_API size_t HUF_decompress(void* dst, size_t originalSize, 65 const void* cSrc, size_t cSrcSize); 66 67 68/* *** Tool functions *** */ 69#define HUF_BLOCKSIZE_MAX (128 * 1024) /*< maximum input size for a single block compressed with HUF_compress */ 70HUF_PUBLIC_API size_t HUF_compressBound(size_t size); /*< maximum compressed size (worst case) */ 71 72/* Error Management */ 73HUF_PUBLIC_API unsigned HUF_isError(size_t code); /*< tells if a return value is an error code */ 74HUF_PUBLIC_API const char* HUF_getErrorName(size_t code); /*< provides error code string (useful for debugging) */ 75 76 77/* *** Advanced function *** */ 78 79/* HUF_compress2() : 80 * Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`. 81 * `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX . 82 * `tableLog` must be `<= HUF_TABLELOG_MAX` . 
*/ 83HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity, 84 const void* src, size_t srcSize, 85 unsigned maxSymbolValue, unsigned tableLog); 86 87/* HUF_compress4X_wksp() : 88 * Same as HUF_compress2(), but uses externally allocated `workSpace`. 89 * `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */ 90#define HUF_WORKSPACE_SIZE ((6 << 10) + 256) 91#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32)) 92HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, 93 const void* src, size_t srcSize, 94 unsigned maxSymbolValue, unsigned tableLog, 95 void* workSpace, size_t wkspSize); 96 97#endif /* HUF_H_298734234 */ 98 99/* ****************************************************************** 100 * WARNING !! 101 * The following section contains advanced and experimental definitions 102 * which shall never be used in the context of a dynamic library, 103 * because they are not guaranteed to remain stable in the future. 104 * Only consider them in association with static linking. 105 * *****************************************************************/ 106#if !defined(HUF_H_HUF_STATIC_LINKING_ONLY) 107#define HUF_H_HUF_STATIC_LINKING_ONLY 108 109/* *** Dependencies *** */ 110#include "mem.h" /* U32 */ 111#define FSE_STATIC_LINKING_ONLY 112#include "fse.h" 113 114 115/* *** Constants *** */ 116#define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */ 117#define HUF_TABLELOG_DEFAULT 11 /* default tableLog value when none specified */ 118#define HUF_SYMBOLVALUE_MAX 255 119 120#define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ 121#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX) 122# error "HUF_TABLELOG_MAX is too large !" 
123#endif 124 125 126/* **************************************** 127* Static allocation 128******************************************/ 129/* HUF buffer bounds */ 130#define HUF_CTABLEBOUND 129 131#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8) /* only true when incompressible is pre-filtered with fast heuristic */ 132#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ 133 134/* static allocation of HUF's Compression Table */ 135/* this is a private definition, just exposed for allocation and strict aliasing purpose. never EVER access its members directly */ 136struct HUF_CElt_s { 137 U16 val; 138 BYTE nbBits; 139}; /* typedef'd to HUF_CElt */ 140typedef struct HUF_CElt_s HUF_CElt; /* consider it an incomplete type */ 141#define HUF_CTABLE_SIZE_U32(maxSymbolValue) ((maxSymbolValue)+1) /* Use tables of U32, for proper alignment */ 142#define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32)) 143#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \ 144 HUF_CElt name[HUF_CTABLE_SIZE_U32(maxSymbolValue)] /* no final ; */ 145 146/* static allocation of HUF's DTable */ 147typedef U32 HUF_DTable; 148#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<(maxTableLog))) 149#define HUF_CREATE_STATIC_DTABLEX1(DTable, maxTableLog) \ 150 HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1) * 0x01000001) } 151#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \ 152 HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog) * 0x01000001) } 153 154 155/* **************************************** 156* Advanced decompression functions 157******************************************/ 158size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< single-symbol decoder */ 159#ifndef HUF_FORCE_DECOMPRESS_X1 160size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< 
double-symbols decoder */ 161#endif 162 163size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< decodes RLE and uncompressed */ 164size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< considers RLE and uncompressed as errors */ 165size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< considers RLE and uncompressed as errors */ 166size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< single-symbol decoder */ 167size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< single-symbol decoder */ 168#ifndef HUF_FORCE_DECOMPRESS_X1 169size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< double-symbols decoder */ 170size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< double-symbols decoder */ 171#endif 172 173 174/* **************************************** 175 * HUF detailed API 176 * ****************************************/ 177 178/*! HUF_compress() does the following: 179 * 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h") 180 * 2. (optional) refine tableLog using HUF_optimalTableLog() 181 * 3. build Huffman table from count using HUF_buildCTable() 182 * 4. save Huffman table to memory buffer using HUF_writeCTable() 183 * 5. encode the data stream using HUF_compress4X_usingCTable() 184 * 185 * The following API allows targeting specific sub-functions for advanced tasks. 
186 * For example, it's possible to compress several blocks using the same 'CTable', 187 * or to save and regenerate 'CTable' using external methods. 188 */ 189unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); 190size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */ 191size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog); 192size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize); 193size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); 194size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); 195int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); 196 197typedef enum { 198 HUF_repeat_none, /*< Cannot use the previous table */ 199 HUF_repeat_check, /*< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */ 200 HUF_repeat_valid /*< Can use the previous table and it is assumed to be valid */ 201 } HUF_repeat; 202/* HUF_compress4X_repeat() : 203 * Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. 204 * If it uses hufTable it does not modify hufTable or repeat. 205 * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. 206 * If preferRepeat then the old table will always be used if valid. 
*/ 207size_t HUF_compress4X_repeat(void* dst, size_t dstSize, 208 const void* src, size_t srcSize, 209 unsigned maxSymbolValue, unsigned tableLog, 210 void* workSpace, size_t wkspSize, /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ 211 HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); 212 213/* HUF_buildCTable_wksp() : 214 * Same as HUF_buildCTable(), but using externally allocated scratch buffer. 215 * `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE. 216 */ 217#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1) 218#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned)) 219size_t HUF_buildCTable_wksp (HUF_CElt* tree, 220 const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, 221 void* workSpace, size_t wkspSize); 222 223/*! HUF_readStats() : 224 * Read compact Huffman tree, saved by HUF_writeCTable(). 225 * `huffWeight` is destination buffer. 226 * @return : size read from `src` , or an error Code . 227 * Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */ 228size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, 229 U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, 230 const void* src, size_t srcSize); 231 232/*! HUF_readStats_wksp() : 233 * Same as HUF_readStats() but takes an external workspace which must be 234 * 4-byte aligned and its size must be >= HUF_READ_STATS_WORKSPACE_SIZE. 235 * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. 
236 */ 237#define HUF_READ_STATS_WORKSPACE_SIZE_U32 FSE_DECOMPRESS_WKSP_SIZE_U32(6, HUF_TABLELOG_MAX-1) 238#define HUF_READ_STATS_WORKSPACE_SIZE (HUF_READ_STATS_WORKSPACE_SIZE_U32 * sizeof(unsigned)) 239size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, 240 U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, 241 const void* src, size_t srcSize, 242 void* workspace, size_t wkspSize, 243 int bmi2); 244 245/* HUF_readCTable() : 246 * Loading a CTable saved with HUF_writeCTable() */ 247size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights); 248 249/* HUF_getNbBits() : 250 * Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX 251 * Note 1 : is not inlined, as HUF_CElt definition is private 252 * Note 2 : const void* used, so that it can provide a statically allocated table as argument (which uses type U32) */ 253U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue); 254 255/* 256 * HUF_decompress() does the following: 257 * 1. select the decompression algorithm (X1, X2) based on pre-computed heuristics 258 * 2. build Huffman table from save, using HUF_readDTableX?() 259 * 3. decode 1 or 4 segments in parallel using HUF_decompress?X?_usingDTable() 260 */ 261 262/* HUF_selectDecoder() : 263 * Tells which decoder is likely to decode faster, 264 * based on a set of pre-computed metrics. 265 * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . 266 * Assumption : 0 < dstSize <= 128 KB */ 267U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize); 268 269/* 270 * The minimum workspace size for the `workSpace` used in 271 * HUF_readDTableX1_wksp() and HUF_readDTableX2_wksp(). 272 * 273 * The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when 274 * HUF_TABLE_LOG_MAX=12 to ~1850 bytes when HUF_TABLE_LOG_MAX=15. 
275 * Buffer overflow errors may potentially occur if code modifications result in 276 * a required workspace size greater than that specified in the following 277 * macro. 278 */ 279#define HUF_DECOMPRESS_WORKSPACE_SIZE ((2 << 10) + (1 << 9)) 280#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32)) 281 282#ifndef HUF_FORCE_DECOMPRESS_X2 283size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize); 284size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); 285#endif 286#ifndef HUF_FORCE_DECOMPRESS_X1 287size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize); 288size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); 289#endif 290 291size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); 292#ifndef HUF_FORCE_DECOMPRESS_X2 293size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); 294#endif 295#ifndef HUF_FORCE_DECOMPRESS_X1 296size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); 297#endif 298 299 300/* ====================== */ 301/* single stream variants */ 302/* ====================== */ 303 304size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); 305size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /*< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ 306size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); 307/* HUF_compress1X_repeat() : 308 * Same as 
HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. 309 * If it uses hufTable it does not modify hufTable or repeat. 310 * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. 311 * If preferRepeat then the old table will always be used if valid. */ 312size_t HUF_compress1X_repeat(void* dst, size_t dstSize, 313 const void* src, size_t srcSize, 314 unsigned maxSymbolValue, unsigned tableLog, 315 void* workSpace, size_t wkspSize, /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ 316 HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); 317 318size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ 319#ifndef HUF_FORCE_DECOMPRESS_X1 320size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ 321#endif 322 323size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); 324size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); 325#ifndef HUF_FORCE_DECOMPRESS_X2 326size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< single-symbol decoder */ 327size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< single-symbol decoder */ 328#endif 329#ifndef HUF_FORCE_DECOMPRESS_X1 330size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< double-symbols decoder */ 331size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< double-symbols decoder */ 332#endif 333 334size_t 
HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /*< automatic selection of sing or double symbol decoder, based on DTable */ 335#ifndef HUF_FORCE_DECOMPRESS_X2 336size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); 337#endif 338#ifndef HUF_FORCE_DECOMPRESS_X1 339size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); 340#endif 341 342/* BMI2 variants. 343 * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. 344 */ 345size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); 346#ifndef HUF_FORCE_DECOMPRESS_X2 347size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); 348#endif 349size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); 350size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); 351#ifndef HUF_FORCE_DECOMPRESS_X2 352size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2); 353#endif 354 355#endif /* HUF_STATIC_LINKING_ONLY */ 356