cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

zstd_lazy.c (63555B)


/*
 * Copyright (c) Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#include "zstd_compress_internal.h"
#include "zstd_lazy.h"


/*-*************************************
*  Binary Tree search
***************************************/

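/* Layout note: the "DUBT" (dual-use binary tree) reuses ms->chainTable as a
 * binary tree. Each position idx owns two U32 slots at bt + 2*(idx & btMask):
 * slot 0 links to the smaller subtree (or, while the position is still
 * unsorted, to the next candidate in insertion order) and slot 1 links to the
 * larger subtree (or holds ZSTD_DUBT_UNSORTED_MARK until the position gets
 * sorted). Insertion below is therefore cheap; the actual sorting is deferred
 * to ZSTD_DUBT_findBestMatch(), which fixes candidates up lazily on lookup. */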
static void
ZSTD_updateDUBT(ZSTD_matchState_t* ms,
                const BYTE* ip, const BYTE* iend,
                U32 mls)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashTable = ms->hashTable;
    U32  const hashLog = cParams->hashLog;

    U32* const bt = ms->chainTable;
    U32  const btLog  = cParams->chainLog - 1;
    U32  const btMask = (1 << btLog) - 1;

    const BYTE* const base = ms->window.base;
    U32 const target = (U32)(ip - base);
    U32 idx = ms->nextToUpdate;

    if (idx != target)
        DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)",
                    idx, target, ms->window.dictLimit);
    assert(ip + 8 <= iend);   /* condition for ZSTD_hashPtr */
    (void)iend;

    assert(idx >= ms->window.dictLimit);   /* condition for valid base+idx */
    for ( ; idx < target ; idx++) {
        size_t const h  = ZSTD_hashPtr(base + idx, hashLog, mls);   /* assumption : ip + 8 <= iend */
        U32    const matchIndex = hashTable[h];

        U32*   const nextCandidatePtr = bt + 2*(idx&btMask);
        U32*   const sortMarkPtr  = nextCandidatePtr + 1;

        DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx);
        hashTable[h] = idx;   /* Update Hash Table */
        *nextCandidatePtr = matchIndex;   /* update BT like a chain */
        *sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK;
    }
    ms->nextToUpdate = target;
}


/* ZSTD_insertDUBT1() :
 *  sort one already inserted but unsorted position
 *  assumption : curr >= btLow == (curr - btMask)
 *  doesn't fail */
static void
ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
                 U32 curr, const BYTE* inputEnd,
                 U32 nbCompares, U32 btLow,
                 const ZSTD_dictMode_e dictMode)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const bt = ms->chainTable;
    U32  const btLog  = cParams->chainLog - 1;
    U32  const btMask = (1 << btLog) - 1;
    size_t commonLengthSmaller=0, commonLengthLarger=0;
    const BYTE* const base = ms->window.base;
    const BYTE* const dictBase = ms->window.dictBase;
    const U32 dictLimit = ms->window.dictLimit;
    const BYTE* const ip = (curr>=dictLimit) ? base + curr : dictBase + curr;
    const BYTE* const iend = (curr>=dictLimit) ? inputEnd : dictBase + dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const BYTE* match;
    U32* smallerPtr = bt + 2*(curr&btMask);
    U32* largerPtr  = smallerPtr + 1;
    U32 matchIndex = *smallerPtr;   /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
    U32 dummy32;   /* to be nullified at the end */
    U32 const windowValid = ms->window.lowLimit;
    U32 const maxDistance = 1U << cParams->windowLog;
    U32 const windowLow = (curr - windowValid > maxDistance) ? curr - maxDistance : windowValid;


    DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
                curr, dictLimit, windowLow);
    assert(curr >= btLow);
    assert(ip < iend);   /* condition for ZSTD_count */

    for (; nbCompares && (matchIndex > windowLow); --nbCompares) {
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
        assert(matchIndex < curr);
        /* note : all candidates are now supposed to be sorted,
         * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK
         * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */

        if ( (dictMode != ZSTD_extDict)
          || (matchIndex+matchLength >= dictLimit)  /* both in current segment */
          || (curr < dictLimit) /* both in extDict */) {
            const BYTE* const mBase = ( (dictMode != ZSTD_extDict)
                                     || (matchIndex+matchLength >= dictLimit)) ?
                                        base : dictBase;
            assert( (matchIndex+matchLength >= dictLimit)   /* might be wrong if extDict is incorrectly set to 0 */
                 || (curr < dictLimit) );
            match = mBase + matchIndex;
            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
        } else {
            match = dictBase + matchIndex;
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
            if (matchIndex+matchLength >= dictLimit)
                match = base + matchIndex;   /* preparation for next read of match[matchLength] */
        }

        DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
                    curr, matchIndex, (U32)matchLength);

        if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
            break;   /* drop, to guarantee consistency; misses a bit of compression, but other solutions can corrupt the tree */
        }

        if (match[matchLength] < ip[matchLength]) {  /* necessarily within buffer */
            /* match is smaller than current */
            *smallerPtr = matchIndex;             /* update smaller idx */
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
            DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u",
                        matchIndex, btLow, nextPtr[1]);
            smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
        } else {
            /* match is larger than current */
            *largerPtr = matchIndex;
            commonLengthLarger = matchLength;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
            DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u",
                        matchIndex, btLow, nextPtr[0]);
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
    }   }

    *smallerPtr = *largerPtr = 0;
}


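/* ZSTD_DUBT_findBetterDictMatch() :
 * continues a search into an attached dictMatchState, walking the
 * dictionary's own (already sorted) binary tree. Dictionary indices are
 * translated back into the current window's index space via dictIndexDelta,
 * so a winning dictionary match is returned with the same
 * ZSTD_REP_MOVE-based offset encoding as a prefix match. */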
static size_t
ZSTD_DUBT_findBetterDictMatch (
        ZSTD_matchState_t* ms,
        const BYTE* const ip, const BYTE* const iend,
        size_t* offsetPtr,
        size_t bestLength,
        U32 nbCompares,
        U32 const mls,
        const ZSTD_dictMode_e dictMode)
{
    const ZSTD_matchState_t * const dms = ms->dictMatchState;
    const ZSTD_compressionParameters* const dmsCParams = &dms->cParams;
    const U32 * const dictHashTable = dms->hashTable;
    U32         const hashLog = dmsCParams->hashLog;
    size_t      const h  = ZSTD_hashPtr(ip, hashLog, mls);
    U32               dictMatchIndex = dictHashTable[h];

    const BYTE* const base = ms->window.base;
    const BYTE* const prefixStart = base + ms->window.dictLimit;
    U32         const curr = (U32)(ip-base);
    const BYTE* const dictBase = dms->window.base;
    const BYTE* const dictEnd = dms->window.nextSrc;
    U32         const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base);
    U32         const dictLowLimit = dms->window.lowLimit;
    U32         const dictIndexDelta = ms->window.lowLimit - dictHighLimit;

    U32*        const dictBt = dms->chainTable;
    U32         const btLog  = dmsCParams->chainLog - 1;
    U32         const btMask = (1 << btLog) - 1;
    U32         const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask;

    size_t commonLengthSmaller=0, commonLengthLarger=0;

    (void)dictMode;
    assert(dictMode == ZSTD_dictMatchState);

    for (; nbCompares && (dictMatchIndex > dictLowLimit); --nbCompares) {
        U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask);
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
        const BYTE* match = dictBase + dictMatchIndex;
        matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
        if (dictMatchIndex+matchLength >= dictHighLimit)
            match = base + dictMatchIndex + dictIndexDelta;   /* to prepare for next usage of match[matchLength] */

        if (matchLength > bestLength) {
            U32 matchIndex = dictMatchIndex + dictIndexDelta;
            if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
                DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
                    curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + curr - matchIndex, dictMatchIndex, matchIndex);
                bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex;
            }
            if (ip+matchLength == iend) {   /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */
                break;   /* drop, to guarantee consistency (miss a little bit of compression) */
            }
        }

        if (match[matchLength] < ip[matchLength]) {
            if (dictMatchIndex <= btLow) { break; }   /* beyond tree size, stop the search */
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
            dictMatchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
        } else {
            /* match is larger than current */
            if (dictMatchIndex <= btLow) { break; }   /* beyond tree size, stop the search */
            commonLengthLarger = matchLength;
            dictMatchIndex = nextPtr[0];
        }
    }

    if (bestLength >= MINMATCH) {
        U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
        DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
                    curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
    }
    return bestLength;

}


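/* ZSTD_DUBT_findBestMatch() :
 * proceeds in three phases: (1) walk back through candidates still tagged
 * ZSTD_DUBT_UNSORTED_MARK, reversing their links so the oldest one can be
 * reached first; (2) batch-sort those stacked candidates into the tree via
 * ZSTD_insertDUBT1(), oldest first, since each insertion assumes all older
 * positions are already sorted; (3) descend the now-sorted tree from the hash
 * head to find the longest match, re-linking nodes along the way. */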
static size_t
ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
                        const BYTE* const ip, const BYTE* const iend,
                        size_t* offsetPtr,
                        U32 const mls,
                        const ZSTD_dictMode_e dictMode)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32*   const hashTable = ms->hashTable;
    U32    const hashLog = cParams->hashLog;
    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
    U32          matchIndex  = hashTable[h];

    const BYTE* const base = ms->window.base;
    U32    const curr = (U32)(ip-base);
    U32    const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);

    U32*   const bt = ms->chainTable;
    U32    const btLog  = cParams->chainLog - 1;
    U32    const btMask = (1 << btLog) - 1;
    U32    const btLow = (btMask >= curr) ? 0 : curr - btMask;
    U32    const unsortLimit = MAX(btLow, windowLow);

    U32*         nextCandidate = bt + 2*(matchIndex&btMask);
    U32*         unsortedMark = bt + 2*(matchIndex&btMask) + 1;
    U32          nbCompares = 1U << cParams->searchLog;
    U32          nbCandidates = nbCompares;
    U32          previousCandidate = 0;

    DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", curr);
    assert(ip <= iend-8);   /* required for h calculation */
    assert(dictMode != ZSTD_dedicatedDictSearch);

    /* reach end of unsorted candidates list */
    while ( (matchIndex > unsortLimit)
         && (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK)
         && (nbCandidates > 1) ) {
        DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted",
                    matchIndex);
        *unsortedMark = previousCandidate;  /* the unsortedMark becomes a reversed chain, to move up back to original position */
        previousCandidate = matchIndex;
        matchIndex = *nextCandidate;
        nextCandidate = bt + 2*(matchIndex&btMask);
        unsortedMark = bt + 2*(matchIndex&btMask) + 1;
        nbCandidates --;
    }

    /* nullify last candidate if it's still unsorted
     * simplification, detrimental to compression ratio, beneficial for speed */
    if ( (matchIndex > unsortLimit)
      && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) {
        DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u",
                    matchIndex);
        *nextCandidate = *unsortedMark = 0;
    }

    /* batch sort stacked candidates */
    matchIndex = previousCandidate;
    while (matchIndex) {  /* will end on matchIndex == 0 */
        U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1;
        U32 const nextCandidateIdx = *nextCandidateIdxPtr;
        ZSTD_insertDUBT1(ms, matchIndex, iend,
                         nbCandidates, unsortLimit, dictMode);
        matchIndex = nextCandidateIdx;
        nbCandidates++;
    }

    /* find longest match */
    {   size_t commonLengthSmaller = 0, commonLengthLarger = 0;
        const BYTE* const dictBase = ms->window.dictBase;
        const U32 dictLimit = ms->window.dictLimit;
        const BYTE* const dictEnd = dictBase + dictLimit;
        const BYTE* const prefixStart = base + dictLimit;
        U32* smallerPtr = bt + 2*(curr&btMask);
        U32* largerPtr  = bt + 2*(curr&btMask) + 1;
        U32 matchEndIdx = curr + 8 + 1;
        U32 dummy32;   /* to be nullified at the end */
        size_t bestLength = 0;

        matchIndex  = hashTable[h];
        hashTable[h] = curr;   /* Update Hash Table */

        for (; nbCompares && (matchIndex > windowLow); --nbCompares) {
            U32* const nextPtr = bt + 2*(matchIndex & btMask);
            size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
            const BYTE* match;

            if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) {
                match = base + matchIndex;
                matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
            } else {
                match = dictBase + matchIndex;
                matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
                if (matchIndex+matchLength >= dictLimit)
                    match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
            }

            if (matchLength > bestLength) {
                if (matchLength > matchEndIdx - matchIndex)
                    matchEndIdx = matchIndex + (U32)matchLength;
                if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
                    bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex;
                if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
                    if (dictMode == ZSTD_dictMatchState) {
                        nbCompares = 0; /* in addition to avoiding checking any
                                         * further in this loop, make sure we
                                         * skip checking in the dictionary. */
                    }
                    break;   /* drop, to guarantee consistency (miss a little bit of compression) */
                }
            }

            if (match[matchLength] < ip[matchLength]) {
                /* match is smaller than current */
                *smallerPtr = matchIndex;             /* update smaller idx */
                commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
                if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
                smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
                matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
            } else {
                /* match is larger than current */
                *largerPtr = matchIndex;
                commonLengthLarger = matchLength;
                if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
                largerPtr = nextPtr;
                matchIndex = nextPtr[0];
        }   }

        *smallerPtr = *largerPtr = 0;

        assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
        if (dictMode == ZSTD_dictMatchState && nbCompares) {
            bestLength = ZSTD_DUBT_findBetterDictMatch(
                    ms, ip, iend,
                    offsetPtr, bestLength, nbCompares,
                    mls, dictMode);
        }

        assert(matchEndIdx > curr+8); /* ensure nextToUpdate is increased */
        ms->nextToUpdate = matchEndIdx - 8;   /* skip repetitive patterns */
        if (bestLength >= MINMATCH) {
            U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
            DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
                        curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
        }
        return bestLength;
    }
}


/* ZSTD_BtFindBestMatch() : Tree updater, providing best match */
FORCE_INLINE_TEMPLATE size_t
ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
                const BYTE* const ip, const BYTE* const iLimit,
                      size_t* offsetPtr,
                const U32 mls /* template */,
                const ZSTD_dictMode_e dictMode)
{
    DEBUGLOG(7, "ZSTD_BtFindBestMatch");
    if (ip < ms->window.base + ms->nextToUpdate) return 0;   /* skipped area */
    ZSTD_updateDUBT(ms, ip, iLimit, mls);
    return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode);
}


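/* The *_selectMLS() wrappers below emulate templates: each case hardwires a
 * minMatch value into the search function so the compiler can specialize the
 * hot path. minMatch 3 is rounded up to 4 (the searchers rely on 4-byte
 * reads) and 7 is rounded down to 6. */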
static size_t
ZSTD_BtFindBestMatch_selectMLS (  ZSTD_matchState_t* ms,
                            const BYTE* ip, const BYTE* const iLimit,
                                  size_t* offsetPtr)
{
    switch(ms->cParams.minMatch)
    {
    default : /* includes case 3 */
    case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
    case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
    case 7 :
    case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
    }
}


static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS (
                        ZSTD_matchState_t* ms,
                        const BYTE* ip, const BYTE* const iLimit,
                        size_t* offsetPtr)
{
    switch(ms->cParams.minMatch)
    {
    default : /* includes case 3 */
    case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
    case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
    case 7 :
    case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
    }
}


static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (
                        ZSTD_matchState_t* ms,
                        const BYTE* ip, const BYTE* const iLimit,
                        size_t* offsetPtr)
{
    switch(ms->cParams.minMatch)
    {
    default : /* includes case 3 */
    case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
    case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
    case 7 :
    case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
    }
}



/* *********************************
*  Hash Chain
***********************************/
#define NEXT_IN_CHAIN(d, mask)   chainTable[(d) & (mask)]

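/* Layout note: the hash-chain searcher keeps one head per bucket in
 * ms->hashTable, and links prior occurrences of the same hash through
 * ms->chainTable, indexed by position masked to the chain size.
 * NEXT_IN_CHAIN(d, mask) is that link: following it enumerates earlier
 * positions with the same hash, newest first, until the index leaves the
 * valid window or the chain storage. */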
/* Update chains up to ip (excluded)
   Assumption : always within prefix (i.e. not within extDict) */
FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
                        ZSTD_matchState_t* ms,
                        const ZSTD_compressionParameters* const cParams,
                        const BYTE* ip, U32 const mls)
{
    U32* const hashTable  = ms->hashTable;
    const U32 hashLog = cParams->hashLog;
    U32* const chainTable = ms->chainTable;
    const U32 chainMask = (1 << cParams->chainLog) - 1;
    const BYTE* const base = ms->window.base;
    const U32 target = (U32)(ip - base);
    U32 idx = ms->nextToUpdate;

    while(idx < target) { /* catch up */
        size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
        NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
        hashTable[h] = idx;
        idx++;
    }

    ms->nextToUpdate = target;
    return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
}

U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
}

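/* ZSTD_dedicatedDictSearch_lazy_loadDictionary() :
 * builds the DDSS layout. The hash table is oversized by a factor of
 * 2^ZSTD_LAZY_DDSS_BUCKET_LOG, so each hash value owns a bucket: the first
 * (bucketSize - 1) slots cache the most recent positions for that hash,
 * newest first, and the last slot packs a pointer into the rebuilt chain
 * table as (chainStart << 8) | chainLength. A searcher can then prefetch a
 * whole bucket at once and only falls back to the chain for deeper probing. */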
void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip)
{
    const BYTE* const base = ms->window.base;
    U32 const target = (U32)(ip - base);
    U32* const hashTable = ms->hashTable;
    U32* const chainTable = ms->chainTable;
    U32 const chainSize = 1 << ms->cParams.chainLog;
    U32 idx = ms->nextToUpdate;
    U32 const minChain = chainSize < target ? target - chainSize : idx;
    U32 const bucketSize = 1 << ZSTD_LAZY_DDSS_BUCKET_LOG;
    U32 const cacheSize = bucketSize - 1;
    U32 const chainAttempts = (1 << ms->cParams.searchLog) - cacheSize;
    U32 const chainLimit = chainAttempts > 255 ? 255 : chainAttempts;

    /* We know the hashtable is oversized by a factor of `bucketSize`.
     * We are going to temporarily pretend `bucketSize == 1`, keeping only a
     * single entry. We will use the rest of the space to construct a temporary
     * chaintable.
     */
    U32 const hashLog = ms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG;
    U32* const tmpHashTable = hashTable;
    U32* const tmpChainTable = hashTable + ((size_t)1 << hashLog);
    U32 const tmpChainSize = ((1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1) << hashLog;
    U32 const tmpMinChain = tmpChainSize < target ? target - tmpChainSize : idx;

    U32 hashIdx;

    assert(ms->cParams.chainLog <= 24);
    assert(ms->cParams.hashLog >= ms->cParams.chainLog);
    assert(idx != 0);
    assert(tmpMinChain <= minChain);

    /* fill conventional hash table and conventional chain table */
    for ( ; idx < target; idx++) {
        U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch);
        if (idx >= tmpMinChain) {
            tmpChainTable[idx - tmpMinChain] = hashTable[h];
        }
        tmpHashTable[h] = idx;
    }

    /* sort chains into ddss chain table */
    {
        U32 chainPos = 0;
        for (hashIdx = 0; hashIdx < (1U << hashLog); hashIdx++) {
            U32 count;
            U32 countBeyondMinChain = 0;
            U32 i = tmpHashTable[hashIdx];
            for (count = 0; i >= tmpMinChain && count < cacheSize; count++) {
                /* skip through the chain to the first position that won't be
                 * in the hash cache bucket */
                if (i < minChain) {
                    countBeyondMinChain++;
                }
                i = tmpChainTable[i - tmpMinChain];
            }
            if (count == cacheSize) {
                for (count = 0; count < chainLimit;) {
                    if (i < minChain) {
                        if (!i || countBeyondMinChain++ > cacheSize) {
                            /* only allow pulling `cacheSize` number of entries
                             * into the cache or chainTable beyond `minChain`,
                             * to replace the entries pulled out of the
                             * chainTable into the cache. This lets us reach
                             * back further without increasing the total number
                             * of entries in the chainTable, guaranteeing the
                             * DDSS chain table will fit into the space
                             * allocated for the regular one. */
                            break;
                        }
                    }
                    chainTable[chainPos++] = i;
                    count++;
                    if (i < tmpMinChain) {
                        break;
                    }
                    i = tmpChainTable[i - tmpMinChain];
                }
            } else {
                count = 0;
            }
            if (count) {
                tmpHashTable[hashIdx] = ((chainPos - count) << 8) + count;
            } else {
                tmpHashTable[hashIdx] = 0;
            }
        }
        assert(chainPos <= chainSize); /* I believe this is guaranteed... */
    }

    /* move chain pointers into the last entry of each hash bucket */
    for (hashIdx = (1 << hashLog); hashIdx; ) {
        U32 const bucketIdx = --hashIdx << ZSTD_LAZY_DDSS_BUCKET_LOG;
        U32 const chainPackedPointer = tmpHashTable[hashIdx];
        U32 i;
        for (i = 0; i < cacheSize; i++) {
            hashTable[bucketIdx + i] = 0;
        }
        hashTable[bucketIdx + bucketSize - 1] = chainPackedPointer;
    }

    /* fill the buckets of the hash table */
    for (idx = ms->nextToUpdate; idx < target; idx++) {
        U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch)
                   << ZSTD_LAZY_DDSS_BUCKET_LOG;
        U32 i;
        /* Shift hash cache down 1. */
        for (i = cacheSize - 1; i; i--)
            hashTable[h + i] = hashTable[h + i - 1];
        hashTable[h] = idx;
    }

    ms->nextToUpdate = target;
}


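/* ZSTD_HcFindBestMatch_generic() :
 * hash-chain search. Walks at most (1 << searchLog) chain links in the
 * current window, comparing candidates newest first, then optionally
 * continues into an attached dictionary: either the DDSS layout built above,
 * or a plain dictMatchState hash chain. ml starts at 3, so only matches of
 * length >= 4 are ever reported through offsetPtr. */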
/* inlining is important to hardwire a hot branch (template emulation) */
FORCE_INLINE_TEMPLATE
size_t ZSTD_HcFindBestMatch_generic (
                        ZSTD_matchState_t* ms,
                        const BYTE* const ip, const BYTE* const iLimit,
                        size_t* offsetPtr,
                        const U32 mls, const ZSTD_dictMode_e dictMode)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const chainTable = ms->chainTable;
    const U32 chainSize = (1 << cParams->chainLog);
    const U32 chainMask = chainSize-1;
    const BYTE* const base = ms->window.base;
    const BYTE* const dictBase = ms->window.dictBase;
    const U32 dictLimit = ms->window.dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const U32 curr = (U32)(ip-base);
    const U32 maxDistance = 1U << cParams->windowLog;
    const U32 lowestValid = ms->window.lowLimit;
    const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
    const U32 isDictionary = (ms->loadedDictEnd != 0);
    const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
    const U32 minChain = curr > chainSize ? curr - chainSize : 0;
    U32 nbAttempts = 1U << cParams->searchLog;
    size_t ml=4-1;

    const ZSTD_matchState_t* const dms = ms->dictMatchState;
    const U32 ddsHashLog = dictMode == ZSTD_dedicatedDictSearch
                         ? dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
    const size_t ddsIdx = dictMode == ZSTD_dedicatedDictSearch
                        ? ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG : 0;

    U32 matchIndex;

    if (dictMode == ZSTD_dedicatedDictSearch) {
        const U32* entry = &dms->hashTable[ddsIdx];
        PREFETCH_L1(entry);
    }

    /* HC4 match finder */
    matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);

    for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) {
        size_t currentMl=0;
        if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
            const BYTE* const match = base + matchIndex;
            assert(matchIndex >= dictLimit);   /* ensures this is true if dictMode != ZSTD_extDict */
            if (match[ml] == ip[ml])   /* potentially better */
                currentMl = ZSTD_count(ip, match, iLimit);
        } else {
            const BYTE* const match = dictBase + matchIndex;
            assert(match+4 <= dictEnd);
            if (MEM_read32(match) == MEM_read32(ip))   /* assumption : matchIndex <= dictLimit-4 (by table construction) */
                currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
        }

        /* save best solution */
        if (currentMl > ml) {
            ml = currentMl;
            *offsetPtr = curr - matchIndex + ZSTD_REP_MOVE;
            if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
        }

        if (matchIndex <= minChain) break;
        matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
    }

    assert(nbAttempts <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
    if (dictMode == ZSTD_dedicatedDictSearch) {
        const U32 ddsLowestIndex  = dms->window.dictLimit;
        const BYTE* const ddsBase = dms->window.base;
        const BYTE* const ddsEnd  = dms->window.nextSrc;
        const U32 ddsSize         = (U32)(ddsEnd - ddsBase);
        const U32 ddsIndexDelta   = dictLimit - ddsSize;
        const U32 bucketSize      = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG);
        const U32 bucketLimit     = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1;
        U32 ddsAttempt;

        for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) {
            PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]);
        }

        {
            U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
            U32 const chainIndex = chainPackedPointer >> 8;

            PREFETCH_L1(&dms->chainTable[chainIndex]);
        }

        for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) {
            size_t currentMl=0;
            const BYTE* match;
            matchIndex = dms->hashTable[ddsIdx + ddsAttempt];
            match = ddsBase + matchIndex;

            if (!matchIndex) {
                return ml;
            }

            /* guaranteed by table construction */
            (void)ddsLowestIndex;
            assert(matchIndex >= ddsLowestIndex);
            assert(match+4 <= ddsEnd);
            if (MEM_read32(match) == MEM_read32(ip)) {
                /* assumption : matchIndex <= dictLimit-4 (by table construction) */
                currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
            }

            /* save best solution */
            if (currentMl > ml) {
                ml = currentMl;
                *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
                if (ip+currentMl == iLimit) {
                    /* best possible, avoids read overflow on next attempt */
                    return ml;
                }
            }
        }

        {
            U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
            U32 chainIndex = chainPackedPointer >> 8;
            U32 const chainLength = chainPackedPointer & 0xFF;
            U32 const chainAttempts = nbAttempts - ddsAttempt;
            U32 const chainLimit = chainAttempts > chainLength ? chainLength : chainAttempts;
            U32 chainAttempt;

            for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) {
                PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]);
            }

            for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) {
                size_t currentMl=0;
                const BYTE* match;
                matchIndex = dms->chainTable[chainIndex];
                match = ddsBase + matchIndex;

                /* guaranteed by table construction */
                assert(matchIndex >= ddsLowestIndex);
                assert(match+4 <= ddsEnd);
                if (MEM_read32(match) == MEM_read32(ip)) {
                    /* assumption : matchIndex <= dictLimit-4 (by table construction) */
                    currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
                }

                /* save best solution */
                if (currentMl > ml) {
                    ml = currentMl;
                    *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
                    if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
                }
            }
        }
    } else if (dictMode == ZSTD_dictMatchState) {
        const U32* const dmsChainTable = dms->chainTable;
        const U32 dmsChainSize         = (1 << dms->cParams.chainLog);
        const U32 dmsChainMask         = dmsChainSize - 1;
        const U32 dmsLowestIndex       = dms->window.dictLimit;
        const BYTE* const dmsBase      = dms->window.base;
        const BYTE* const dmsEnd       = dms->window.nextSrc;
        const U32 dmsSize              = (U32)(dmsEnd - dmsBase);
        const U32 dmsIndexDelta        = dictLimit - dmsSize;
        const U32 dmsMinChain = dmsSize > dmsChainSize ? dmsSize - dmsChainSize : 0;

        matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)];

        for ( ; (matchIndex>=dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
            size_t currentMl=0;
            const BYTE* const match = dmsBase + matchIndex;
            assert(match+4 <= dmsEnd);
            if (MEM_read32(match) == MEM_read32(ip))   /* assumption : matchIndex <= dictLimit-4 (by table construction) */
                currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4;

            /* save best solution */
            if (currentMl > ml) {
                ml = currentMl;
                *offsetPtr = curr - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
                if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
            }

            if (matchIndex <= dmsMinChain) break;

            matchIndex = dmsChainTable[matchIndex & dmsChainMask];
        }
    }

    return ml;
}


FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (
                        ZSTD_matchState_t* ms,
                        const BYTE* ip, const BYTE* const iLimit,
                        size_t* offsetPtr)
{
    switch(ms->cParams.minMatch)
    {
    default : /* includes case 3 */
    case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
    case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
    case 7 :
    case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
    }
}


static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS (
                        ZSTD_matchState_t* ms,
                        const BYTE* ip, const BYTE* const iLimit,
                        size_t* offsetPtr)
{
    switch(ms->cParams.minMatch)
    {
    default : /* includes case 3 */
    case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
    case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
    case 7 :
    case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
    }
}


static size_t ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS (
                        ZSTD_matchState_t* ms,
                        const BYTE* ip, const BYTE* const iLimit,
                        size_t* offsetPtr)
{
    switch(ms->cParams.minMatch)
    {
    default : /* includes case 3 */
    case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dedicatedDictSearch);
    case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dedicatedDictSearch);
    case 7 :
    case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dedicatedDictSearch);
    }
}


FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
                        ZSTD_matchState_t* ms,
                        const BYTE* ip, const BYTE* const iLimit,
                        size_t* offsetPtr)
{
    switch(ms->cParams.minMatch)
    {
    default : /* includes case 3 */
    case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
    case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
    case 7 :
    case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
    }
}


/* *******************************
*  Common parser - lazy strategy
*********************************/
typedef enum { search_hashChain, search_binaryTree } searchMethod_e;

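/* ZSTD_compressBlock_lazy_generic() :
 * one parser for the whole greedy/lazy/btlazy2 family. `depth` selects the
 * strategy: 0 = greedy (take the first match of length >= 4), 1 = lazy
 * (re-search at ip+1 and keep the better candidate), 2 = lazy2 (re-search up
 * to ip+2). A later match only displaces the current one if its estimated
 * gain (match length scaled, minus the log2 of its offset) clears a small
 * handicap, which biases the parser toward earlier starts and repcodes. */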
FORCE_INLINE_TEMPLATE size_t
ZSTD_compressBlock_lazy_generic(
                        ZSTD_matchState_t* ms, seqStore_t* seqStore,
                        U32 rep[ZSTD_REP_NUM],
                        const void* src, size_t srcSize,
                        const searchMethod_e searchMethod, const U32 depth,
                        ZSTD_dictMode_e const dictMode)
{
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    const BYTE* const base = ms->window.base;
    const U32 prefixLowestIndex = ms->window.dictLimit;
    const BYTE* const prefixLowest = base + prefixLowestIndex;

    typedef size_t (*searchMax_f)(
                        ZSTD_matchState_t* ms,
                        const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);

    /*
     * This table is indexed first by the four ZSTD_dictMode_e values, and then
     * by the two searchMethod_e values. NULLs are placed for configurations
     * that should never occur (extDict modes go to the other implementation
     * below and there is no DDSS for binary tree search yet).
     */
    const searchMax_f searchFuncs[4][2] = {
        {
            ZSTD_HcFindBestMatch_selectMLS,
            ZSTD_BtFindBestMatch_selectMLS
        },
        {
            NULL,
            NULL
        },
        {
            ZSTD_HcFindBestMatch_dictMatchState_selectMLS,
            ZSTD_BtFindBestMatch_dictMatchState_selectMLS
        },
        {
            ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS,
            NULL
        }
    };

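    /* Offset convention inside this parser: offset == 0 denotes a repcode
     * match (reuse offset_1), while a real match carries its distance as
     * distance + ZSTD_REP_MOVE, matching what the searchers above return
     * through offsetPtr. ZSTD_storeSeq() receives that encoded value as-is. */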
    searchMax_f const searchMax = searchFuncs[dictMode][searchMethod == search_binaryTree];
    U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;

    const int isDMS = dictMode == ZSTD_dictMatchState;
    const int isDDS = dictMode == ZSTD_dedicatedDictSearch;
    const int isDxS = isDMS || isDDS;
    const ZSTD_matchState_t* const dms = ms->dictMatchState;
    const U32 dictLowestIndex      = isDxS ? dms->window.dictLimit : 0;
    const BYTE* const dictBase     = isDxS ? dms->window.base : NULL;
    const BYTE* const dictLowest   = isDxS ? dictBase + dictLowestIndex : NULL;
    const BYTE* const dictEnd      = isDxS ? dms->window.nextSrc : NULL;
    const U32 dictIndexDelta       = isDxS ?
                                     prefixLowestIndex - (U32)(dictEnd - dictBase) :
                                     0;
    const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));

    assert(searchMax != NULL);

    DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);

    /* init */
    ip += (dictAndPrefixLength == 0);
    if (dictMode == ZSTD_noDict) {
        U32 const curr = (U32)(ip - base);
        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog);
        U32 const maxRep = curr - windowLow;
        if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
        if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
    }
    if (isDxS) {
        /* dictMatchState repCode checks don't currently handle repCode == 0
         * disabling. */
        assert(offset_1 <= dictAndPrefixLength);
        assert(offset_2 <= dictAndPrefixLength);
    }

    /* Match Loop */
#if defined(__x86_64__)
    /* I've measured a random 5% speed loss on levels 5 & 6 (greedy) when the
     * code alignment is perturbed. To fix the instability, align the loop on a 32-byte boundary.
     */
    __asm__(".p2align 5");
#endif
    while (ip < ilimit) {
        size_t matchLength=0;
        size_t offset=0;
        const BYTE* start=ip+1;

        /* check repCode */
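        /* Note on the "intentional underflow" tests below: the unsigned
         * subtraction (prefixLowestIndex-1) - repIndex wraps to a huge value
         * whenever repIndex lies inside the prefix, so `>= 3` accepts any
         * prefix position and any dictionary position at least 4 bytes from
         * the dictionary end; it only rejects the last 3 dictionary
         * positions, where a 4-byte read would straddle the dict/prefix
         * boundary. */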
        if (isDxS) {
            const U32 repIndex = (U32)(ip - base) + 1 - offset_1;
            const BYTE* repMatch = ((dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch)
                                && repIndex < prefixLowestIndex) ?
                                   dictBase + (repIndex - dictIndexDelta) :
                                   base + repIndex;
            if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
                && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
                const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
                matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
                if (depth==0) goto _storeSequence;
            }
        }
        if ( dictMode == ZSTD_noDict
          && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
            matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
            if (depth==0) goto _storeSequence;
        }

        /* first search (depth 0) */
        {   size_t offsetFound = 999999999;
            size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
            if (ml2 > matchLength)
                matchLength = ml2, start = ip, offset=offsetFound;
        }

        if (matchLength < 4) {
            ip += ((ip-anchor) >> kSearchStrength) + 1;   /* jump faster over incompressible sections */
            continue;
        }

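        /* Lazy step: each iteration below advances ip by one position and
         * re-searches. A new candidate must beat the current one's gain plus
         * a small handicap (+1 for the repcode comparisons, +4 then +7 for
         * the regular searches) before it takes over, so ties favor the
         * earlier, shorter-offset match already in hand. */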
    982        /* let's try to find a better solution */
    983        if (depth>=1)
    984        while (ip<ilimit) {
    985            ip ++;
    986            if ( (dictMode == ZSTD_noDict)
    987              && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
    988                size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
    989                int const gain2 = (int)(mlRep * 3);
    990                int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
    991                if ((mlRep >= 4) && (gain2 > gain1))
    992                    matchLength = mlRep, offset = 0, start = ip;
    993            }
    994            if (isDxS) {
    995                const U32 repIndex = (U32)(ip - base) - offset_1;
    996                const BYTE* repMatch = repIndex < prefixLowestIndex ?
    997                               dictBase + (repIndex - dictIndexDelta) :
    998                               base + repIndex;
    999                if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
   1000                    && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
   1001                    const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
   1002                    size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
   1003                    int const gain2 = (int)(mlRep * 3);
   1004                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
   1005                    if ((mlRep >= 4) && (gain2 > gain1))
   1006                        matchLength = mlRep, offset = 0, start = ip;
   1007                }
   1008            }
   1009            {   size_t offset2=999999999;
   1010                size_t const ml2 = searchMax(ms, ip, iend, &offset2);
   1011                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
   1012                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
   1013                if ((ml2 >= 4) && (gain2 > gain1)) {
   1014                    matchLength = ml2, offset = offset2, start = ip;
   1015                    continue;   /* search a better one */
   1016            }   }
   1017
   1018            /* let's find an even better one */
   1019            if ((depth==2) && (ip<ilimit)) {
   1020                ip ++;
   1021                if ( (dictMode == ZSTD_noDict)
   1022                  && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
   1023                    size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
   1024                    int const gain2 = (int)(mlRep * 4);
   1025                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
   1026                    if ((mlRep >= 4) && (gain2 > gain1))
   1027                        matchLength = mlRep, offset = 0, start = ip;
   1028                }
   1029                if (isDxS) {
   1030                    const U32 repIndex = (U32)(ip - base) - offset_1;
   1031                    const BYTE* repMatch = repIndex < prefixLowestIndex ?
   1032                                   dictBase + (repIndex - dictIndexDelta) :
   1033                                   base + repIndex;
   1034                    if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
   1035                        && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
   1036                        const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
   1037                        size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
   1038                        int const gain2 = (int)(mlRep * 4);
   1039                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
   1040                        if ((mlRep >= 4) && (gain2 > gain1))
   1041                            matchLength = mlRep, offset = 0, start = ip;
   1042                    }
   1043                }
   1044                {   size_t offset2=999999999;
   1045                    size_t const ml2 = searchMax(ms, ip, iend, &offset2);
   1046                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
   1047                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
   1048                    if ((ml2 >= 4) && (gain2 > gain1)) {
   1049                        matchLength = ml2, offset = offset2, start = ip;
   1050                        continue;
   1051            }   }   }
   1052            break;  /* nothing found : store previous solution */
   1053        }
   1054
   1055        /* NOTE:
   1056         * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior.
   1057         * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which
   1058         * overflows the pointer, which is undefined behavior.
   1059         */
   1060        /* catch up */
   1061        if (offset) {
   1062            if (dictMode == ZSTD_noDict) {
   1063                while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest))
   1064                     && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) )  /* only search for offset within prefix */
   1065                    { start--; matchLength++; }
   1066            }
   1067            if (isDxS) {
   1068                U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
   1069                const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
   1070                const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
   1071                while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
   1072            }
   1073            offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
   1074        }
   1075        /* store sequence */
   1076_storeSequence:
   1077        {   size_t const litLength = start - anchor;
   1078            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
   1079            anchor = ip = start + matchLength;
   1080        }
   1081
   1082        /* check immediate repcode */
   1083        if (isDxS) {
   1084            while (ip <= ilimit) {
   1085                U32 const current2 = (U32)(ip-base);
   1086                U32 const repIndex = current2 - offset_2;
   1087                const BYTE* repMatch = repIndex < prefixLowestIndex ?
   1088                        dictBase - dictIndexDelta + repIndex :
   1089                        base + repIndex;
   1090                if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */)
   1091                   && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
   1092                    const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
   1093                    matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
   1094                    offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset_2 <=> offset_1 */
   1095                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
   1096                    ip += matchLength;
   1097                    anchor = ip;
   1098                    continue;
   1099                }
   1100                break;
   1101            }
   1102        }
   1103
   1104        if (dictMode == ZSTD_noDict) {
   1105            while ( ((ip <= ilimit) & (offset_2>0))
   1106                 && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
   1107                /* store sequence */
   1108                matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
   1109                offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
   1110                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
   1111                ip += matchLength;
   1112                anchor = ip;
   1113                continue;   /* faster when present ... (?) */
   1114    }   }   }
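    /* Editor's note: the immediate-repcode loops above keep emitting
     * zero-literal sequences while offset_2 keeps matching, swapping the
     * two-slot repcode history on every hit.  E.g. with (offset_1, offset_2)
     * = (16, 32), a hit at distance 32 flips the history to (32, 16), so the
     * alternate distance is the one probed at the next position. */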
   1115
   1116    /* Save reps for next block */
   1117    rep[0] = offset_1 ? offset_1 : savedOffset;
   1118    rep[1] = offset_2 ? offset_2 : savedOffset;
   1119
   1120    /* Return the last literals size */
   1121    return (size_t)(iend - anchor);
   1122}
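/* Editor's note on the lazy heuristic above: a candidate found one position
 * later must beat the incumbent by more than the literal byte it costs.
 * Matches are scored as 4*length minus log2 of the offset code, with a
 * constant handicap protecting the incumbent (+4 when probing one byte
 * ahead, +7 when probing two).  Worked example: an incumbent of length 5
 * with offset code 1024 scores 5*4 - ZSTD_highbit32(1025) + 4 = 14, while a
 * candidate of length 6 with offset code 64 scores 6*4 - ZSTD_highbit32(65)
 * = 18, so the search advances and keeps the closer, longer match. */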
   1123
   1124
   1125size_t ZSTD_compressBlock_btlazy2(
   1126        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
   1127        void const* src, size_t srcSize)
   1128{
   1129    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
   1130}
   1131
   1132size_t ZSTD_compressBlock_lazy2(
   1133        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
   1134        void const* src, size_t srcSize)
   1135{
   1136    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
   1137}
   1138
   1139size_t ZSTD_compressBlock_lazy(
   1140        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
   1141        void const* src, size_t srcSize)
   1142{
   1143    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
   1144}
   1145
   1146size_t ZSTD_compressBlock_greedy(
   1147        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
   1148        void const* src, size_t srcSize)
   1149{
   1150    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
   1151}
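/* Editor's summary of the prefix (noDict) instantiations above:
 *
 *   entry point   searchMethod        depth  look-ahead
 *   greedy        search_hashChain      0    none
 *   lazy          search_hashChain      1    probe ip+1
 *   lazy2         search_hashChain      2    probe ip+1 and ip+2
 *   btlazy2       search_binaryTree     2    probe ip+1 and ip+2
 *
 * Because ZSTD_compressBlock_lazy_generic is FORCE_INLINE_TEMPLATE, each
 * wrapper compiles to a full specialization of the generic body, letting
 * the constant (searchMethod, depth, dictMode) arguments fold away. */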
   1152
   1153size_t ZSTD_compressBlock_btlazy2_dictMatchState(
   1154        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
   1155        void const* src, size_t srcSize)
   1156{
   1157    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
   1158}
   1159
   1160size_t ZSTD_compressBlock_lazy2_dictMatchState(
   1161        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
   1162        void const* src, size_t srcSize)
   1163{
   1164    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
   1165}
   1166
   1167size_t ZSTD_compressBlock_lazy_dictMatchState(
   1168        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
   1169        void const* src, size_t srcSize)
   1170{
   1171    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
   1172}
   1173
   1174size_t ZSTD_compressBlock_greedy_dictMatchState(
   1175        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
   1176        void const* src, size_t srcSize)
   1177{
   1178    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
   1179}
   1180
   1181
   1182size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
   1183        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
   1184        void const* src, size_t srcSize)
   1185{
   1186    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch);
   1187}
   1188
   1189size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
   1190        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
   1191        void const* src, size_t srcSize)
   1192{
   1193    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch);
   1194}
   1195
   1196size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
   1197        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
   1198        void const* src, size_t srcSize)
   1199{
   1200    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch);
   1201}
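/* Editor's note: callers never invoke these entry points directly;
 * ZSTD_selectBlockCompressor() in zstd_compress.c picks one from a table
 * indexed by (strategy, dictMode).  A simplified sketch of that dispatch,
 * reduced to the lazy2 variants defined in this file:
 *
 *   ZSTD_blockCompressor bc =
 *         (dictMode == ZSTD_extDict)        ? ZSTD_compressBlock_lazy2_extDict
 *       : (dictMode == ZSTD_dictMatchState) ? ZSTD_compressBlock_lazy2_dictMatchState
 *       : (dictMode == ZSTD_dedicatedDictSearch)
 *                                           ? ZSTD_compressBlock_lazy2_dedicatedDictSearch
 *                                           : ZSTD_compressBlock_lazy2;
 */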
   1202
   1203
   1204FORCE_INLINE_TEMPLATE
   1205size_t ZSTD_compressBlock_lazy_extDict_generic(
   1206                        ZSTD_matchState_t* ms, seqStore_t* seqStore,
   1207                        U32 rep[ZSTD_REP_NUM],
   1208                        const void* src, size_t srcSize,
   1209                        const searchMethod_e searchMethod, const U32 depth)
   1210{
   1211    const BYTE* const istart = (const BYTE*)src;
   1212    const BYTE* ip = istart;
   1213    const BYTE* anchor = istart;
   1214    const BYTE* const iend = istart + srcSize;
   1215    const BYTE* const ilimit = iend - 8;
   1216    const BYTE* const base = ms->window.base;
   1217    const U32 dictLimit = ms->window.dictLimit;
   1218    const BYTE* const prefixStart = base + dictLimit;
   1219    const BYTE* const dictBase = ms->window.dictBase;
   1220    const BYTE* const dictEnd  = dictBase + dictLimit;
   1221    const BYTE* const dictStart  = dictBase + ms->window.lowLimit;
   1222    const U32 windowLog = ms->cParams.windowLog;
   1223
   1224    typedef size_t (*searchMax_f)(
   1225                        ZSTD_matchState_t* ms,
   1226                        const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
   1227    searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
   1228
   1229    U32 offset_1 = rep[0], offset_2 = rep[1];
   1230
   1231    DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic");
   1232
   1233    /* init */
   1234    ip += (ip == prefixStart);
   1235
   1236    /* Match Loop */
   1237#if defined(__x86_64__)
    1238    /* I've measured a random 5% speed loss on levels 5 & 6 (greedy) when the
    1239     * code alignment is perturbed. To fix the instability, align the loop on a 32-byte boundary.
    1240     */
   1241    __asm__(".p2align 5");
   1242#endif
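    /* Editor's note: ".p2align 5" pads with NOPs up to the next 2^5 = 32-byte
     * boundary, so the hot loop below starts at a fixed alignment no matter
     * how the surrounding code shifts between builds. */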
   1243    while (ip < ilimit) {
   1244        size_t matchLength=0;
   1245        size_t offset=0;
   1246        const BYTE* start=ip+1;
   1247        U32 curr = (U32)(ip-base);
   1248
   1249        /* check repCode */
   1250        {   const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr+1, windowLog);
   1251            const U32 repIndex = (U32)(curr+1 - offset_1);
   1252            const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
   1253            const BYTE* const repMatch = repBase + repIndex;
   1254            if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))   /* intentional overflow */
   1255            if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
    1256                /* repcode detected: we should take it */
   1257                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
   1258                matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4;
   1259                if (depth==0) goto _storeSequence;
   1260        }   }
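        /* Editor's note on the "(U32)((dictLimit-1) - repIndex) >= 3" guard:
         * it both validates repIndex and keeps the 4-byte read inside one
         * segment.  With dictLimit = 100:  repIndex 96 -> 3, passes (the read
         * ends on the last extDict byte);  repIndex 98 -> 1, rejected (the
         * read would straddle the extDict/prefix seam);  repIndex 120 wraps
         * to a huge value, passes (match lies wholly inside the prefix). */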
   1261
   1262        /* first search (depth 0) */
   1263        {   size_t offsetFound = 999999999;
   1264            size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
   1265            if (ml2 > matchLength)
   1266                matchLength = ml2, start = ip, offset=offsetFound;
   1267        }
   1268
   1269        if (matchLength < 4) {
   1270            ip += ((ip-anchor) >> kSearchStrength) + 1;   /* jump faster over incompressible sections */
   1271            continue;
   1272        }
   1273
   1274        /* let's try to find a better solution */
   1275        if (depth>=1)
   1276        while (ip<ilimit) {
   1277            ip ++;
   1278            curr++;
   1279            /* check repCode */
   1280            if (offset) {
   1281                const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
   1282                const U32 repIndex = (U32)(curr - offset_1);
   1283                const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
   1284                const BYTE* const repMatch = repBase + repIndex;
   1285                if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))  /* intentional overflow */
   1286                if (MEM_read32(ip) == MEM_read32(repMatch)) {
   1287                    /* repcode detected */
   1288                    const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
   1289                    size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
   1290                    int const gain2 = (int)(repLength * 3);
   1291                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
   1292                    if ((repLength >= 4) && (gain2 > gain1))
   1293                        matchLength = repLength, offset = 0, start = ip;
   1294            }   }
   1295
   1296            /* search match, depth 1 */
   1297            {   size_t offset2=999999999;
   1298                size_t const ml2 = searchMax(ms, ip, iend, &offset2);
   1299                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
   1300                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
   1301                if ((ml2 >= 4) && (gain2 > gain1)) {
   1302                    matchLength = ml2, offset = offset2, start = ip;
   1303                    continue;   /* search a better one */
   1304            }   }
   1305
   1306            /* let's find an even better one */
   1307            if ((depth==2) && (ip<ilimit)) {
   1308                ip ++;
   1309                curr++;
   1310                /* check repCode */
   1311                if (offset) {
   1312                    const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
   1313                    const U32 repIndex = (U32)(curr - offset_1);
   1314                    const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
   1315                    const BYTE* const repMatch = repBase + repIndex;
   1316                    if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))  /* intentional overflow */
   1317                    if (MEM_read32(ip) == MEM_read32(repMatch)) {
   1318                        /* repcode detected */
   1319                        const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
   1320                        size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
   1321                        int const gain2 = (int)(repLength * 4);
   1322                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
   1323                        if ((repLength >= 4) && (gain2 > gain1))
   1324                            matchLength = repLength, offset = 0, start = ip;
   1325                }   }
   1326
   1327                /* search match, depth 2 */
   1328                {   size_t offset2=999999999;
   1329                    size_t const ml2 = searchMax(ms, ip, iend, &offset2);
   1330                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
   1331                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
   1332                    if ((ml2 >= 4) && (gain2 > gain1)) {
   1333                        matchLength = ml2, offset = offset2, start = ip;
   1334                        continue;
   1335            }   }   }
    1336            break;  /* nothing found: store previous solution */
   1337        }
   1338
   1339        /* catch up */
   1340        if (offset) {
   1341            U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
   1342            const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
   1343            const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
   1344            while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
   1345            offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
   1346        }
   1347
   1348        /* store sequence */
   1349_storeSequence:
   1350        {   size_t const litLength = start - anchor;
   1351            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
   1352            anchor = ip = start + matchLength;
   1353        }
   1354
   1355        /* check immediate repcode */
   1356        while (ip <= ilimit) {
   1357            const U32 repCurrent = (U32)(ip-base);
   1358            const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog);
   1359            const U32 repIndex = repCurrent - offset_2;
   1360            const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
   1361            const BYTE* const repMatch = repBase + repIndex;
   1362            if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))  /* intentional overflow */
   1363            if (MEM_read32(ip) == MEM_read32(repMatch)) {
    1364                /* repcode detected: we should take it */
   1365                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
   1366                matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
   1367                offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset history */
   1368                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
   1369                ip += matchLength;
   1370                anchor = ip;
   1371                continue;   /* faster when present ... (?) */
   1372            }
   1373            break;
   1374    }   }
   1375
   1376    /* Save reps for next block */
   1377    rep[0] = offset_1;
   1378    rep[1] = offset_2;
   1379
   1380    /* Return the last literals size */
   1381    return (size_t)(iend - anchor);
   1382}
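/* Editor's note: unlike ZSTD_compressBlock_lazy_generic, this extDict variant
 * loads rep[] unclamped and re-validates every repcode against windowLow at
 * the point of use, so offset_1/offset_2 can be stored back unconditionally
 * above; no savedOffset fallback is required. */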
   1383
   1384
   1385size_t ZSTD_compressBlock_greedy_extDict(
   1386        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
   1387        void const* src, size_t srcSize)
   1388{
   1389    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
   1390}
   1391
   1392size_t ZSTD_compressBlock_lazy_extDict(
   1393        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
   1394        void const* src, size_t srcSize)
   1396{
   1397    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
   1398}
   1399
   1400size_t ZSTD_compressBlock_lazy2_extDict(
   1401        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
   1402        void const* src, size_t srcSize)
   1404{
   1405    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
   1406}
   1407
   1408size_t ZSTD_compressBlock_btlazy2_extDict(
   1409        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
   1410        void const* src, size_t srcSize)
   1412{
   1413    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
   1414}
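#if 0
/* Editor's illustration (not compiled): reaching the strategies above through
 * upstream libzstd's public API.  Assumes userspace <zstd.h>; the kernel build
 * exposes a different wrapper API.  ZSTD_c_strategy routes block compression
 * to the matching ZSTD_compressBlock_* entry point, e.g. ZSTD_lazy2 selects
 * ZSTD_compressBlock_lazy2 in the no-dictionary case. */
#include <zstd.h>

static size_t compress_with_lazy2(void* dst, size_t dstCapacity,
                                  const void* src, size_t srcSize)
{
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    size_t cSize;
    /* Force the hash-chain, depth-2 lazy parser regardless of level. */
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_strategy, ZSTD_lazy2);
    cSize = ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize);
    ZSTD_freeCCtx(cctx);
    return cSize;   /* caller should check with ZSTD_isError() */
}
#endif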