cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

hist.c (6756B)


      1/* ******************************************************************
      2 * hist : Histogram functions
      3 * part of Finite State Entropy project
      4 * Copyright (c) Yann Collet, Facebook, Inc.
      5 *
      6 *  You can contact the author at :
      7 *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
      8 *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
      9 *
     10 * This source code is licensed under both the BSD-style license (found in the
     11 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
     12 * in the COPYING file in the root directory of this source tree).
     13 * You may select, at your option, one of the above-listed licenses.
     14****************************************************************** */
     15
     16/* --- dependencies --- */
     17#include "../common/mem.h"             /* U32, BYTE, etc. */
     18#include "../common/debug.h"           /* assert, DEBUGLOG */
     19#include "../common/error_private.h"   /* ERROR */
     20#include "hist.h"
     21
     22
     23/* --- Error management --- */
     24unsigned HIST_isError(size_t code) { return ERR_isError(code); }
     25
     26/*-**************************************************************
     27 *  Histogram functions
     28 ****************************************************************/
     29unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
     30                           const void* src, size_t srcSize)
     31{
     32    const BYTE* ip = (const BYTE*)src;
     33    const BYTE* const end = ip + srcSize;
     34    unsigned maxSymbolValue = *maxSymbolValuePtr;
     35    unsigned largestCount=0;
     36
     37    ZSTD_memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
     38    if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
     39
     40    while (ip<end) {
     41        assert(*ip <= maxSymbolValue);
     42        count[*ip++]++;
     43    }
     44
     45    while (!count[maxSymbolValue]) maxSymbolValue--;
     46    *maxSymbolValuePtr = maxSymbolValue;
     47
     48    {   U32 s;
     49        for (s=0; s<=maxSymbolValue; s++)
     50            if (count[s] > largestCount) largestCount = count[s];
     51    }
     52
     53    return largestCount;
     54}
     55
     56typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e;
     57
     58/* HIST_count_parallel_wksp() :
     59 * store histogram into 4 intermediate tables, recombined at the end.
     60 * this design makes better use of OoO cpus,
     61 * and is noticeably faster when some values are heavily repeated.
     62 * But it needs some additional workspace for intermediate tables.
     63 * `workSpace` must be a U32 table of size >= HIST_WKSP_SIZE_U32.
     64 * @return : largest histogram frequency,
     65 *           or an error code (notably when histogram's alphabet is larger than *maxSymbolValuePtr) */
     66static size_t HIST_count_parallel_wksp(
     67                                unsigned* count, unsigned* maxSymbolValuePtr,
     68                                const void* source, size_t sourceSize,
     69                                HIST_checkInput_e check,
     70                                U32* const workSpace)
     71{
     72    const BYTE* ip = (const BYTE*)source;
     73    const BYTE* const iend = ip+sourceSize;
     74    size_t const countSize = (*maxSymbolValuePtr + 1) * sizeof(*count);
     75    unsigned max=0;
     76    U32* const Counting1 = workSpace;
     77    U32* const Counting2 = Counting1 + 256;
     78    U32* const Counting3 = Counting2 + 256;
     79    U32* const Counting4 = Counting3 + 256;
     80
     81    /* safety checks */
     82    assert(*maxSymbolValuePtr <= 255);
     83    if (!sourceSize) {
     84        ZSTD_memset(count, 0, countSize);
     85        *maxSymbolValuePtr = 0;
     86        return 0;
     87    }
     88    ZSTD_memset(workSpace, 0, 4*256*sizeof(unsigned));
     89
     90    /* by stripes of 16 bytes */
     91    {   U32 cached = MEM_read32(ip); ip += 4;
     92        while (ip < iend-15) {
     93            U32 c = cached; cached = MEM_read32(ip); ip += 4;
     94            Counting1[(BYTE) c     ]++;
     95            Counting2[(BYTE)(c>>8) ]++;
     96            Counting3[(BYTE)(c>>16)]++;
     97            Counting4[       c>>24 ]++;
     98            c = cached; cached = MEM_read32(ip); ip += 4;
     99            Counting1[(BYTE) c     ]++;
    100            Counting2[(BYTE)(c>>8) ]++;
    101            Counting3[(BYTE)(c>>16)]++;
    102            Counting4[       c>>24 ]++;
    103            c = cached; cached = MEM_read32(ip); ip += 4;
    104            Counting1[(BYTE) c     ]++;
    105            Counting2[(BYTE)(c>>8) ]++;
    106            Counting3[(BYTE)(c>>16)]++;
    107            Counting4[       c>>24 ]++;
    108            c = cached; cached = MEM_read32(ip); ip += 4;
    109            Counting1[(BYTE) c     ]++;
    110            Counting2[(BYTE)(c>>8) ]++;
    111            Counting3[(BYTE)(c>>16)]++;
    112            Counting4[       c>>24 ]++;
    113        }
    114        ip-=4;
    115    }
    116
    117    /* finish last symbols */
    118    while (ip<iend) Counting1[*ip++]++;
    119
    120    {   U32 s;
    121        for (s=0; s<256; s++) {
    122            Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
    123            if (Counting1[s] > max) max = Counting1[s];
    124    }   }
    125
    126    {   unsigned maxSymbolValue = 255;
    127        while (!Counting1[maxSymbolValue]) maxSymbolValue--;
    128        if (check && maxSymbolValue > *maxSymbolValuePtr) return ERROR(maxSymbolValue_tooSmall);
    129        *maxSymbolValuePtr = maxSymbolValue;
    130        ZSTD_memmove(count, Counting1, countSize);   /* in case count & Counting1 are overlapping */
    131    }
    132    return (size_t)max;
    133}
    134
    135/* HIST_countFast_wksp() :
    136 * Same as HIST_countFast(), but using an externally provided scratch buffer.
    137 * `workSpace` is a writable buffer which must be 4-bytes aligned,
    138 * `workSpaceSize` must be >= HIST_WKSP_SIZE
    139 */
    140size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
    141                          const void* source, size_t sourceSize,
    142                          void* workSpace, size_t workSpaceSize)
    143{
    144    if (sourceSize < 1500) /* heuristic threshold */
    145        return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize);
    146    if ((size_t)workSpace & 3) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
    147    if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
    148    return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace);
    149}
    150
    151/* HIST_count_wksp() :
    152 * Same as HIST_count(), but using an externally provided scratch buffer.
    153 * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */
    154size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
    155                       const void* source, size_t sourceSize,
    156                       void* workSpace, size_t workSpaceSize)
    157{
    158    if ((size_t)workSpace & 3) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
    159    if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
    160    if (*maxSymbolValuePtr < 255)
    161        return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue, (U32*)workSpace);
    162    *maxSymbolValuePtr = 255;
    163    return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize);
    164}
    165