cachepc-qemu

Fork of AMDESE/qemu with changes for cachepc side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu
Log | Files | Refs | Submodules | LICENSE | sfeed.txt

vnc-enc-zywrle.h (27416B)


      1/********************************************************************
      2 *                                                                  *
      3 * THIS FILE IS PART OF THE 'ZYWRLE' VNC CODEC SOURCE CODE.         *
      4 *                                                                  *
      5 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
      6 * GOVERNED BY A FOLLOWING BSD-STYLE SOURCE LICENSE.                *
      7 * PLEASE READ THESE TERMS BEFORE DISTRIBUTING.                     *
      8 *                                                                  *
      9 * THE 'ZYWRLE' VNC CODEC SOURCE CODE IS (C) COPYRIGHT 2006         *
     10 * BY Hitachi Systems & Services, Ltd.                              *
     11 * (Noriaki Yamazaki, Research & Development Center)               *
     12 *                                                                  *
     13 *                                                                  *
     14 ********************************************************************
     15Redistribution and use in source and binary forms, with or without
     16modification, are permitted provided that the following conditions
     17are met:
     18
     19- Redistributions of source code must retain the above copyright
     20notice, this list of conditions and the following disclaimer.
     21
     22- Redistributions in binary form must reproduce the above copyright
     23notice, this list of conditions and the following disclaimer in the
     24documentation and/or other materials provided with the distribution.
     25
     26- Neither the name of the Hitachi Systems & Services, Ltd. nor
     27the names of its contributors may be used to endorse or promote
     28products derived from this software without specific prior written
     29permission.
     30
     31THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     32``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     33LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     34A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION
     35OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     36SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     37LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     38DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     39THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     40(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     41OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     42 ********************************************************************/
     43
     44#ifndef VNC_ENC_ZYWRLE_H
     45#define VNC_ENC_ZYWRLE_H
     46
     47/* Tables for Coefficients filtering. */
     48#ifndef ZYWRLE_QUANTIZE
     49/* Type A:lower bit omitting of EZW style. */
     50static const unsigned int zywrle_param[3][3]={
     51        {0x0000F000, 0x00000000, 0x00000000},
     52        {0x0000C000, 0x00F0F0F0, 0x00000000},
     53        {0x0000C000, 0x00C0C0C0, 0x00F0F0F0},
     54/*	{0x0000FF00, 0x00000000, 0x00000000},
     55        {0x0000FF00, 0x00FFFFFF, 0x00000000},
     56        {0x0000FF00, 0x00FFFFFF, 0x00FFFFFF}, */
     57};
     58#else
/* Type B: non-linear quantization filter. */
/*
 * Quantize/dequantize lookup tables for the Type B filter.
 *
 * Each of the four tables has 256 entries and is indexed with a coefficient
 * byte read as an unsigned value, so entries 0..127 cover non-negative
 * inputs and entries 128..255 cover the bit patterns of negative inputs.
 * The stored value is the filtered (signed) coefficient.
 * Table 0 maps every input to zero.
 * The comment on each table records the bi/bo/r parameters and the PSNR
 * noted by the original author.
 */
static const int8_t zywrle_conv[4][256]={
{	/* bi=5, bo=5 r=0.0:PSNR=24.849 */
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
},
{	/* bi=5, bo=5 r=2.0:PSNR=74.031 */
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 32,
        32, 32, 32, 32, 32, 32, 32, 32,
        32, 32, 32, 32, 32, 32, 32, 32,
        48, 48, 48, 48, 48, 48, 48, 48,
        48, 48, 48, 56, 56, 56, 56, 56,
        56, 56, 56, 56, 64, 64, 64, 64,
        64, 64, 64, 64, 72, 72, 72, 72,
        72, 72, 72, 72, 80, 80, 80, 80,
        80, 80, 88, 88, 88, 88, 88, 88,
        88, 88, 88, 88, 88, 88, 96, 96,
        96, 96, 96, 104, 104, 104, 104, 104,
        104, 104, 104, 104, 104, 112, 112, 112,
        112, 112, 112, 112, 112, 112, 120, 120,
        120, 120, 120, 120, 120, 120, 120, 120,
        0, -120, -120, -120, -120, -120, -120, -120,
        -120, -120, -120, -112, -112, -112, -112, -112,
        -112, -112, -112, -112, -104, -104, -104, -104,
        -104, -104, -104, -104, -104, -104, -96, -96,
        -96, -96, -96, -88, -88, -88, -88, -88,
        -88, -88, -88, -88, -88, -88, -88, -80,
        -80, -80, -80, -80, -80, -72, -72, -72,
        -72, -72, -72, -72, -72, -64, -64, -64,
        -64, -64, -64, -64, -64, -56, -56, -56,
        -56, -56, -56, -56, -56, -56, -48, -48,
        -48, -48, -48, -48, -48, -48, -48, -48,
        -48, -32, -32, -32, -32, -32, -32, -32,
        -32, -32, -32, -32, -32, -32, -32, -32,
        -32, -32, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
},
{	/* bi=5, bo=4 r=2.0:PSNR=64.441 */
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        48, 48, 48, 48, 48, 48, 48, 48,
        48, 48, 48, 48, 48, 48, 48, 48,
        48, 48, 48, 48, 48, 48, 48, 48,
        64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64,
        80, 80, 80, 80, 80, 80, 80, 80,
        80, 80, 80, 80, 80, 88, 88, 88,
        88, 88, 88, 88, 88, 88, 88, 88,
        104, 104, 104, 104, 104, 104, 104, 104,
        104, 104, 104, 112, 112, 112, 112, 112,
        112, 112, 112, 112, 120, 120, 120, 120,
        120, 120, 120, 120, 120, 120, 120, 120,
        0, -120, -120, -120, -120, -120, -120, -120,
        -120, -120, -120, -120, -120, -112, -112, -112,
        -112, -112, -112, -112, -112, -112, -104, -104,
        -104, -104, -104, -104, -104, -104, -104, -104,
        -104, -88, -88, -88, -88, -88, -88, -88,
        -88, -88, -88, -88, -80, -80, -80, -80,
        -80, -80, -80, -80, -80, -80, -80, -80,
        -80, -64, -64, -64, -64, -64, -64, -64,
        -64, -64, -64, -64, -64, -64, -64, -64,
        -64, -48, -48, -48, -48, -48, -48, -48,
        -48, -48, -48, -48, -48, -48, -48, -48,
        -48, -48, -48, -48, -48, -48, -48, -48,
        -48, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
},
{	/* bi=5, bo=2 r=2.0:PSNR=43.175 */
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        88, 88, 88, 88, 88, 88, 88, 88,
        88, 88, 88, 88, 88, 88, 88, 88,
        88, 88, 88, 88, 88, 88, 88, 88,
        88, 88, 88, 88, 88, 88, 88, 88,
        88, 88, 88, 88, 88, 88, 88, 88,
        88, 88, 88, 88, 88, 88, 88, 88,
        88, 88, 88, 88, 88, 88, 88, 88,
        88, 88, 88, 88, 88, 88, 88, 88,
        0, -88, -88, -88, -88, -88, -88, -88,
        -88, -88, -88, -88, -88, -88, -88, -88,
        -88, -88, -88, -88, -88, -88, -88, -88,
        -88, -88, -88, -88, -88, -88, -88, -88,
        -88, -88, -88, -88, -88, -88, -88, -88,
        -88, -88, -88, -88, -88, -88, -88, -88,
        -88, -88, -88, -88, -88, -88, -88, -88,
        -88, -88, -88, -88, -88, -88, -88, -88,
        -88, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
}
};
    198
    199static const int8_t *zywrle_param[3][3][3]={
    200        {{zywrle_conv[0], zywrle_conv[2], zywrle_conv[0]},
    201         {zywrle_conv[0], zywrle_conv[0], zywrle_conv[0]},
    202         {zywrle_conv[0], zywrle_conv[0], zywrle_conv[0]}},
    203        {{zywrle_conv[0], zywrle_conv[3], zywrle_conv[0]},
    204         {zywrle_conv[1], zywrle_conv[1], zywrle_conv[1]},
    205         {zywrle_conv[0], zywrle_conv[0], zywrle_conv[0]}},
    206        {{zywrle_conv[0], zywrle_conv[3], zywrle_conv[0]},
    207         {zywrle_conv[2], zywrle_conv[2], zywrle_conv[2]},
    208         {zywrle_conv[1], zywrle_conv[1], zywrle_conv[1]}},
    209};
    210#endif
    211
    212/*   Load/Save pixel stuffs. */
    213#define ZYWRLE_YMASK15  0xFFFFFFF8
    214#define ZYWRLE_UVMASK15 0xFFFFFFF8
    215#define ZYWRLE_LOAD_PIXEL15(src, r, g, b)                               \
    216    do {                                                                \
    217        r = (((uint8_t*)src)[S_1]<< 1)& 0xF8;                           \
    218        g = (((uint8_t*)src)[S_1]<< 6) | (((uint8_t*)src)[S_0]>> 2);    \
    219        g &= 0xF8;                                                      \
    220        b =  (((uint8_t*)src)[S_0]<< 3)& 0xF8;                          \
    221    } while (0)
    222
    223#define ZYWRLE_SAVE_PIXEL15(dst, r, g, b)                               \
    224    do {                                                                \
    225        r &= 0xF8;                                                      \
    226        g &= 0xF8;                                                      \
    227        b &= 0xF8;                                                      \
    228        ((uint8_t*)dst)[S_1] = (uint8_t)((r >> 1)|(g >> 6));            \
    229        ((uint8_t*)dst)[S_0] = (uint8_t)(((b >> 3)|(g << 2))& 0xFF);    \
    230    } while (0)
    231
    232#define ZYWRLE_YMASK16  0xFFFFFFFC
    233#define ZYWRLE_UVMASK16 0xFFFFFFF8
    234#define ZYWRLE_LOAD_PIXEL16(src, r, g, b)                               \
    235    do {                                                                \
    236        r = ((uint8_t*)src)[S_1] & 0xF8;                                \
    237        g = (((uint8_t*)src)[S_1]<< 5) | (((uint8_t*)src)[S_0] >> 3);   \
    238        g &= 0xFC;                                                      \
    239        b = (((uint8_t*)src)[S_0]<< 3) & 0xF8;                          \
    240    } while (0)
    241
    242#define ZYWRLE_SAVE_PIXEL16(dst, r, g,b)                                \
    243    do {                                                                \
    244        r &= 0xF8;                                                      \
    245        g &= 0xFC;                                                      \
    246        b &= 0xF8;                                                      \
    247        ((uint8_t*)dst)[S_1] = (uint8_t)(r | (g >> 5));                 \
    248        ((uint8_t*)dst)[S_0] = (uint8_t)(((b >> 3)|(g << 3)) & 0xFF);   \
    249    } while (0)
    250
    251#define ZYWRLE_YMASK32  0xFFFFFFFF
    252#define ZYWRLE_UVMASK32 0xFFFFFFFF
    253#define ZYWRLE_LOAD_PIXEL32(src, r, g, b)     \
    254    do {                                      \
    255        r = ((uint8_t*)src)[L_2];             \
    256        g = ((uint8_t*)src)[L_1];             \
    257        b = ((uint8_t*)src)[L_0];             \
    258    } while (0)
    259#define ZYWRLE_SAVE_PIXEL32(dst, r, g, b)             \
    260    do {                                              \
    261        ((uint8_t*)dst)[L_2] = (uint8_t)r;            \
    262        ((uint8_t*)dst)[L_1] = (uint8_t)g;            \
    263        ((uint8_t*)dst)[L_0] = (uint8_t)b;            \
    264    } while (0)
    265
    266static inline void harr(int8_t *px0, int8_t *px1)
    267{
    268    /* Piecewise-Linear Harr(PLHarr) */
    269    int x0 = (int)*px0, x1 = (int)*px1;
    270    int orgx0 = x0, orgx1 = x1;
    271
    272    if ((x0 ^ x1) & 0x80) {
    273        /* differ sign */
    274        x1 += x0;
    275        if (((x1 ^ orgx1) & 0x80) == 0) {
    276            /* |x1| > |x0| */
    277            x0 -= x1;	/* H = -B */
    278        }
    279    } else {
    280        /* same sign */
    281        x0 -= x1;
    282        if (((x0 ^ orgx0) & 0x80) == 0) {
    283            /* |x0| > |x1| */
    284            x1 += x0;	/* L = A */
    285        }
    286    }
    287    *px0 = (int8_t)x1;
    288    *px1 = (int8_t)x0;
    289}
    290
    291/*
    292 1D-Wavelet transform.
    293
    294 In coefficients array, the famous 'pyramid' decomposition is well used.
    295
    296 1D Model:
    297   |L0L0L0L0|L0L0L0L0|H0H0H0H0|H0H0H0H0| : level 0
    298   |L1L1L1L1|H1H1H1H1|H0H0H0H0|H0H0H0H0| : level 1
    299
    300 But this method needs line buffer because H/L is different position from X0/X1.
    301 So, I used 'interleave' decomposition instead of it.
    302
    303 1D Model:
    304   |L0H0L0H0|L0H0L0H0|L0H0L0H0|L0H0L0H0| : level 0
    305   |L1H0H1H0|L1H0H1H0|L1H0H1H0|L1H0H1H0| : level 1
    306
    307 In this method, H/L and X0/X1 is always same position.
    308 This leads us to more speed and less memory.
 Of course, both methods give the same result,
 because they differ only in where each coefficient is stored.
    311*/
    312static inline void wavelet_level(int *data, int size, int l, int skip_pixel)
    313{
    314    int s, ofs;
    315    int8_t *px0;
    316    int8_t *end;
    317
    318    px0 = (int8_t*)data;
    319    s = (8 << l) * skip_pixel;
    320    end = px0 + (size >> (l + 1)) * s;
    321    s -= 2;
    322    ofs = (4 << l) * skip_pixel;
    323
    324    while (px0 < end) {
    325        harr(px0, px0 + ofs);
    326        px0++;
    327        harr(px0, px0 + ofs);
    328        px0++;
    329        harr(px0, px0 + ofs);
    330        px0 += s;
    331    }
    332}
    333
    334#ifndef ZYWRLE_QUANTIZE
    335/* Type A:lower bit omitting of EZW style. */
    336static inline void filter_wavelet_square(int *buf, int width, int height,
    337                                         int level, int l)
    338{
    339    int r, s;
    340    int x, y;
    341    int *h;
    342    const unsigned int *m;
    343
    344    m = &(zywrle_param[level - 1][l]);
    345    s = 2 << l;
    346
    347    for (r = 1; r < 4; r++) {
    348        h = buf;
    349        if (r & 0x01) {
    350            h += s >> 1;
    351        }
    352        if (r & 0x02) {
    353            h += (s >> 1) * width;
    354        }
    355        for (y = 0; y < height / s; y++) {
    356            for (x = 0; x < width / s; x++) {
    357                /*
    358                  these are same following code.
    359                  h[x] = h[x] / (~m[x]+1) * (~m[x]+1);
    360                  ( round h[x] with m[x] bit )
    361                  '&' operator isn't 'round' but is 'floor'.
    362                  So, we must offset when h[x] is negative.
    363                */
    364                if (((int8_t*)h)[0] & 0x80) {
    365                    ((int8_t*)h)[0] += ~((int8_t*)m)[0];
    366                }
    367                if (((int8_t*)h)[1] & 0x80) {
    368                    ((int8_t*)h)[1] += ~((int8_t*)m)[1];
    369                }
    370                if (((int8_t*)h)[2] & 0x80) {
    371                    ((int8_t*)h)[2] += ~((int8_t*)m)[2];
    372                }
    373                *h &= *m;
    374                h += s;
    375            }
    376            h += (s-1)*width;
    377        }
    378    }
    379}
    380#else
    381/*
 Type B: non-linear quantization filter.
    383
 Coefficients follow a Gaussian distribution, and the small values that make up
 most of them are less important than the larger ones.
 So, I use a filter based on a non-linear quantize/dequantize table.
 In general, the non-linear quantization formula is as follows.
    388
    389    y=f(x)   = sign(x)*round( ((abs(x)/(2^7))^ r   )* 2^(bo-1) )*2^(8-bo)
    390    x=f-1(y) = sign(y)*round( ((abs(y)/(2^7))^(1/r))* 2^(bi-1) )*2^(8-bi)
    391 ( r:power coefficient  bi:effective MSB in input  bo:effective MSB in output )
    392
    393   r < 1.0 : Smaller value is more important than larger value.
    394   r > 1.0 : Larger value is more important than smaller value.
   r = 1.0 : Linear quantization, which is the same as EZW style.
    396
 r = 0.75 is the well-known non-linear quantization used in the MP3 audio codec.
 In contrast to audio data, larger values are more important in wavelet
 coefficients.
 So, I selected the r = 2.0 table (quantize is x^2, dequantize is sqrt(x)).
    400
 Compared with EZW-style linear quantization, this filter tends to give
 sharper edges and a higher compression rate, but more blocking noise and
 lower quality. In particular, the surfaces of graphic objects show noticeable
 noise in middle quality mode.
    405
 We need only the quantized-dequantized (filtered) value rather than the
 quantized value itself, because all values are packed or palettized in the
 later ZRLE stage.
 This means the client decoder does not need to be modified when we change
 the filtering procedure in the future.
 The client only decodes the coefficients given by the encoder.
    411*/
    412static inline void filter_wavelet_square(int *buf, int width, int height,
    413                                         int level, int l)
    414{
    415    int r, s;
    416    int x, y;
    417    int *h;
    418    const int8_t **m;
    419
    420    m = zywrle_param[level - 1][l];
    421    s = 2 << l;
    422
    423    for (r = 1; r < 4; r++) {
    424        h = buf;
    425        if (r & 0x01) {
    426            h += s >> 1;
    427        }
    428        if (r & 0x02) {
    429            h += (s >> 1) * width;
    430        }
    431        for (y = 0; y < height / s; y++) {
    432            for (x = 0; x < width / s; x++) {
    433                ((int8_t*)h)[0] = m[0][((uint8_t*)h)[0]];
    434                ((int8_t*)h)[1] = m[1][((uint8_t*)h)[1]];
    435                ((int8_t*)h)[2] = m[2][((uint8_t*)h)[2]];
    436                h += s;
    437            }
    438            h += (s - 1) * width;
    439        }
    440    }
    441}
    442#endif
    443
    444static inline void wavelet(int *buf, int width, int height, int level)
    445{
    446        int l, s;
    447        int *top;
    448        int *end;
    449
    450        for (l = 0; l < level; l++) {
    451                top = buf;
    452                end = buf + height * width;
    453                s = width << l;
    454                while (top < end) {
    455                        wavelet_level(top, width, l, 1);
    456                        top += s;
    457                }
    458                top = buf;
    459                end = buf + width;
    460                s = 1<<l;
    461                while (top < end) {
    462                        wavelet_level(top, height, l, width);
    463                        top += s;
    464                }
    465                filter_wavelet_square(buf, width, height, level, l);
    466        }
    467}
    468
    469
    470/* Load/Save coefficients stuffs.
    471 Coefficients manages as 24 bits little-endian pixel. */
    472#define ZYWRLE_LOAD_COEFF(src, r, g, b)         \
    473    do {                                        \
    474        r = ((int8_t*)src)[2];                  \
    475        g = ((int8_t*)src)[1];                  \
    476        b = ((int8_t*)src)[0];                  \
    477    } while (0)
    478
    479#define ZYWRLE_SAVE_COEFF(dst, r, g, b)       \
    480    do {                                      \
    481        ((int8_t*)dst)[2] = (int8_t)r;        \
    482        ((int8_t*)dst)[1] = (int8_t)g;        \
    483        ((int8_t*)dst)[0] = (int8_t)b;        \
    484    } while (0)
    485
    486/*
  RGB <=> YUV conversion.
  Strictly speaking, YUV conversion is defined by the following formulas:
    489  Y =  0.299R + 0.587G + 0.114B (   0<=Y<=255)
    490  U = -0.169R - 0.331G + 0.500B (-128<=U<=127)
    491  V =  0.500R - 0.419G - 0.081B (-128<=V<=127)
    492
    493  I use simple conversion RCT(reversible color transform) which is described
    494  in JPEG-2000 specification.
    495  Y = (R + 2G + B)/4 (   0<=Y<=255)
    496  U = B-G (-256<=U<=255)
    497  V = R-G (-256<=V<=255)
    498*/
    499
    500/* RCT is N-bit RGB to N-bit Y and N+1-bit UV.
    501   For make Same N-bit, UV is lossy.
    502   More exact PLHarr, we reduce to odd range(-127<=x<=127). */
    503#define ZYWRLE_RGBYUV_(r, g, b, y, u, v, ymask, uvmask)          \
    504    do {                                                         \
    505        y = (r + (g << 1) + b) >> 2;                             \
    506        u =  b - g;                                              \
    507        v =  r - g;                                              \
    508        y -= 128;                                                \
    509        u >>= 1;                                                 \
    510        v >>= 1;                                                 \
    511        y &= ymask;                                              \
    512        u &= uvmask;                                             \
    513        v &= uvmask;                                             \
    514        if (y == -128) {                                         \
    515            y += (0xFFFFFFFF - ymask + 1);                       \
    516        }                                                        \
    517        if (u == -128) {                                         \
    518            u += (0xFFFFFFFF - uvmask + 1);                      \
    519        }                                                        \
    520        if (v == -128) {                                         \
    521            v += (0xFFFFFFFF - uvmask + 1);                      \
    522        }                                                        \
    523    } while (0)
    524
    525
    526/*
    527 coefficient packing/unpacking stuffs.
    528 Wavelet transform makes 4 sub coefficient image from 1 original image.
    529
    530 model with pyramid decomposition:
    531   +------+------+
    532   |      |      |
    533   |  L   |  Hx  |
    534   |      |      |
    535   +------+------+
    536   |      |      |
   |  Hy  |  Hxy |
    538   |      |      |
    539   +------+------+
    540
 So, strictly speaking, we must transfer each sub-image individually.
 But as far as ZRLE is concerned, the following single decomposed image is
 equivalent to the individual sub-images above. I use this format.
 (Strictly speaking, the transfer order is reversed (Hxy->Hy->Hx->L)
  to simplify the procedure for any wavelet level.)
    546
    547   +------+------+
    548   |      L      |
    549   +------+------+
    550   |      Hx     |
    551   +------+------+
    552   |      Hy     |
    553   +------+------+
    554   |      Hxy    |
    555   +------+------+
    556*/
    557#define ZYWRLE_INC_PTR(data)                         \
    558    do {                                             \
    559        data++;                                      \
    560        if( data - p >= (w + uw) ) {                 \
    561            data += scanline-(w + uw);               \
    562            p = data;                                \
    563        }                                            \
    564    } while (0)
    565
    566#define ZYWRLE_TRANSFER_COEFF(buf, data, t, w, h, scanline, level, TRANS) \
    567    do {                                                                \
    568        ph = buf;                                                       \
    569        s = 2 << level;                                                 \
    570        if (t & 0x01) {                                                 \
    571            ph += s >> 1;                                               \
    572        }                                                               \
    573        if (t & 0x02) {                                                 \
    574            ph += (s >> 1) * w;                                         \
    575        }                                                               \
    576        end = ph + h * w;                                               \
    577        while (ph < end) {                                              \
    578            line = ph + w;                                              \
    579            while (ph < line) {                                         \
    580                TRANS                                                   \
    581                    ZYWRLE_INC_PTR(data);                               \
    582                ph += s;                                                \
    583            }                                                           \
    584            ph += (s - 1) * w;                                          \
    585        }                                                               \
    586    } while (0)
    587
/*
 * Copy one sub-band of coefficients from buf into the pixel stream data.
 * For every coefficient visited by ZYWRLE_TRANSFER_COEFF, the three
 * channel values are read from ph with ZYWRLE_LOAD_COEFF and written to
 * data with ZYWRLE_SAVE_PIXEL.
 * ZYWRLE_SAVE_PIXEL is not defined in this header; presumably the including
 * file maps it to the variant for the current pixel depth (to be confirmed).
 * r, g and b must be variables in the calling scope.
 */
#define ZYWRLE_PACK_COEFF(buf, data, t, width, height, scanline, level)	\
    ZYWRLE_TRANSFER_COEFF(buf, data, t, width, height, scanline, level, \
                          ZYWRLE_LOAD_COEFF(ph, r, g, b);               \
                          ZYWRLE_SAVE_PIXEL(data, r, g, b);)

/*
 * Inverse of ZYWRLE_PACK_COEFF: for every coefficient position visited,
 * read a pixel from data with ZYWRLE_LOAD_PIXEL and store it into ph with
 * ZYWRLE_SAVE_COEFF.
 * ZYWRLE_LOAD_PIXEL is not defined in this header either (see above).
 */
#define ZYWRLE_UNPACK_COEFF(buf, data, t, width, height, scanline, level) \
    ZYWRLE_TRANSFER_COEFF(buf, data, t, width, height, scanline, level, \
                          ZYWRLE_LOAD_PIXEL(data, r, g, b);             \
                          ZYWRLE_SAVE_COEFF(ph, r, g, b);)
    597
/*
 * Write back the values stored after the aligned w * h region of buf.
 * top walks from buf + w * h up to buf + (w + uw) * (h + uh); TRANS runs
 * once per element and data is advanced with ZYWRLE_INC_PTR each time.
 *
 * Uses buf, w, h, uw, uh, top and end from the calling scope, plus the
 * variables ZYWRLE_INC_PTR itself relies on (p, scanline).
 */
#define ZYWRLE_SAVE_UNALIGN(data, TRANS)                     \
    do {                                                     \
        top = buf + w * h;                                   \
        end = buf + (w + uw) * (h + uh);                     \
        while (top < end) {                                  \
            TRANS                                            \
                ZYWRLE_INC_PTR(data);                        \
                top++;                                       \
        }                                                    \
    } while (0)
    608
    609#define ZYWRLE_LOAD_UNALIGN(data,TRANS)                                 \
    610    do {                                                                \
    611        top = buf + w * h;                                              \
    612        if (uw) {                                                       \
    613            p = data + w;                                               \
    614            end = (int*)(p + h * scanline);                             \
    615            while (p < (ZRLE_PIXEL*)end) {                              \
    616                line = (int*)(p + uw);                                  \
    617                while (p < (ZRLE_PIXEL*)line) {                         \
    618                    TRANS                                               \
    619                        p++;                                            \
    620                    top++;                                              \
    621                }                                                       \
    622                p += scanline - uw;                                     \
    623            }                                                           \
    624        }                                                               \
    625        if (uh) {                                                       \
    626            p = data + h * scanline;                                    \
    627            end = (int*)(p + uh * scanline);                            \
    628            while (p < (ZRLE_PIXEL*)end) {                              \
    629                line = (int*)(p + w);                                   \
    630                while (p < (ZRLE_PIXEL*)line) {                         \
    631                    TRANS                                               \
    632                        p++;                                            \
    633                    top++;                                              \
    634                }                                                       \
    635                p += scanline - w;                                      \
    636            }                                                           \
    637        }                                                               \
    638        if (uw && uh) {                                                 \
    639            p= data + w + h * scanline;                                 \
    640            end = (int*)(p + uh * scanline);                            \
    641            while (p < (ZRLE_PIXEL*)end) {                              \
    642                line = (int*)(p + uw);                                  \
    643                while (p < (ZRLE_PIXEL*)line) {                         \
    644                    TRANS                                               \
    645                        p++;                                            \
    646                    top++;                                              \
    647                }                                                       \
    648                p += scanline-uw;                                       \
    649            }                                                           \
    650        }                                                               \
    651    } while (0)
    652
    653static inline void zywrle_calc_size(int *w, int *h, int level)
    654{
    655    *w &= ~((1 << level) - 1);
    656    *h &= ~((1 << level) - 1);
    657}
    658
    659#endif