SDL_RLEaccel.c - cscg22-gearboy - CSCG 2022 Challenge 'Gearboy'

	cscg22-gearboy CSCG 2022 Challenge 'Gearboy'
	git clone https://git.sinitax.com/sinitax/cscg22-gearboy
	Log \| Files \| Refs \| sfeed.txt
SDL_RLEaccel.c (59014B)
      1/*
      2  Simple DirectMedia Layer
      3  Copyright (C) 1997-2014 Sam Lantinga <slouken@libsdl.org>
      4
      5  This software is provided 'as-is', without any express or implied
      6  warranty.  In no event will the authors be held liable for any damages
      7  arising from the use of this software.
      8
      9  Permission is granted to anyone to use this software for any purpose,
     10  including commercial applications, and to alter it and redistribute it
     11  freely, subject to the following restrictions:
     12
     13  1. The origin of this software must not be misrepresented; you must not
     14     claim that you wrote the original software. If you use this software
     15     in a product, an acknowledgment in the product documentation would be
     16     appreciated but is not required.
     17  2. Altered source versions must be plainly marked as such, and must not be
     18     misrepresented as being the original software.
     19  3. This notice may not be removed or altered from any source distribution.
     20*/
     21#include "../SDL_internal.h"
     22
     23/*
     24 * RLE encoding for software colorkey and alpha-channel acceleration
     25 *
     26 * Original version by Sam Lantinga
     27 *
     28 * Mattias Engdegård (Yorick): Rewrite. New encoding format, encoder and
     29 * decoder. Added per-surface alpha blitter. Added per-pixel alpha
     30 * format, encoder and blitter.
     31 *
     32 * Many thanks to Xark and johns for hints, benchmarks and useful comments
     33 * leading to this code.
     34 *
     35 * Welcome to Macro Mayhem.
     36 */
     37
     38/*
     39 * The encoding translates the image data to a stream of segments of the form
     40 *
     41 * <skip> <run> <data>
     42 *
     43 * where <skip> is the number of transparent pixels to skip,
     44 *       <run>  is the number of opaque pixels to blit,
     45 * and   <data> are the pixels themselves.
     46 *
     47 * This basic structure is used both for colorkeyed surfaces, used for simple
     48 * binary transparency and for per-surface alpha blending, and for surfaces
     49 * with per-pixel alpha. The details differ, however:
     50 *
     51 * Encoding of colorkeyed surfaces:
     52 *
     53 *   Encoded pixels always have the same format as the target surface.
     54 *   <skip> and <run> are unsigned 8 bit integers, except for 32 bit depth
     55 *   where they are 16 bit. This makes the pixel data aligned at all times.
     56 *   Segments never wrap around from one scan line to the next.
     57 *
     58 *   The end of the sequence is marked by a zero <skip>,<run> pair at the
     59 *   beginning of a line.
     60 *
     61 * Encoding of surfaces with per-pixel alpha:
     62 *
     63 *   The sequence begins with a struct RLEDestFormat describing the target
     64 *   pixel format, to provide reliable un-encoding.
     65 *
     66 *   Each scan line is encoded twice: First all completely opaque pixels,
     67 *   encoded in the target format as described above, and then all
     68 *   partially transparent (translucent) pixels (where 1 <= alpha <= 254),
     69 *   in the following 32-bit format:
     70 *
     71 *   For 32-bit targets, each pixel has the target RGB format but with
     72 *   the alpha value occupying the highest 8 bits. The <skip> and <run>
     73 *   counts are 16 bit.
     74 *
     75 *   For 16-bit targets, each pixel has the target RGB format, but with
     76 *   the middle component (usually green) shifted 16 steps to the left,
     77 *   and the hole filled with the 5 most significant bits of the alpha value.
     78 *   i.e. if the target has the format         rrrrrggggggbbbbb,
     79 *   the encoded pixel will be 00000gggggg00000rrrrr0aaaaabbbbb.
     80 *   The <skip> and <run> counts are 8 bit for the opaque lines, 16 bit
     81 *   for the translucent lines. Two padding bytes may be inserted
     82 *   before each translucent line to keep them 32-bit aligned.
     83 *
     84 *   The end of the sequence is marked by a zero <skip>,<run> pair at the
     85 *   beginning of an opaque line.
     86 */
     87
     88#include "SDL_video.h"
     89#include "SDL_sysvideo.h"
     90#include "SDL_blit.h"
     91#include "SDL_RLEaccel_c.h"
     92
     /*
      * Fallback MAX/MIN definitions, used only when no earlier header has
      * defined them. Both macros evaluate each argument more than once, so
      * arguments must not have side effects.
      */
     93#ifndef MAX
     94#define MAX(a, b) ((a) > (b) ? (a) : (b))
     95#endif
     96#ifndef MIN
     97#define MIN(a, b) ((a) < (b) ? (a) : (b))
     98#endif
     99
    /*
     * Copy `len` pixels of `bpp` bytes each from `from` to `to` with
     * SDL_memcpy. The byte count is computed in size_t.
     */
    100#define PIXEL_COPY(to, from, len, bpp)          \
    101    SDL_memcpy(to, from, (size_t)(len) * (bpp))
    102
    103/*
    104 * Various colorkey blit methods, for opaque and per-surface alpha
    105 */
    106
    /*
     * Opaque blit: a plain PIXEL_COPY of `length` pixels. `alpha` is ignored;
     * it is accepted so this macro takes the same five arguments as the
     * ALPHA_BLIT* macros below.
     */
    107#define OPAQUE_BLIT(to, from, length, bpp, alpha)   \
    108    PIXEL_COPY(to, from, length, bpp)
    109
    110/*
    111 * For 32bpp pixels on the form 0x00rrggbb:
    112 * If we treat the middle component separately, we can process the two
    113 * remaining in parallel. This is safe to do because of the gap to the left
    114 * of each component, so the bits from the multiplication don't collide.
    115 * This can be used for any RGB permutation of course.
    116 */
    117#define ALPHA_BLIT32_888(to, from, length, bpp, alpha)      \
    118    do {                                                    \
    119        int i;                                              \
    120        Uint32 *src = (Uint32 *)(from);                     \
    121        Uint32 *dst = (Uint32 *)(to);                       \
    122        for (i = 0; i < (int)(length); i++) {               \
    123            Uint32 s = *src++;                              \
    124            Uint32 d = *dst;                                \
    125            Uint32 s1 = s & 0xff00ff;                       \
    126            Uint32 d1 = d & 0xff00ff;                       \
    127            d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff; \
    128            s &= 0xff00;                                    \
    129            d &= 0xff00;                                    \
    130            d = (d + ((s - d) * alpha >> 8)) & 0xff00;      \
    131            *dst++ = d1 | d;                                \
    132        }                                                   \
    133    } while (0)
    134
    135/*
    136 * For 16bpp pixels we can go a step further: put the middle component
    137 * in the high 16 bits of a 32 bit word, and process all three RGB
    138 * components at the same time. Since the smallest gap is here just
    139 * 5 bits, we have to scale alpha down to 5 bits as well.
    140 */
    141#define ALPHA_BLIT16_565(to, from, length, bpp, alpha)  \
    142    do {                                                \
    143        int i;                                          \
    144        Uint16 *src = (Uint16 *)(from);                 \
    145        Uint16 *dst = (Uint16 *)(to);                   \
    146        Uint32 ALPHA = alpha >> 3;                      \
    147        for(i = 0; i < (int)(length); i++) {            \
    148            Uint32 s = *src++;                          \
    149            Uint32 d = *dst;                            \
    150            s = (s | s << 16) & 0x07e0f81f;             \
    151            d = (d | d << 16) & 0x07e0f81f;             \
    152            d += (s - d) * ALPHA >> 5;                  \
    153            d &= 0x07e0f81f;                            \
    154            *dst++ = (Uint16)(d | d >> 16);             \
    155        }                                               \
    156    } while(0)
    157
    158#define ALPHA_BLIT16_555(to, from, length, bpp, alpha)  \
    159    do {                                                \
    160        int i;                                          \
    161        Uint16 *src = (Uint16 *)(from);                 \
    162        Uint16 *dst = (Uint16 *)(to);                   \
    163        Uint32 ALPHA = alpha >> 3;                      \
    164        for(i = 0; i < (int)(length); i++) {            \
    165            Uint32 s = *src++;                          \
    166            Uint32 d = *dst;                            \
    167            s = (s | s << 16) & 0x03e07c1f;             \
    168            d = (d | d << 16) & 0x03e07c1f;             \
    169            d += (s - d) * ALPHA >> 5;                  \
    170            d &= 0x03e07c1f;                            \
    171            *dst++ = (Uint16)(d | d >> 16);             \
    172        }                                               \
    173    } while(0)
    174
    175/*
    176 * The general slow catch-all function, for remaining depths and formats
        *
        * Blends `length` pixels of `bpp` bytes (2, 3 or 4) from `from` onto `to`.
        * Each pixel is split into R, G and B with RGB_FROM_PIXEL, every channel
        * is moved towards the source by (s - d) * alpha >> 8, and the result is
        * repacked with PIXEL_FROM_RGB. 3-byte pixels are read and written one
        * byte at a time, in the order selected by SDL_BYTEORDER.
        *
        * NOTE: the body refers to a variable named `fmt` that is not a macro
        * parameter; it must be in scope wherever the macro is expanded.
        * NOTE: the switch statements have no default case, so `s` and `d` are
        * left unset (and nothing is stored) for any other value of `bpp`.
    177 */
    178#define ALPHA_BLIT_ANY(to, from, length, bpp, alpha)            \
    179    do {                                                        \
    180        int i;                                                  \
    181        Uint8 *src = from;                                      \
    182        Uint8 *dst = to;                                        \
    183        for (i = 0; i < (int)(length); i++) {                   \
    184            Uint32 s, d;                                        \
    185            unsigned rs, gs, bs, rd, gd, bd;                    \
    186            switch (bpp) {                                      \
    187            case 2:                                             \
    188                s = *(Uint16 *)src;                             \
    189                d = *(Uint16 *)dst;                             \
    190                break;                                          \
    191            case 3:                                             \
    192                if (SDL_BYTEORDER == SDL_BIG_ENDIAN) {          \
    193                    s = (src[0] << 16) | (src[1] << 8) | src[2]; \
    194                    d = (dst[0] << 16) | (dst[1] << 8) | dst[2]; \
    195                } else {                                        \
    196                    s = (src[2] << 16) | (src[1] << 8) | src[0]; \
    197                    d = (dst[2] << 16) | (dst[1] << 8) | dst[0]; \
    198                }                                               \
    199                break;                                          \
    200            case 4:                                             \
    201                s = *(Uint32 *)src;                             \
    202                d = *(Uint32 *)dst;                             \
    203                break;                                          \
    204            }                                                   \
    205            RGB_FROM_PIXEL(s, fmt, rs, gs, bs);                 \
    206            RGB_FROM_PIXEL(d, fmt, rd, gd, bd);                 \
    207            rd += (rs - rd) * alpha >> 8;                       \
    208            gd += (gs - gd) * alpha >> 8;                       \
    209            bd += (bs - bd) * alpha >> 8;                       \
    210            PIXEL_FROM_RGB(d, fmt, rd, gd, bd);                 \
    211            switch (bpp) {                                      \
    212            case 2:                                             \
    213                *(Uint16 *)dst = (Uint16)d;                     \
    214                break;                                          \
    215            case 3:                                             \
    216                if (SDL_BYTEORDER == SDL_BIG_ENDIAN) {          \
    217                    dst[0] = (Uint8)(d >> 16);                  \
    218                    dst[1] = (Uint8)(d >> 8);                   \
    219                    dst[2] = (Uint8)(d);                        \
    220                } else {                                        \
    221                    dst[0] = (Uint8)d;                          \
    222                    dst[1] = (Uint8)(d >> 8);                   \
    223                    dst[2] = (Uint8)(d >> 16);                  \
    224                }                                               \
    225                break;                                          \
    226            case 4:                                             \
    227                *(Uint32 *)dst = d;                             \
    228                break;                                          \
    229            }                                                   \
    230            src += bpp;                                         \
    231            dst += bpp;                                         \
    232        }                                                       \
    233    } while(0)
    234
    235/*
    236 * Special case: 50% alpha (alpha=128)
    237 * This is treated specially because it can be optimized very well, and
    238 * since it is good for many cases of semi-translucency.
    239 * The theory is to do all three components at the same time:
    240 * First zero the lowest bit of each component, which gives us room to
    241 * add them. Then shift right and add the sum of the lowest bits.
        *
        * In the code below the "sum of the lowest bits" term is s & d & 0x00010101,
        * i.e. one is added to a component only when its lowest bit is set in both
        * the source and the destination pixel.
        * `bpp` and `alpha` are not used by this macro. Both masks cover only the
        * low 24 bits, so the top byte of every written pixel is zero.
    242 */
    243#define ALPHA_BLIT32_888_50(to, from, length, bpp, alpha)       \
    244    do {                                                        \
    245        int i;                                                  \
    246        Uint32 *src = (Uint32 *)(from);                         \
    247        Uint32 *dst = (Uint32 *)(to);                           \
    248        for(i = 0; i < (int)(length); i++) {                    \
    249            Uint32 s = *src++;                                  \
    250            Uint32 d = *dst;                                    \
    251            *dst++ = (((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) \
    252                 + (s & d & 0x00010101);                        \
    253        }                                                       \
    254    } while(0)
    255
    256/*
    257 * For 16bpp, we can actually blend two pixels in parallel, if we take
    258 * care to shift before we add, not after.
    259 */
    260
    261/* helper: blend a single 16 bit pixel at 50% */
    262#define BLEND16_50(dst, src, mask)                              \
    263    do {                                                        \
    264        Uint32 s = *src++;                                      \
    265        Uint32 d = *dst;                                        \
    266        *dst++ = (Uint16)((((s & mask) + (d & mask)) >> 1) +    \
    267                          (s & d & (~mask & 0xffff)));          \
    268    } while(0)
    269
    270/* basic 16bpp blender. mask is the pixels to keep when adding. */
    271#define ALPHA_BLIT16_50(to, from, length, bpp, alpha, mask)     \
    272    do {                                                        \
    273        unsigned n = (length);                                  \
    274        Uint16 *src = (Uint16 *)(from);                         \
    275        Uint16 *dst = (Uint16 *)(to);                           \
    276        if (((uintptr_t)src ^ (uintptr_t)dst) & 3) {            \
    277            /* source and destination not in phase, blit one by one */ \
    278            while (n--)                                         \
    279                BLEND16_50(dst, src, mask);                     \
    280        } else {                                                \
    281            if ((uintptr_t)src & 3) {                           \
    282                /* first odd pixel */                           \
    283                BLEND16_50(dst, src, mask);                     \
    284                n--;                                            \
    285            }                                                   \
    286            for (; n > 1; n -= 2) {                             \
    287                Uint32 s = *(Uint32 *)src;                      \
    288                Uint32 d = *(Uint32 *)dst;                      \
    289                *(Uint32 *)dst = ((s & (mask | mask << 16)) >> 1) \
    290                    + ((d & (mask | mask << 16)) >> 1)          \
    291                    + (s & d & (~(mask | mask << 16)));         \
    292                src += 2;                                       \
    293                dst += 2;                                       \
    294            }                                                   \
    295            if (n)                                              \
    296                BLEND16_50(dst, src, mask); /* last odd pixel */ \
    297        }                                                       \
    298    } while(0)
    299
    /*
     * 50% blend for 5-6-5 pixels: mask 0xf7de has every bit set except
     * bits 0, 5 and 11 (the lowest bit of each component).
     */
    300#define ALPHA_BLIT16_565_50(to, from, length, bpp, alpha)       \
    301    ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xf7de)
    302
    /*
     * 50% blend for 5-5-5 pixels: mask 0xfbde has every bit set except
     * bits 0, 5 and 10 (the lowest bit of each component).
     */
    303#define ALPHA_BLIT16_555_50(to, from, length, bpp, alpha)       \
    304    ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xfbde)
    305
    /*
     * Expand `blitter(bpp, Type, do_blit)` exactly once for the combination
     * selected at run time by `alpha` and the pixel format `fmt`:
     *
     *  - `bpp` is fmt->BytesPerPixel (1..4) as a literal.
     *  - `Type` is the integer type of the <skip>/<run> counts: Uint8 for
     *    1-3 bytes per pixel, Uint16 for 4 bytes per pixel.
     *  - `do_blit` is OPAQUE_BLIT when alpha == 255, otherwise one of the
     *    ALPHA_BLIT* macros:
     *      2 bytes, R|G|B == 0xffff and one mask == 0x07e0: the 565 macros;
     *      2 bytes, R|G|B == 0x7fff and one mask == 0x03e0: the 555 macros;
     *      4 bytes, R|G|B == 0x00ffffff and one mask == 0xff00: the 888 macros;
     *      every other 2-, 3- or 4-byte format: ALPHA_BLIT_ANY.
     *    For the 565/555/888 cases alpha == 128 selects the *_50 variant.
     *
     * With alpha != 255 and 1 byte per pixel nothing is expanded (no 8bpp
     * alpha blitting). The macro defines the label `general16`, so it can be
     * used at most once per function.
     */
    306#define CHOOSE_BLIT(blitter, alpha, fmt)                        \
    307    do {                                                        \
    308        if (alpha == 255) {                                     \
    309            switch (fmt->BytesPerPixel) {                       \
    310            case 1: blitter(1, Uint8, OPAQUE_BLIT); break;      \
    311            case 2: blitter(2, Uint8, OPAQUE_BLIT); break;      \
    312            case 3: blitter(3, Uint8, OPAQUE_BLIT); break;      \
    313            case 4: blitter(4, Uint16, OPAQUE_BLIT); break;     \
    314            }                                                   \
    315        } else {                                                \
    316            switch (fmt->BytesPerPixel) {                       \
    317            case 1:                                             \
    318                /* No 8bpp alpha blitting */                    \
    319                break;                                          \
    320                                                                \
    321            case 2:                                             \
    322                switch (fmt->Rmask | fmt->Gmask | fmt->Bmask) { \
    323                case 0xffff:                                    \
    324                    if (fmt->Gmask == 0x07e0                    \
    325                        || fmt->Rmask == 0x07e0                 \
    326                        || fmt->Bmask == 0x07e0) {              \
    327                        if (alpha == 128) {                     \
    328                            blitter(2, Uint8, ALPHA_BLIT16_565_50); \
    329                        } else {                                \
    330                            blitter(2, Uint8, ALPHA_BLIT16_565); \
    331                        }                                       \
    332                    } else                                      \
    333                        goto general16;                         \
    334                    break;                                      \
    335                                                                \
    336                case 0x7fff:                                    \
    337                    if (fmt->Gmask == 0x03e0                    \
    338                        || fmt->Rmask == 0x03e0                 \
    339                        || fmt->Bmask == 0x03e0) {              \
    340                        if (alpha == 128) {                     \
    341                            blitter(2, Uint8, ALPHA_BLIT16_555_50); \
    342                        } else {                                \
    343                            blitter(2, Uint8, ALPHA_BLIT16_555); \
    344                        }                                       \
    345                        break;                                  \
    346                    } else                                      \
    347                        goto general16;                         \
    348                    break;                                      \
    349                                                                \
    350                default:                                        \
    351    general16:                                                  \
    352                    blitter(2, Uint8, ALPHA_BLIT_ANY);          \
    353                }                                               \
    354                break;                                          \
    355                                                                \
    356            case 3:                                             \
    357                blitter(3, Uint8, ALPHA_BLIT_ANY);              \
    358                break;                                          \
    359                                                                \
    360            case 4:                                             \
    361                if ((fmt->Rmask | fmt->Gmask | fmt->Bmask) == 0x00ffffff \
    362                    && (fmt->Gmask == 0xff00 || fmt->Rmask == 0xff00 \
    363                    || fmt->Bmask == 0xff00)) {                 \
    364                    if (alpha == 128) {                         \
    365                        blitter(4, Uint16, ALPHA_BLIT32_888_50); \
    366                    } else {                                    \
    367                        blitter(4, Uint16, ALPHA_BLIT32_888);   \
    368                    }                                           \
    369                } else                                          \
    370                    blitter(4, Uint16, ALPHA_BLIT_ANY);         \
    371                break;                                          \
    372            }                                                   \
    373        }                                                       \
    374    } while(0)
    375
    376/*
    377 * This takes care of the case when the surface is clipped on the left and/or
    378 * right. Top clipping has already been taken care of.
        *
        * w        - width of one encoded line in pixels; a line is complete when
        *            the accumulated column offset equals w.
        * srcbuf   - current position in the <skip> <run> <data> stream.
        * surf_dst - destination surface; its format selects the blit macro and
        *            its pitch advances dstbuf from line to line.
        * dstbuf   - destination address for the first clipped line. It is moved
        *            back by srcrect->x pixels so that it can be indexed with
        *            source column numbers.
        * srcrect  - clip rectangle: only columns in [x, x + w) are written and at
        *            most h lines are processed.
        * alpha    - per-surface alpha handed to CHOOSE_BLIT and the blit macro.
        *
        * Each run is trimmed to the [left, right) column range before it is
        * blitted; runs entirely outside the range are only skipped in the
        * stream. Decoding also stops at a zero <skip>,<run> pair at the start
        * of a line.
    379 */
    380static void
    381RLEClipBlit(int w, Uint8 * srcbuf, SDL_Surface * surf_dst,
    382            Uint8 * dstbuf, SDL_Rect * srcrect, unsigned alpha)
    383{
    384    SDL_PixelFormat *fmt = surf_dst->format;
    385
    386#define RLECLIPBLIT(bpp, Type, do_blit)                         \
    387    do {                                                        \
    388        int linecount = srcrect->h;                             \
    389        int ofs = 0;                                            \
    390        int left = srcrect->x;                                  \
    391        int right = left + srcrect->w;                          \
    392        dstbuf -= left * bpp;                                   \
    393        for (;;) {                                              \
    394            int run;                                            \
    395            ofs += *(Type *)srcbuf;                             \
    396            run = ((Type *)srcbuf)[1];                          \
    397            srcbuf += 2 * sizeof(Type);                         \
    398            if (run) {                                          \
    399                /* clip to left and right borders */            \
    400                if (ofs < right) {                              \
    401                    int start = 0;                              \
    402                    int len = run;                              \
    403                    int startcol;                               \
    404                    if (left - ofs > 0) {                       \
    405                        start = left - ofs;                     \
    406                        len -= start;                           \
    407                        if (len <= 0)                           \
    408                            goto nocopy ## bpp ## do_blit;      \
    409                    }                                           \
    410                    startcol = ofs + start;                     \
    411                    if (len > right - startcol)                 \
    412                        len = right - startcol;                 \
    413                    do_blit(dstbuf + startcol * bpp, srcbuf + start * bpp, \
    414                        len, bpp, alpha);                       \
    415                }                                               \
    416    nocopy ## bpp ## do_blit:                                   \
    417                srcbuf += run * bpp;                            \
    418                ofs += run;                                     \
    419            } else if (!ofs)                                    \
    420                break;                                          \
    421                                                                \
    422            if (ofs == w) {                                     \
    423                ofs = 0;                                        \
    424                dstbuf += surf_dst->pitch;                      \
    425                if (!--linecount)                               \
    426                    break;                                      \
    427            }                                                   \
    428        }                                                       \
    429    } while(0)
    430
    431    CHOOSE_BLIT(RLECLIPBLIT, alpha, fmt);
    432
    433#undef RLECLIPBLIT
    434
    435}
    436
    437
    438/* blit a colorkeyed RLE surface
        *
        * Decodes the RLE stream in surf_src->map->data and writes the srcrect
        * part of it to surf_dst at (dstrect->x, dstrect->y), using the
        * per-surface alpha in surf_src->map->info.a.
        *
        * Steps:
        *  1. Lock surf_dst when SDL_MUSTLOCK says so.
        *  2. Skip srcrect->y encoded lines at the top of the stream; if the end
        *     marker is reached while skipping, nothing is drawn.
        *  3. If srcrect does not span the full surface width, hand over to
        *     RLEClipBlit; otherwise blit every run of srcrect->h lines directly.
        *  4. Unlock surf_dst again.
        *
        * Returns -1 if the destination could not be locked, 0 otherwise.
        * Only x and y of dstrect are read.
        */
    439int
    440SDL_RLEBlit(SDL_Surface * surf_src, SDL_Rect * srcrect,
    441            SDL_Surface * surf_dst, SDL_Rect * dstrect)
    442{
    443    Uint8 *dstbuf;
    444    Uint8 *srcbuf;
    445    int x, y;
    446    int w = surf_src->w;
    447    unsigned alpha;
    448
    449    /* Lock the destination if necessary */
    450    if (SDL_MUSTLOCK(surf_dst)) {
    451        if (SDL_LockSurface(surf_dst) < 0) {
    452            return (-1);
    453        }
    454    }
    455
    456    /* Set up the source and destination pointers */
           /* NOTE(review): the destination byte offset uses the source format's
              BytesPerPixel; presumably the encoded data is already in the
              destination format - confirm against the encoder. */
    457    x = dstrect->x;
    458    y = dstrect->y;
    459    dstbuf = (Uint8 *) surf_dst->pixels
    460        + y * surf_dst->pitch + x * surf_src->format->BytesPerPixel;
    461    srcbuf = (Uint8 *) surf_src->map->data;
    462
    463    {
    464        /* skip lines at the top if necessary */
    465        int vskip = srcrect->y;
    466        int ofs = 0;
    467        if (vskip) {
    468
           /* Walk the stream without drawing until vskip complete lines have
              been consumed; jumps to `done` on the end-of-stream marker. */
    469#define RLESKIP(bpp, Type)          \
    470        for(;;) {           \
    471            int run;            \
    472            ofs += *(Type *)srcbuf; \
    473            run = ((Type *)srcbuf)[1];  \
    474            srcbuf += sizeof(Type) * 2; \
    475            if(run) {           \
    476            srcbuf += run * bpp;    \
    477            ofs += run;     \
    478            } else if(!ofs)     \
    479            goto done;      \
    480            if(ofs == w) {      \
    481            ofs = 0;        \
    482            if(!--vskip)        \
    483                break;      \
    484            }               \
    485        }
    486
    487            switch (surf_src->format->BytesPerPixel) {
    488            case 1:
    489                RLESKIP(1, Uint8);
    490                break;
    491            case 2:
    492                RLESKIP(2, Uint8);
    493                break;
    494            case 3:
    495                RLESKIP(3, Uint8);
    496                break;
    497            case 4:
    498                RLESKIP(4, Uint16);
    499                break;
    500            }
    501
    502#undef RLESKIP
    503
    504        }
    505    }
    506
    507    alpha = surf_src->map->info.a;
    508    /* if left or right edge clipping needed, call clip blit */
    509    if (srcrect->x || srcrect->w != surf_src->w) {
    510        RLEClipBlit(w, srcbuf, surf_dst, dstbuf, srcrect, alpha);
    511    } else {
    512        SDL_PixelFormat *fmt = surf_src->format;
    513
           /* Unclipped blitter: every run is drawn in full at its column
              offset; stops after srcrect->h lines or at the end marker. */
    514#define RLEBLIT(bpp, Type, do_blit)                       \
    515        do {                                  \
    516        int linecount = srcrect->h;                   \
    517        int ofs = 0;                              \
    518        for(;;) {                             \
    519            unsigned run;                         \
    520            ofs += *(Type *)srcbuf;                   \
    521            run = ((Type *)srcbuf)[1];                    \
    522            srcbuf += 2 * sizeof(Type);                   \
    523            if(run) {                             \
    524            do_blit(dstbuf + ofs * bpp, srcbuf, run, bpp, alpha); \
    525            srcbuf += run * bpp;                      \
    526            ofs += run;                       \
    527            } else if(!ofs)                       \
    528            break;                            \
    529            if(ofs == w) {                        \
    530            ofs = 0;                          \
    531            dstbuf += surf_dst->pitch;                     \
    532            if(!--linecount)                      \
    533                break;                        \
    534            }                                 \
    535        }                                 \
    536        } while(0)
    537
    538        CHOOSE_BLIT(RLEBLIT, alpha, fmt);
    539
    540#undef RLEBLIT
    541    }
    542
    543  done:
    544    /* Unlock the destination if necessary */
    545    if (SDL_MUSTLOCK(surf_dst)) {
    546        SDL_UnlockSurface(surf_dst);
    547    }
    548    return (0);
    549}
    550
    551#undef OPAQUE_BLIT
    552
    553/*
    554 * Per-pixel blitting macros for translucent pixels:
    555 * These use the same techniques as the per-surface blitting macros
    556 */
    557
    558/*
    559 * For 32bpp pixels, we have made sure the alpha is stored in the top
    560 * 8 bits, so proceed as usual
        *
        * Blends the single encoded pixel `src` onto `dst`: alpha is taken from
        * the top byte of `src` (s >> 24), the outer components (mask 0xff00ff)
        * and the middle component (mask 0xff00) are blended separately, and the
        * top byte of the stored result is forced to 0xff.
        * `dst` must be an lvalue; it is read once and written once.
    561 */
    562#define BLIT_TRANSL_888(src, dst)               \
    563    do {                            \
    564        Uint32 s = src;                     \
    565    Uint32 d = dst;                     \
    566    unsigned alpha = s >> 24;               \
    567    Uint32 s1 = s & 0xff00ff;               \
    568    Uint32 d1 = d & 0xff00ff;               \
    569    d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff;    \
    570    s &= 0xff00;                        \
    571    d &= 0xff00;                        \
    572    d = (d + ((s - d) * alpha >> 8)) & 0xff00;      \
    573    dst = d1 | d | 0xff000000;              \
    574    } while(0)
    575
    576/*
    577 * For 16bpp pixels, we have stored the 5 most significant alpha bits in
    578 * bits 5-9 (mask 0x3e0). As before, we can process all 3 RGB components at
        * the same time.
        *
        * `src` is the 32-bit encoded pixel, whose colour components are already
        * spread out, so only the alpha bits are masked off it (0x07e0f81f); the
        * 16-bit destination pixel is spread out here with d | d << 16.
        * `dst` must be an lvalue; it is read once and written once.
    579 */
    580#define BLIT_TRANSL_565(src, dst)       \
    581    do {                    \
    582    Uint32 s = src;             \
    583    Uint32 d = dst;             \
    584    unsigned alpha = (s & 0x3e0) >> 5;  \
    585    s &= 0x07e0f81f;            \
    586    d = (d | d << 16) & 0x07e0f81f;     \
    587    d += (s - d) * alpha >> 5;      \
    588    d &= 0x07e0f81f;            \
    589    dst = (Uint16)(d | d >> 16);            \
    590    } while(0)
    591
    /*
     * 5-5-5 counterpart of BLIT_TRANSL_565: the 5-bit alpha is read from
     * bits 5-9 of the encoded pixel `src` (mask 0x3e0) and the spread-out
     * component mask is 0x03e07c1f. `dst` must be an lvalue.
     */
    592#define BLIT_TRANSL_555(src, dst)       \
    593    do {                    \
    594    Uint32 s = src;             \
    595    Uint32 d = dst;             \
    596    unsigned alpha = (s & 0x3e0) >> 5;  \
    597    s &= 0x03e07c1f;            \
    598    d = (d | d << 16) & 0x03e07c1f;     \
    599    d += (s - d) * alpha >> 5;      \
    600    d &= 0x03e07c1f;            \
    601    dst = (Uint16)(d | d >> 16);            \
    602    } while(0)
    603
    604/* used to save the destination format in the encoding. Designed to be
    605   macro-compatible with SDL_PixelFormat but without the unneeded fields */
    606typedef struct
    607{
    608    Uint8 BytesPerPixel;
    609    Uint8 padding[3];
    610    Uint32 Rmask;
    611    Uint32 Gmask;
    612    Uint32 Bmask;
    613    Uint32 Amask;
    614    Uint8 Rloss;
    615    Uint8 Gloss;
    616    Uint8 Bloss;
    617    Uint8 Aloss;
    618    Uint8 Rshift;
    619    Uint8 Gshift;
    620    Uint8 Bshift;
    621    Uint8 Ashift;
    622} RLEDestFormat;
    623
    624/* blit a pixel-alpha RLE surface clipped at the right and/or left edges */
    625static void
    626RLEAlphaClipBlit(int w, Uint8 * srcbuf, SDL_Surface * surf_dst,
    627                 Uint8 * dstbuf, SDL_Rect * srcrect)
    628{
    629    SDL_PixelFormat *df = surf_dst->format;
    630    /*
    631     * clipped blitter: Ptype is the destination pixel type,
    632     * Ctype the translucent count type, and do_blend the macro
    633     * to blend one pixel.
    634     */
    635#define RLEALPHACLIPBLIT(Ptype, Ctype, do_blend)              \
    636    do {                                  \
    637    int linecount = srcrect->h;                   \
    638    int left = srcrect->x;                        \
    639    int right = left + srcrect->w;                    \
    640    dstbuf -= left * sizeof(Ptype);                   \
    641    do {                                  \
    642        int ofs = 0;                          \
    643        /* blit opaque pixels on one line */              \
    644        do {                              \
    645        unsigned run;                         \
    646        ofs += ((Ctype *)srcbuf)[0];                  \
    647        run = ((Ctype *)srcbuf)[1];               \
    648        srcbuf += 2 * sizeof(Ctype);                  \
    649        if(run) {                         \
    650            /* clip to left and right borders */          \
    651            int cofs = ofs;                   \
    652            int crun = run;                   \
    653            if(left - cofs > 0) {                 \
    654            crun -= left - cofs;                  \
    655            cofs = left;                      \
    656            }                             \
    657            if(crun > right - cofs)               \
    658            crun = right - cofs;                  \
    659            if(crun > 0)                      \
    660            PIXEL_COPY(dstbuf + cofs * sizeof(Ptype),     \
    661                   srcbuf + (cofs - ofs) * sizeof(Ptype), \
    662                   (unsigned)crun, sizeof(Ptype));    \
    663            srcbuf += run * sizeof(Ptype);            \
    664            ofs += run;                       \
    665        } else if(!ofs)                       \
    666            return;                       \
    667        } while(ofs < w);                         \
    668        /* skip padding if necessary */               \
    669        if(sizeof(Ptype) == 2)                    \
    670        srcbuf += (uintptr_t)srcbuf & 2;              \
    671        /* blit translucent pixels on the same line */        \
    672        ofs = 0;                              \
    673        do {                              \
    674        unsigned run;                         \
    675        ofs += ((Uint16 *)srcbuf)[0];                 \
    676        run = ((Uint16 *)srcbuf)[1];                  \
    677        srcbuf += 4;                          \
    678        if(run) {                         \
    679            /* clip to left and right borders */          \
    680            int cofs = ofs;                   \
    681            int crun = run;                   \
    682            if(left - cofs > 0) {                 \
    683            crun -= left - cofs;                  \
    684            cofs = left;                      \
    685            }                             \
    686            if(crun > right - cofs)               \
    687            crun = right - cofs;                  \
    688            if(crun > 0) {                    \
    689            Ptype *dst = (Ptype *)dstbuf + cofs;          \
    690            Uint32 *src = (Uint32 *)srcbuf + (cofs - ofs);    \
    691            int i;                        \
    692            for(i = 0; i < crun; i++)             \
    693                do_blend(src[i], dst[i]);             \
    694            }                             \
    695            srcbuf += run * 4;                    \
    696            ofs += run;                       \
    697        }                             \
    698        } while(ofs < w);                         \
    699        dstbuf += surf_dst->pitch;                     \
    700    } while(--linecount);                         \
    701    } while(0)
    702
    703    switch (df->BytesPerPixel) {
    704    case 2:
    705        if (df->Gmask == 0x07e0 || df->Rmask == 0x07e0 || df->Bmask == 0x07e0)
    706            RLEALPHACLIPBLIT(Uint16, Uint8, BLIT_TRANSL_565);
    707        else
    708            RLEALPHACLIPBLIT(Uint16, Uint8, BLIT_TRANSL_555);
    709        break;
    710    case 4:
    711        RLEALPHACLIPBLIT(Uint32, Uint16, BLIT_TRANSL_888);
    712        break;
    713    }
    714}
    715
    716/* blit a pixel-alpha RLE surface */
    717int
    718SDL_RLEAlphaBlit(SDL_Surface * surf_src, SDL_Rect * srcrect,
    719                 SDL_Surface * surf_dst, SDL_Rect * dstrect)
    720{
    721    int x, y;
    722    int w = surf_src->w;
    723    Uint8 *srcbuf, *dstbuf;
    724    SDL_PixelFormat *df = surf_dst->format;
    725
    726    /* Lock the destination if necessary */
    727    if (SDL_MUSTLOCK(surf_dst)) {
    728        if (SDL_LockSurface(surf_dst) < 0) {
    729            return -1;
    730        }
    731    }
    732
    733    x = dstrect->x;
    734    y = dstrect->y;
    735    dstbuf = (Uint8 *) surf_dst->pixels + y * surf_dst->pitch + x * df->BytesPerPixel;
    736    srcbuf = (Uint8 *) surf_src->map->data + sizeof(RLEDestFormat);
    737
    738    {
    739        /* skip lines at the top if necessary */
    740        int vskip = srcrect->y;
    741        if (vskip) {
    742            int ofs;
    743            if (df->BytesPerPixel == 2) {
    744                /* the 16/32 interleaved format */
    745                do {
    746                    /* skip opaque line */
    747                    ofs = 0;
    748                    do {
    749                        int run;
    750                        ofs += srcbuf[0];
    751                        run = srcbuf[1];
    752                        srcbuf += 2;
    753                        if (run) {
    754                            srcbuf += 2 * run;
    755                            ofs += run;
    756                        } else if (!ofs)
    757                            goto done;
    758                    } while (ofs < w);
    759
    760                    /* skip padding */
    761                    srcbuf += (uintptr_t) srcbuf & 2;
    762
    763                    /* skip translucent line */
    764                    ofs = 0;
    765                    do {
    766                        int run;
    767                        ofs += ((Uint16 *) srcbuf)[0];
    768                        run = ((Uint16 *) srcbuf)[1];
    769                        srcbuf += 4 * (run + 1);
    770                        ofs += run;
    771                    } while (ofs < w);
    772                } while (--vskip);
    773            } else {
    774                /* the 32/32 interleaved format */
    775                vskip <<= 1;    /* opaque and translucent have same format */
    776                do {
    777                    ofs = 0;
    778                    do {
    779                        int run;
    780                        ofs += ((Uint16 *) srcbuf)[0];
    781                        run = ((Uint16 *) srcbuf)[1];
    782                        srcbuf += 4;
    783                        if (run) {
    784                            srcbuf += 4 * run;
    785                            ofs += run;
    786                        } else if (!ofs)
    787                            goto done;
    788                    } while (ofs < w);
    789                } while (--vskip);
    790            }
    791        }
    792    }
    793
    794    /* if left or right edge clipping needed, call clip blit */
    795    if (srcrect->x || srcrect->w != surf_src->w) {
    796        RLEAlphaClipBlit(w, srcbuf, surf_dst, dstbuf, srcrect);
    797    } else {
    798
    799        /*
    800         * non-clipped blitter. Ptype is the destination pixel type,
    801         * Ctype the translucent count type, and do_blend the
    802         * macro to blend one pixel.
    803         */
    804#define RLEALPHABLIT(Ptype, Ctype, do_blend)                 \
    805    do {                                 \
    806        int linecount = srcrect->h;                  \
    807        do {                             \
    808        int ofs = 0;                         \
    809        /* blit opaque pixels on one line */             \
    810        do {                             \
    811            unsigned run;                    \
    812            ofs += ((Ctype *)srcbuf)[0];             \
    813            run = ((Ctype *)srcbuf)[1];              \
    814            srcbuf += 2 * sizeof(Ctype);             \
    815            if(run) {                        \
    816            PIXEL_COPY(dstbuf + ofs * sizeof(Ptype), srcbuf, \
    817                   run, sizeof(Ptype));          \
    818            srcbuf += run * sizeof(Ptype);           \
    819            ofs += run;                  \
    820            } else if(!ofs)                  \
    821            goto done;                   \
    822        } while(ofs < w);                    \
    823        /* skip padding if necessary */              \
    824        if(sizeof(Ptype) == 2)                   \
    825            srcbuf += (uintptr_t)srcbuf & 2;             \
    826        /* blit translucent pixels on the same line */       \
    827        ofs = 0;                         \
    828        do {                             \
    829            unsigned run;                    \
    830            ofs += ((Uint16 *)srcbuf)[0];            \
    831            run = ((Uint16 *)srcbuf)[1];             \
    832            srcbuf += 4;                     \
    833            if(run) {                        \
    834            Ptype *dst = (Ptype *)dstbuf + ofs;      \
    835            unsigned i;                  \
    836            for(i = 0; i < run; i++) {           \
    837                Uint32 src = *(Uint32 *)srcbuf;      \
    838                do_blend(src, *dst);             \
    839                srcbuf += 4;                 \
    840                dst++;                   \
    841            }                        \
    842            ofs += run;                  \
    843            }                            \
    844        } while(ofs < w);                    \
    845        dstbuf += surf_dst->pitch;                    \
    846        } while(--linecount);                    \
    847    } while(0)
    848
    849        switch (df->BytesPerPixel) {
    850        case 2:
    851            if (df->Gmask == 0x07e0 || df->Rmask == 0x07e0
    852                || df->Bmask == 0x07e0)
    853                RLEALPHABLIT(Uint16, Uint8, BLIT_TRANSL_565);
    854            else
    855                RLEALPHABLIT(Uint16, Uint8, BLIT_TRANSL_555);
    856            break;
    857        case 4:
    858            RLEALPHABLIT(Uint32, Uint16, BLIT_TRANSL_888);
    859            break;
    860        }
    861    }
    862
    863  done:
    864    /* Unlock the destination if necessary */
    865    if (SDL_MUSTLOCK(surf_dst)) {
    866        SDL_UnlockSurface(surf_dst);
    867    }
    868    return 0;
    869}
    870
    871/*
    872 * Auxiliary functions:
    873 * The encoding functions take 32bpp rgb + a, and
    874 * return the number of bytes copied to the destination.
    875 * The decoding functions copy to 32bpp rgb + a, and
    876 * return the number of bytes copied from the source.
    877 * These are only used in the encoder and un-RLE code and are therefore not
    878 * highly optimised.
    879 */
    880
    881/* encode 32bpp rgb + a into 16bpp rgb, losing alpha */
    882static int
    883copy_opaque_16(void *dst, Uint32 * src, int n,
    884               SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
    885{
    886    int i;
    887    Uint16 *d = dst;
    888    for (i = 0; i < n; i++) {
    889        unsigned r, g, b;
    890        RGB_FROM_PIXEL(*src, sfmt, r, g, b);
    891        PIXEL_FROM_RGB(*d, dfmt, r, g, b);
    892        src++;
    893        d++;
    894    }
    895    return n * 2;
    896}
    897
    898/* decode opaque pixels from 16bpp to 32bpp rgb + a */
    899static int
    900uncopy_opaque_16(Uint32 * dst, void *src, int n,
    901                 RLEDestFormat * sfmt, SDL_PixelFormat * dfmt)
    902{
    903    int i;
    904    Uint16 *s = src;
    905    unsigned alpha = dfmt->Amask ? 255 : 0;
    906    for (i = 0; i < n; i++) {
    907        unsigned r, g, b;
    908        RGB_FROM_PIXEL(*s, sfmt, r, g, b);
    909        PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, alpha);
    910        s++;
    911        dst++;
    912    }
    913    return n * 2;
    914}
    915
    916
    917
    918/* encode 32bpp rgb + a into 32bpp G0RAB format for blitting into 565 */
    919static int
    920copy_transl_565(void *dst, Uint32 * src, int n,
    921                SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
    922{
    923    int i;
    924    Uint32 *d = dst;
    925    for (i = 0; i < n; i++) {
    926        unsigned r, g, b, a;
    927        Uint16 pix;
    928        RGBA_FROM_8888(*src, sfmt, r, g, b, a);
    929        PIXEL_FROM_RGB(pix, dfmt, r, g, b);
    930        *d = ((pix & 0x7e0) << 16) | (pix & 0xf81f) | ((a << 2) & 0x7e0);
    931        src++;
    932        d++;
    933    }
    934    return n * 4;
    935}
    936
    937/* encode 32bpp rgb + a into 32bpp G0RAB format for blitting into 555 */
    938static int
    939copy_transl_555(void *dst, Uint32 * src, int n,
    940                SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
    941{
    942    int i;
    943    Uint32 *d = dst;
    944    for (i = 0; i < n; i++) {
    945        unsigned r, g, b, a;
    946        Uint16 pix;
    947        RGBA_FROM_8888(*src, sfmt, r, g, b, a);
    948        PIXEL_FROM_RGB(pix, dfmt, r, g, b);
    949        *d = ((pix & 0x3e0) << 16) | (pix & 0xfc1f) | ((a << 2) & 0x3e0);
    950        src++;
    951        d++;
    952    }
    953    return n * 4;
    954}
    955
    956/* decode translucent pixels from 32bpp GORAB to 32bpp rgb + a */
    957static int
    958uncopy_transl_16(Uint32 * dst, void *src, int n,
    959                 RLEDestFormat * sfmt, SDL_PixelFormat * dfmt)
    960{
    961    int i;
    962    Uint32 *s = src;
    963    for (i = 0; i < n; i++) {
    964        unsigned r, g, b, a;
    965        Uint32 pix = *s++;
    966        a = (pix & 0x3e0) >> 2;
    967        pix = (pix & ~0x3e0) | pix >> 16;
    968        RGB_FROM_PIXEL(pix, sfmt, r, g, b);
    969        PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, a);
    970        dst++;
    971    }
    972    return n * 4;
    973}
    974
    975/* encode 32bpp rgba into 32bpp rgba, keeping alpha (dual purpose) */
    976static int
    977copy_32(void *dst, Uint32 * src, int n,
    978        SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
    979{
    980    int i;
    981    Uint32 *d = dst;
    982    for (i = 0; i < n; i++) {
    983        unsigned r, g, b, a;
    984        RGBA_FROM_8888(*src, sfmt, r, g, b, a);
    985        PIXEL_FROM_RGBA(*d, dfmt, r, g, b, a);
    986        d++;
    987        src++;
    988    }
    989    return n * 4;
    990}
    991
    992/* decode 32bpp rgba into 32bpp rgba, keeping alpha (dual purpose) */
    993static int
    994uncopy_32(Uint32 * dst, void *src, int n,
    995          RLEDestFormat * sfmt, SDL_PixelFormat * dfmt)
    996{
    997    int i;
    998    Uint32 *s = src;
    999    for (i = 0; i < n; i++) {
   1000        unsigned r, g, b, a;
   1001        Uint32 pixel = *s++;
   1002        RGB_FROM_PIXEL(pixel, sfmt, r, g, b);
   1003        a = pixel >> 24;
   1004        PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, a);
   1005        dst++;
   1006    }
   1007    return n * 4;
   1008}
   1009
   1010#define ISOPAQUE(pixel, fmt) ((((pixel) & fmt->Amask) >> fmt->Ashift) == 255)
   1011
   1012#define ISTRANSL(pixel, fmt)    \
   1013    ((unsigned)((((pixel) & fmt->Amask) >> fmt->Ashift) - 1U) < 254U)
   1014
   1015/* convert surface to be quickly alpha-blittable onto dest, if possible */
   1016static int
   1017RLEAlphaSurface(SDL_Surface * surface)
   1018{
   1019    SDL_Surface *dest;
   1020    SDL_PixelFormat *df;
   1021    int maxsize = 0;
   1022    int max_opaque_run;
   1023    int max_transl_run = 65535;
   1024    unsigned masksum;
   1025    Uint8 *rlebuf, *dst;
   1026    int (*copy_opaque) (void *, Uint32 *, int,
   1027                        SDL_PixelFormat *, SDL_PixelFormat *);
   1028    int (*copy_transl) (void *, Uint32 *, int,
   1029                        SDL_PixelFormat *, SDL_PixelFormat *);
   1030
   1031    dest = surface->map->dst;
   1032    if (!dest)
   1033        return -1;
   1034    df = dest->format;
   1035    if (surface->format->BitsPerPixel != 32)
   1036        return -1;              /* only 32bpp source supported */
   1037
   1038    /* find out whether the destination is one we support,
   1039       and determine the max size of the encoded result */
   1040    masksum = df->Rmask | df->Gmask | df->Bmask;
   1041    switch (df->BytesPerPixel) {
   1042    case 2:
   1043        /* 16bpp: only support 565 and 555 formats */
   1044        switch (masksum) {
   1045        case 0xffff:
   1046            if (df->Gmask == 0x07e0
   1047                || df->Rmask == 0x07e0 || df->Bmask == 0x07e0) {
   1048                copy_opaque = copy_opaque_16;
   1049                copy_transl = copy_transl_565;
   1050            } else
   1051                return -1;
   1052            break;
   1053        case 0x7fff:
   1054            if (df->Gmask == 0x03e0
   1055                || df->Rmask == 0x03e0 || df->Bmask == 0x03e0) {
   1056                copy_opaque = copy_opaque_16;
   1057                copy_transl = copy_transl_555;
   1058            } else
   1059                return -1;
   1060            break;
   1061        default:
   1062            return -1;
   1063        }
   1064        max_opaque_run = 255;   /* runs stored as bytes */
   1065
   1066        /* worst case is alternating opaque and translucent pixels,
   1067           with room for alignment padding between lines */
   1068        maxsize = surface->h * (2 + (4 + 2) * (surface->w + 1)) + 2;
   1069        break;
   1070    case 4:
   1071        if (masksum != 0x00ffffff)
   1072            return -1;          /* requires unused high byte */
   1073        copy_opaque = copy_32;
   1074        copy_transl = copy_32;
   1075        max_opaque_run = 255;   /* runs stored as short ints */
   1076
   1077        /* worst case is alternating opaque and translucent pixels */
   1078        maxsize = surface->h * 2 * 4 * (surface->w + 1) + 4;
   1079        break;
   1080    default:
   1081        return -1;              /* anything else unsupported right now */
   1082    }
   1083
   1084    maxsize += sizeof(RLEDestFormat);
   1085    rlebuf = (Uint8 *) SDL_malloc(maxsize);
   1086    if (!rlebuf) {
   1087        return SDL_OutOfMemory();
   1088    }
   1089    {
   1090        /* save the destination format so we can undo the encoding later */
   1091        RLEDestFormat *r = (RLEDestFormat *) rlebuf;
   1092        r->BytesPerPixel = df->BytesPerPixel;
   1093        r->Rmask = df->Rmask;
   1094        r->Gmask = df->Gmask;
   1095        r->Bmask = df->Bmask;
   1096        r->Amask = df->Amask;
   1097        r->Rloss = df->Rloss;
   1098        r->Gloss = df->Gloss;
   1099        r->Bloss = df->Bloss;
   1100        r->Aloss = df->Aloss;
   1101        r->Rshift = df->Rshift;
   1102        r->Gshift = df->Gshift;
   1103        r->Bshift = df->Bshift;
   1104        r->Ashift = df->Ashift;
   1105    }
   1106    dst = rlebuf + sizeof(RLEDestFormat);
   1107
   1108    /* Do the actual encoding */
   1109    {
   1110        int x, y;
   1111        int h = surface->h, w = surface->w;
   1112        SDL_PixelFormat *sf = surface->format;
   1113        Uint32 *src = (Uint32 *) surface->pixels;
   1114        Uint8 *lastline = dst;  /* end of last non-blank line */
   1115
   1116        /* opaque counts are 8 or 16 bits, depending on target depth */
   1117#define ADD_OPAQUE_COUNTS(n, m)         \
   1118    if(df->BytesPerPixel == 4) {        \
   1119        ((Uint16 *)dst)[0] = n;     \
   1120        ((Uint16 *)dst)[1] = m;     \
   1121        dst += 4;               \
   1122    } else {                \
   1123        dst[0] = n;             \
   1124        dst[1] = m;             \
   1125        dst += 2;               \
   1126    }
   1127
   1128        /* translucent counts are always 16 bit */
   1129#define ADD_TRANSL_COUNTS(n, m)     \
   1130    (((Uint16 *)dst)[0] = n, ((Uint16 *)dst)[1] = m, dst += 4)
   1131
   1132        for (y = 0; y < h; y++) {
   1133            int runstart, skipstart;
   1134            int blankline = 0;
   1135            /* First encode all opaque pixels of a scan line */
   1136            x = 0;
   1137            do {
   1138                int run, skip, len;
   1139                skipstart = x;
   1140                while (x < w && !ISOPAQUE(src[x], sf))
   1141                    x++;
   1142                runstart = x;
   1143                while (x < w && ISOPAQUE(src[x], sf))
   1144                    x++;
   1145                skip = runstart - skipstart;
   1146                if (skip == w)
   1147                    blankline = 1;
   1148                run = x - runstart;
   1149                while (skip > max_opaque_run) {
   1150                    ADD_OPAQUE_COUNTS(max_opaque_run, 0);
   1151                    skip -= max_opaque_run;
   1152                }
   1153                len = MIN(run, max_opaque_run);
   1154                ADD_OPAQUE_COUNTS(skip, len);
   1155                dst += copy_opaque(dst, src + runstart, len, sf, df);
   1156                runstart += len;
   1157                run -= len;
   1158                while (run) {
   1159                    len = MIN(run, max_opaque_run);
   1160                    ADD_OPAQUE_COUNTS(0, len);
   1161                    dst += copy_opaque(dst, src + runstart, len, sf, df);
   1162                    runstart += len;
   1163                    run -= len;
   1164                }
   1165            } while (x < w);
   1166
   1167            /* Make sure the next output address is 32-bit aligned */
   1168            dst += (uintptr_t) dst & 2;
   1169
   1170            /* Next, encode all translucent pixels of the same scan line */
   1171            x = 0;
   1172            do {
   1173                int run, skip, len;
   1174                skipstart = x;
   1175                while (x < w && !ISTRANSL(src[x], sf))
   1176                    x++;
   1177                runstart = x;
   1178                while (x < w && ISTRANSL(src[x], sf))
   1179                    x++;
   1180                skip = runstart - skipstart;
   1181                blankline &= (skip == w);
   1182                run = x - runstart;
   1183                while (skip > max_transl_run) {
   1184                    ADD_TRANSL_COUNTS(max_transl_run, 0);
   1185                    skip -= max_transl_run;
   1186                }
   1187                len = MIN(run, max_transl_run);
   1188                ADD_TRANSL_COUNTS(skip, len);
   1189                dst += copy_transl(dst, src + runstart, len, sf, df);
   1190                runstart += len;
   1191                run -= len;
   1192                while (run) {
   1193                    len = MIN(run, max_transl_run);
   1194                    ADD_TRANSL_COUNTS(0, len);
   1195                    dst += copy_transl(dst, src + runstart, len, sf, df);
   1196                    runstart += len;
   1197                    run -= len;
   1198                }
   1199                if (!blankline)
   1200                    lastline = dst;
   1201            } while (x < w);
   1202
   1203            src += surface->pitch >> 2;
   1204        }
   1205        dst = lastline;         /* back up past trailing blank lines */
   1206        ADD_OPAQUE_COUNTS(0, 0);
   1207    }
   1208
   1209#undef ADD_OPAQUE_COUNTS
   1210#undef ADD_TRANSL_COUNTS
   1211
   1212    /* Now that we have it encoded, release the original pixels */
   1213    if (!(surface->flags & SDL_PREALLOC)) {
   1214        SDL_free(surface->pixels);
   1215        surface->pixels = NULL;
   1216    }
   1217
   1218    /* realloc the buffer to release unused memory */
   1219    {
   1220        Uint8 *p = SDL_realloc(rlebuf, dst - rlebuf);
   1221        if (!p)
   1222            p = rlebuf;
   1223        surface->map->data = p;
   1224    }
   1225
   1226    return 0;
   1227}
   1228
   1229static Uint32
   1230getpix_8(Uint8 * srcbuf)
   1231{
   1232    return *srcbuf;
   1233}
   1234
   1235static Uint32
   1236getpix_16(Uint8 * srcbuf)
   1237{
   1238    return *(Uint16 *) srcbuf;
   1239}
   1240
   1241static Uint32
   1242getpix_24(Uint8 * srcbuf)
   1243{
   1244#if SDL_BYTEORDER == SDL_LIL_ENDIAN
   1245    return srcbuf[0] + (srcbuf[1] << 8) + (srcbuf[2] << 16);
   1246#else
   1247    return (srcbuf[0] << 16) + (srcbuf[1] << 8) + srcbuf[2];
   1248#endif
   1249}
   1250
   1251static Uint32
   1252getpix_32(Uint8 * srcbuf)
   1253{
   1254    return *(Uint32 *) srcbuf;
   1255}
   1256
   1257typedef Uint32(*getpix_func) (Uint8 *);
   1258
   1259static const getpix_func getpixes[4] = {
   1260    getpix_8, getpix_16, getpix_24, getpix_32
   1261};
   1262
   1263static int
   1264RLEColorkeySurface(SDL_Surface * surface)
   1265{
   1266    Uint8 *rlebuf, *dst;
   1267    int maxn;
   1268    int y;
   1269    Uint8 *srcbuf, *lastline;
   1270    int maxsize = 0;
   1271    int bpp = surface->format->BytesPerPixel;
   1272    getpix_func getpix;
   1273    Uint32 ckey, rgbmask;
   1274    int w, h;
   1275
   1276    /* calculate the worst case size for the compressed surface */
   1277    switch (bpp) {
   1278    case 1:
   1279        /* worst case is alternating opaque and transparent pixels,
   1280           starting with an opaque pixel */
   1281        maxsize = surface->h * 3 * (surface->w / 2 + 1) + 2;
   1282        break;
   1283    case 2:
   1284    case 3:
   1285        /* worst case is solid runs, at most 255 pixels wide */
   1286        maxsize = surface->h * (2 * (surface->w / 255 + 1)
   1287                                + surface->w * bpp) + 2;
   1288        break;
   1289    case 4:
   1290        /* worst case is solid runs, at most 65535 pixels wide */
   1291        maxsize = surface->h * (4 * (surface->w / 65535 + 1)
   1292                                + surface->w * 4) + 4;
   1293        break;
   1294    }
   1295
   1296    rlebuf = (Uint8 *) SDL_malloc(maxsize);
   1297    if (rlebuf == NULL) {
   1298        return SDL_OutOfMemory();
   1299    }
   1300
   1301    /* Set up the conversion */
   1302    srcbuf = (Uint8 *) surface->pixels;
   1303    maxn = bpp == 4 ? 65535 : 255;
   1304    dst = rlebuf;
   1305    rgbmask = ~surface->format->Amask;
   1306    ckey = surface->map->info.colorkey & rgbmask;
   1307    lastline = dst;
   1308    getpix = getpixes[bpp - 1];
   1309    w = surface->w;
   1310    h = surface->h;
   1311
   1312#define ADD_COUNTS(n, m)            \
   1313    if(bpp == 4) {              \
   1314        ((Uint16 *)dst)[0] = n;     \
   1315        ((Uint16 *)dst)[1] = m;     \
   1316        dst += 4;               \
   1317    } else {                \
   1318        dst[0] = n;             \
   1319        dst[1] = m;             \
   1320        dst += 2;               \
   1321    }
   1322
   1323    for (y = 0; y < h; y++) {
   1324        int x = 0;
   1325        int blankline = 0;
   1326        do {
   1327            int run, skip, len;
   1328            int runstart;
   1329            int skipstart = x;
   1330
   1331            /* find run of transparent, then opaque pixels */
   1332            while (x < w && (getpix(srcbuf + x * bpp) & rgbmask) == ckey)
   1333                x++;
   1334            runstart = x;
   1335            while (x < w && (getpix(srcbuf + x * bpp) & rgbmask) != ckey)
   1336                x++;
   1337            skip = runstart - skipstart;
   1338            if (skip == w)
   1339                blankline = 1;
   1340            run = x - runstart;
   1341
   1342            /* encode segment */
   1343            while (skip > maxn) {
   1344                ADD_COUNTS(maxn, 0);
   1345                skip -= maxn;
   1346            }
   1347            len = MIN(run, maxn);
   1348            ADD_COUNTS(skip, len);
   1349            SDL_memcpy(dst, srcbuf + runstart * bpp, len * bpp);
   1350            dst += len * bpp;
   1351            run -= len;
   1352            runstart += len;
   1353            while (run) {
   1354                len = MIN(run, maxn);
   1355                ADD_COUNTS(0, len);
   1356                SDL_memcpy(dst, srcbuf + runstart * bpp, len * bpp);
   1357                dst += len * bpp;
   1358                runstart += len;
   1359                run -= len;
   1360            }
   1361            if (!blankline)
   1362                lastline = dst;
   1363        } while (x < w);
   1364
   1365        srcbuf += surface->pitch;
   1366    }
   1367    dst = lastline;             /* back up bast trailing blank lines */
   1368    ADD_COUNTS(0, 0);
   1369
   1370#undef ADD_COUNTS
   1371
   1372    /* Now that we have it encoded, release the original pixels */
   1373    if (!(surface->flags & SDL_PREALLOC)) {
   1374        SDL_free(surface->pixels);
   1375        surface->pixels = NULL;
   1376    }
   1377
   1378    /* realloc the buffer to release unused memory */
   1379    {
   1380        /* If realloc returns NULL, the original block is left intact */
   1381        Uint8 *p = SDL_realloc(rlebuf, dst - rlebuf);
   1382        if (!p)
   1383            p = rlebuf;
   1384        surface->map->data = p;
   1385    }
   1386
   1387    return (0);
   1388}
   1389
   1390int
   1391SDL_RLESurface(SDL_Surface * surface)
   1392{
   1393    int flags;
   1394
   1395    /* Clear any previous RLE conversion */
   1396    if ((surface->flags & SDL_RLEACCEL) == SDL_RLEACCEL) {
   1397        SDL_UnRLESurface(surface, 1);
   1398    }
   1399
   1400    /* We don't support RLE encoding of bitmaps */
   1401    if (surface->format->BitsPerPixel < 8) {
   1402        return -1;
   1403    }
   1404
   1405    /* Make sure the pixels are available */
   1406    if (!surface->pixels) {
   1407        return -1;
   1408    }
   1409
   1410    /* If we don't have colorkey or blending, nothing to do... */
   1411    flags = surface->map->info.flags;
   1412    if (!(flags & (SDL_COPY_COLORKEY | SDL_COPY_BLEND))) {
   1413        return -1;
   1414    }
   1415
   1416    /* Pass on combinations not supported */
   1417    if ((flags & SDL_COPY_MODULATE_COLOR) ||
   1418        ((flags & SDL_COPY_MODULATE_ALPHA) && surface->format->Amask) ||
   1419        (flags & (SDL_COPY_ADD | SDL_COPY_MOD)) ||
   1420        (flags & SDL_COPY_NEAREST)) {
   1421        return -1;
   1422    }
   1423
   1424    /* Encode and set up the blit */
   1425    if (!surface->format->Amask || !(flags & SDL_COPY_BLEND)) {
   1426        if (!surface->map->identity) {
   1427            return -1;
   1428        }
   1429        if (RLEColorkeySurface(surface) < 0) {
   1430            return -1;
   1431        }
   1432        surface->map->blit = SDL_RLEBlit;
   1433        surface->map->info.flags |= SDL_COPY_RLE_COLORKEY;
   1434    } else {
   1435        if (RLEAlphaSurface(surface) < 0) {
   1436            return -1;
   1437        }
   1438        surface->map->blit = SDL_RLEAlphaBlit;
   1439        surface->map->info.flags |= SDL_COPY_RLE_ALPHAKEY;
   1440    }
   1441
   1442    /* The surface is now accelerated */
   1443    surface->flags |= SDL_RLEACCEL;
   1444
   1445    return (0);
   1446}
   1447
   1448/*
   1449 * Un-RLE a surface with pixel alpha
   1450 * This may not give back exactly the image before RLE-encoding; all
   1451 * completely transparent pixels will be lost, and color and alpha depth
   1452 * may have been reduced (when encoding for 16bpp targets).
   1453 */
   1454static SDL_bool
   1455UnRLEAlpha(SDL_Surface * surface)
   1456{
   1457    Uint8 *srcbuf;
   1458    Uint32 *dst;
   1459    SDL_PixelFormat *sf = surface->format;
   1460    RLEDestFormat *df = surface->map->data;
   1461    int (*uncopy_opaque) (Uint32 *, void *, int,
   1462                          RLEDestFormat *, SDL_PixelFormat *);
   1463    int (*uncopy_transl) (Uint32 *, void *, int,
   1464                          RLEDestFormat *, SDL_PixelFormat *);
   1465    int w = surface->w;
   1466    int bpp = df->BytesPerPixel;
   1467
   1468    if (bpp == 2) {
   1469        uncopy_opaque = uncopy_opaque_16;
   1470        uncopy_transl = uncopy_transl_16;
   1471    } else {
   1472        uncopy_opaque = uncopy_transl = uncopy_32;
   1473    }
   1474
   1475    surface->pixels = SDL_malloc(surface->h * surface->pitch);
   1476    if (!surface->pixels) {
   1477        return (SDL_FALSE);
   1478    }
   1479    /* fill background with transparent pixels */
   1480    SDL_memset(surface->pixels, 0, surface->h * surface->pitch);
   1481
   1482    dst = surface->pixels;
   1483    srcbuf = (Uint8 *) (df + 1);
   1484    for (;;) {
   1485        /* copy opaque pixels */
   1486        int ofs = 0;
   1487        do {
   1488            unsigned run;
   1489            if (bpp == 2) {
   1490                ofs += srcbuf[0];
   1491                run = srcbuf[1];
   1492                srcbuf += 2;
   1493            } else {
   1494                ofs += ((Uint16 *) srcbuf)[0];
   1495                run = ((Uint16 *) srcbuf)[1];
   1496                srcbuf += 4;
   1497            }
   1498            if (run) {
   1499                srcbuf += uncopy_opaque(dst + ofs, srcbuf, run, df, sf);
   1500                ofs += run;
   1501            } else if (!ofs)
   1502                return (SDL_TRUE);
   1503        } while (ofs < w);
   1504
   1505        /* skip padding if needed */
   1506        if (bpp == 2)
   1507            srcbuf += (uintptr_t) srcbuf & 2;
   1508
   1509        /* copy translucent pixels */
   1510        ofs = 0;
   1511        do {
   1512            unsigned run;
   1513            ofs += ((Uint16 *) srcbuf)[0];
   1514            run = ((Uint16 *) srcbuf)[1];
   1515            srcbuf += 4;
   1516            if (run) {
   1517                srcbuf += uncopy_transl(dst + ofs, srcbuf, run, df, sf);
   1518                ofs += run;
   1519            }
   1520        } while (ofs < w);
   1521        dst += surface->pitch >> 2;
   1522    }
   1523    /* Make the compiler happy */
   1524    return (SDL_TRUE);
   1525}
   1526
   1527void
   1528SDL_UnRLESurface(SDL_Surface * surface, int recode)
   1529{
   1530    if (surface->flags & SDL_RLEACCEL) {
   1531        surface->flags &= ~SDL_RLEACCEL;
   1532
   1533        if (recode && !(surface->flags & SDL_PREALLOC)) {
   1534            if (surface->map->info.flags & SDL_COPY_RLE_COLORKEY) {
   1535                SDL_Rect full;
   1536
   1537                /* re-create the original surface */
   1538                surface->pixels = SDL_malloc(surface->h * surface->pitch);
   1539                if (!surface->pixels) {
   1540                    /* Oh crap... */
   1541                    surface->flags |= SDL_RLEACCEL;
   1542                    return;
   1543                }
   1544
   1545                /* fill it with the background color */
   1546                SDL_FillRect(surface, NULL, surface->map->info.colorkey);
   1547
   1548                /* now render the encoded surface */
   1549                full.x = full.y = 0;
   1550                full.w = surface->w;
   1551                full.h = surface->h;
   1552                SDL_RLEBlit(surface, &full, surface, &full);
   1553            } else {
   1554                if (!UnRLEAlpha(surface)) {
   1555                    /* Oh crap... */
   1556                    surface->flags |= SDL_RLEACCEL;
   1557                    return;
   1558                }
   1559            }
   1560        }
   1561        surface->map->info.flags &=
   1562            ~(SDL_COPY_RLE_COLORKEY | SDL_COPY_RLE_ALPHAKEY);
   1563
   1564        SDL_free(surface->map->data);
   1565        surface->map->data = NULL;
   1566    }
   1567}
   1568
   1569/* vi: set ts=4 sw=4 expandtab: */