cscg22-gearboy

CSCG 2022 Challenge 'Gearboy'
git clone https://git.sinitax.com/sinitax/cscg22-gearboy
Log | Files | Refs | sfeed.txt

SDL_blit_N.c (92261B)


      1/*
      2  Simple DirectMedia Layer
      3  Copyright (C) 1997-2014 Sam Lantinga <slouken@libsdl.org>
      4
      5  This software is provided 'as-is', without any express or implied
      6  warranty.  In no event will the authors be held liable for any damages
      7  arising from the use of this software.
      8
      9  Permission is granted to anyone to use this software for any purpose,
     10  including commercial applications, and to alter it and redistribute it
     11  freely, subject to the following restrictions:
     12
     13  1. The origin of this software must not be misrepresented; you must not
     14     claim that you wrote the original software. If you use this software
     15     in a product, an acknowledgment in the product documentation would be
     16     appreciated but is not required.
     17  2. Altered source versions must be plainly marked as such, and must not be
     18     misrepresented as being the original software.
     19  3. This notice may not be removed or altered from any source distribution.
     20*/
     21#include "../SDL_internal.h"
     22
     23#include "SDL_video.h"
     24#include "SDL_endian.h"
     25#include "SDL_cpuinfo.h"
     26#include "SDL_blit.h"
     27
     28#include "SDL_assert.h"
     29
     30/* Functions to blit from N-bit surfaces to other surfaces */
     31
     32#if SDL_ALTIVEC_BLITTERS
     33#ifdef HAVE_ALTIVEC_H
     34#include <altivec.h>
     35#endif
     36#ifdef __MACOSX__
     37#include <sys/sysctl.h>
     /*
      * Query the L3 cache size through the "hw.l3cachesize" sysctl.
      * Returns the value reported by sysctlbyname(), or 0 if the call fails.
      */
     static size_t
     GetL3CacheSize(void)
     {
         u_int64_t cache_bytes = 0;
         size_t value_len = sizeof(cache_bytes);

         if (sysctlbyname("hw.l3cachesize", &cache_bytes, &value_len, NULL, 0) != 0) {
             return 0;
         }
         return cache_bytes;
     }
     52#else
     /*
      * Fallback used when the cache size is not queried from the system:
      * always reports 2097152 bytes.
      */
     static size_t
     GetL3CacheSize(void)
     {
         /* XXX: Just guess G4 */
         const size_t guessed_l3_bytes = 2097152;

         return guessed_l3_bytes;
     }
     59#endif /* __MACOSX__ */
     60
     /*
      * Vector literal helpers.  On Mac OS X with GCC older than 4 the
      * parenthesized literal form is used; everywhere else the brace form.
      */
     #if (defined(__MACOSX__) && (__GNUC__ < 4))
     #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
             (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
     #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
             (vector unsigned short) ( a,b,c,d,e,f,g,h )
     #else
     #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
             (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
     #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
             (vector unsigned short) { a,b,c,d,e,f,g,h }
     #endif

     /* Non-zero when the address x is not a multiple of 16 (low four bits set). */
     #define UNALIGNED_PTR(x) (((size_t) (x)) & 0x0000000F)

     /* Byte-permute vector applying the per-pixel byte order (a,b,c,d) to four 32-bit pixels. */
     #define VSWIZZLE32(a,b,c,d) (vector unsigned char) \
                                    ( 0x00+(a), 0x00+(b), 0x00+(c), 0x00+(d), \
                                      0x04+(a), 0x04+(b), 0x04+(c), 0x04+(d), \
                                      0x08+(a), 0x08+(b), 0x08+(c), 0x08+(d), \
                                      0x0C+(a), 0x0C+(b), 0x0C+(c), 0x0C+(d) )

     /*
      * Assemble a 32-bit pixel for dstfmt from 8-bit channel values.
      * Each channel is widened to Uint32 before shifting so that a value
      * >= 128 shifted by 24 does not overflow a signed int.
      */
     #define MAKE8888(dstfmt, r, g, b, a)  \
         ( ((((Uint32) (r)) << (dstfmt)->Rshift) & (dstfmt)->Rmask) | \
           ((((Uint32) (g)) << (dstfmt)->Gshift) & (dstfmt)->Gmask) | \
           ((((Uint32) (b)) << (dstfmt)->Bshift) & (dstfmt)->Bmask) | \
           ((((Uint32) (a)) << (dstfmt)->Ashift) & (dstfmt)->Amask) )

     /*
      * Data Stream Touch...Altivec cache prefetching.
      *
      *  Don't use this on a G5...however, the speed boost is very significant
      *   on a G4.
      */
     #define DST_CHAN_SRC 1
     #define DST_CHAN_DEST 2

     /* macro to set DST control word value... */
     #define DST_CTRL(size, count, stride) \
         (((size) << 24) | ((count) << 16) | (stride))

     /*
      * Permute vector used with vec_perm() to realign a source load:
      * vec_lvsl(0, src) when src is unaligned, otherwise vec_lvsl(8, src)
      * with 8 added to every byte index.
      */
     #define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
         ? vec_lvsl(0, (src)) \
         : vec_add(vec_lvsl(8, (src)), vec_splat_u8(8)))
    102
    103/* Calculate the permute vector used for 32->32 swizzling */
    104static vector unsigned char
    105calc_swizzle32(const SDL_PixelFormat * srcfmt, const SDL_PixelFormat * dstfmt)
    106{
    107    /*
    108     * We have to assume that the bits that aren't used by other
    109     *  colors is alpha, and it's one complete byte, since some formats
    110     *  leave alpha with a zero mask, but we should still swizzle the bits.
    111     */
    112    /* ARGB */
    113    const static const struct SDL_PixelFormat default_pixel_format = {
    114        0, NULL, 0, 0,
    115        {0, 0},
    116        0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000,
    117        0, 0, 0, 0,
    118        16, 8, 0, 24,
    119        0, NULL
    120    };
    121    if (!srcfmt) {
    122        srcfmt = &default_pixel_format;
    123    }
    124    if (!dstfmt) {
    125        dstfmt = &default_pixel_format;
    126    }
    127    const vector unsigned char plus = VECUINT8_LITERAL(0x00, 0x00, 0x00, 0x00,
    128                                                       0x04, 0x04, 0x04, 0x04,
    129                                                       0x08, 0x08, 0x08, 0x08,
    130                                                       0x0C, 0x0C, 0x0C,
    131                                                       0x0C);
    132    vector unsigned char vswiz;
    133    vector unsigned int srcvec;
    134#define RESHIFT(X) (3 - ((X) >> 3))
    135    Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
    136    Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
    137    Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
    138    Uint32 amask;
    139    /* Use zero for alpha if either surface doesn't have alpha */
    140    if (dstfmt->Amask) {
    141        amask =
    142            ((srcfmt->Amask) ? RESHIFT(srcfmt->
    143                                       Ashift) : 0x10) << (dstfmt->Ashift);
    144    } else {
    145        amask =
    146            0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^
    147                          0xFFFFFFFF);
    148    }
    149#undef RESHIFT
    150    ((unsigned int *) (char *) &srcvec)[0] = (rmask | gmask | bmask | amask);
    151    vswiz = vec_add(plus, (vector unsigned char) vec_splat(srcvec, 0));
    152    return (vswiz);
    153}
    154
    155static void Blit_RGB888_RGB565(SDL_BlitInfo * info);
    156static void
    157Blit_RGB888_RGB565Altivec(SDL_BlitInfo * info)
    158{
    159    int height = info->dst_h;
    160    Uint8 *src = (Uint8 *) info->src;
    161    int srcskip = info->src_skip;
    162    Uint8 *dst = (Uint8 *) info->dst;
    163    int dstskip = info->dst_skip;
    164    SDL_PixelFormat *srcfmt = info->src_fmt;
    165    vector unsigned char valpha = vec_splat_u8(0);
    166    vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
    167    vector unsigned char vgmerge = VECUINT8_LITERAL(0x00, 0x02, 0x00, 0x06,
    168                                                    0x00, 0x0a, 0x00, 0x0e,
    169                                                    0x00, 0x12, 0x00, 0x16,
    170                                                    0x00, 0x1a, 0x00, 0x1e);
    171    vector unsigned short v1 = vec_splat_u16(1);
    172    vector unsigned short v3 = vec_splat_u16(3);
    173    vector unsigned short v3f =
    174        VECUINT16_LITERAL(0x003f, 0x003f, 0x003f, 0x003f,
    175                          0x003f, 0x003f, 0x003f, 0x003f);
    176    vector unsigned short vfc =
    177        VECUINT16_LITERAL(0x00fc, 0x00fc, 0x00fc, 0x00fc,
    178                          0x00fc, 0x00fc, 0x00fc, 0x00fc);
    179    vector unsigned short vf800 = (vector unsigned short) vec_splat_u8(-7);
    180    vf800 = vec_sl(vf800, vec_splat_u16(8));
    181
    182    while (height--) {
    183        vector unsigned char valigner;
    184        vector unsigned char voverflow;
    185        vector unsigned char vsrc;
    186
    187        int width = info->dst_w;
    188        int extrawidth;
    189
    190        /* do scalar until we can align... */
    191#define ONE_PIXEL_BLEND(condition, widthvar) \
    192        while (condition) { \
    193            Uint32 Pixel; \
    194            unsigned sR, sG, sB, sA; \
    195            DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, Pixel, \
    196                          sR, sG, sB, sA); \
    197            *(Uint16 *)(dst) = (((sR << 8) & 0x0000F800) | \
    198                                ((sG << 3) & 0x000007E0) | \
    199                                ((sB >> 3) & 0x0000001F)); \
    200            dst += 2; \
    201            src += 4; \
    202            widthvar--; \
    203        }
    204
    205        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
    206
    207        /* After all that work, here's the vector part! */
    208        extrawidth = (width % 8);       /* trailing unaligned stores */
    209        width -= extrawidth;
    210        vsrc = vec_ld(0, src);
    211        valigner = VEC_ALIGNER(src);
    212
    213        while (width) {
    214            vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
    215            vector unsigned int vsrc1, vsrc2;
    216            vector unsigned char vdst;
    217
    218            voverflow = vec_ld(15, src);
    219            vsrc = vec_perm(vsrc, voverflow, valigner);
    220            vsrc1 = (vector unsigned int) vec_perm(vsrc, valpha, vpermute);
    221            src += 16;
    222            vsrc = voverflow;
    223            voverflow = vec_ld(15, src);
    224            vsrc = vec_perm(vsrc, voverflow, valigner);
    225            vsrc2 = (vector unsigned int) vec_perm(vsrc, valpha, vpermute);
    226            /* 1555 */
    227            vpixel = (vector unsigned short) vec_packpx(vsrc1, vsrc2);
    228            vgpixel = (vector unsigned short) vec_perm(vsrc1, vsrc2, vgmerge);
    229            vgpixel = vec_and(vgpixel, vfc);
    230            vgpixel = vec_sl(vgpixel, v3);
    231            vrpixel = vec_sl(vpixel, v1);
    232            vrpixel = vec_and(vrpixel, vf800);
    233            vbpixel = vec_and(vpixel, v3f);
    234            vdst =
    235                vec_or((vector unsigned char) vrpixel,
    236                       (vector unsigned char) vgpixel);
    237            /* 565 */
    238            vdst = vec_or(vdst, (vector unsigned char) vbpixel);
    239            vec_st(vdst, 0, dst);
    240
    241            width -= 8;
    242            src += 16;
    243            dst += 16;
    244            vsrc = voverflow;
    245        }
    246
    247        SDL_assert(width == 0);
    248
    249        /* do scalar until we can align... */
    250        ONE_PIXEL_BLEND((extrawidth), extrawidth);
    251#undef ONE_PIXEL_BLEND
    252
    253        src += srcskip;         /* move to next row, accounting for pitch. */
    254        dst += dstskip;
    255    }
    256
    257
    258}
    259
    260static void
    261Blit_RGB565_32Altivec(SDL_BlitInfo * info)
    262{
    263    int height = info->dst_h;
    264    Uint8 *src = (Uint8 *) info->src;
    265    int srcskip = info->src_skip;
    266    Uint8 *dst = (Uint8 *) info->dst;
    267    int dstskip = info->dst_skip;
    268    SDL_PixelFormat *srcfmt = info->src_fmt;
    269    SDL_PixelFormat *dstfmt = info->dst_fmt;
    270    unsigned alpha;
    271    vector unsigned char valpha;
    272    vector unsigned char vpermute;
    273    vector unsigned short vf800;
    274    vector unsigned int v8 = vec_splat_u32(8);
    275    vector unsigned int v16 = vec_add(v8, v8);
    276    vector unsigned short v2 = vec_splat_u16(2);
    277    vector unsigned short v3 = vec_splat_u16(3);
    278    /*
    279       0x10 - 0x1f is the alpha
    280       0x00 - 0x0e evens are the red
    281       0x01 - 0x0f odds are zero
    282     */
    283    vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
    284                                                       0x10, 0x02, 0x01, 0x01,
    285                                                       0x10, 0x04, 0x01, 0x01,
    286                                                       0x10, 0x06, 0x01,
    287                                                       0x01);
    288    vector unsigned char vredalpha2 =
    289        (vector unsigned
    290         char) (vec_add((vector unsigned int) vredalpha1, vec_sl(v8, v16))
    291        );
    292    /*
    293       0x00 - 0x0f is ARxx ARxx ARxx ARxx
    294       0x11 - 0x0f odds are blue
    295     */
    296    vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
    297                                                   0x04, 0x05, 0x06, 0x13,
    298                                                   0x08, 0x09, 0x0a, 0x15,
    299                                                   0x0c, 0x0d, 0x0e, 0x17);
    300    vector unsigned char vblue2 =
    301        (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8)
    302        );
    303    /*
    304       0x00 - 0x0f is ARxB ARxB ARxB ARxB
    305       0x10 - 0x0e evens are green
    306     */
    307    vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
    308                                                    0x04, 0x05, 0x12, 0x07,
    309                                                    0x08, 0x09, 0x14, 0x0b,
    310                                                    0x0c, 0x0d, 0x16, 0x0f);
    311    vector unsigned char vgreen2 =
    312        (vector unsigned
    313         char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8, v8))
    314        );
    315
    316    SDL_assert(srcfmt->BytesPerPixel == 2);
    317    SDL_assert(dstfmt->BytesPerPixel == 4);
    318
    319    vf800 = (vector unsigned short) vec_splat_u8(-7);
    320    vf800 = vec_sl(vf800, vec_splat_u16(8));
    321
    322    if (dstfmt->Amask && info->a) {
    323        ((unsigned char *) &valpha)[0] = alpha = info->a;
    324        valpha = vec_splat(valpha, 0);
    325    } else {
    326        alpha = 0;
    327        valpha = vec_splat_u8(0);
    328    }
    329
    330    vpermute = calc_swizzle32(NULL, dstfmt);
    331    while (height--) {
    332        vector unsigned char valigner;
    333        vector unsigned char voverflow;
    334        vector unsigned char vsrc;
    335
    336        int width = info->dst_w;
    337        int extrawidth;
    338
    339        /* do scalar until we can align... */
    340#define ONE_PIXEL_BLEND(condition, widthvar) \
    341        while (condition) { \
    342            unsigned sR, sG, sB; \
    343            unsigned short Pixel = *((unsigned short *)src); \
    344            sR = (Pixel >> 8) & 0xf8; \
    345            sG = (Pixel >> 3) & 0xfc; \
    346            sB = (Pixel << 3) & 0xf8; \
    347            ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
    348            src += 2; \
    349            dst += 4; \
    350            widthvar--; \
    351        }
    352        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
    353
    354        /* After all that work, here's the vector part! */
    355        extrawidth = (width % 8);       /* trailing unaligned stores */
    356        width -= extrawidth;
    357        vsrc = vec_ld(0, src);
    358        valigner = VEC_ALIGNER(src);
    359
    360        while (width) {
    361            vector unsigned short vR, vG, vB;
    362            vector unsigned char vdst1, vdst2;
    363
    364            voverflow = vec_ld(15, src);
    365            vsrc = vec_perm(vsrc, voverflow, valigner);
    366
    367            vR = vec_and((vector unsigned short) vsrc, vf800);
    368            vB = vec_sl((vector unsigned short) vsrc, v3);
    369            vG = vec_sl(vB, v2);
    370
    371            vdst1 =
    372                (vector unsigned char) vec_perm((vector unsigned char) vR,
    373                                                valpha, vredalpha1);
    374            vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
    375            vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
    376            vdst1 = vec_perm(vdst1, valpha, vpermute);
    377            vec_st(vdst1, 0, dst);
    378
    379            vdst2 =
    380                (vector unsigned char) vec_perm((vector unsigned char) vR,
    381                                                valpha, vredalpha2);
    382            vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
    383            vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
    384            vdst2 = vec_perm(vdst2, valpha, vpermute);
    385            vec_st(vdst2, 16, dst);
    386
    387            width -= 8;
    388            dst += 32;
    389            src += 16;
    390            vsrc = voverflow;
    391        }
    392
    393        SDL_assert(width == 0);
    394
    395
    396        /* do scalar until we can align... */
    397        ONE_PIXEL_BLEND((extrawidth), extrawidth);
    398#undef ONE_PIXEL_BLEND
    399
    400        src += srcskip;         /* move to next row, accounting for pitch. */
    401        dst += dstskip;
    402    }
    403
    404}
    405
    406
    407static void
    408Blit_RGB555_32Altivec(SDL_BlitInfo * info)
    409{
    410    int height = info->dst_h;
    411    Uint8 *src = (Uint8 *) info->src;
    412    int srcskip = info->src_skip;
    413    Uint8 *dst = (Uint8 *) info->dst;
    414    int dstskip = info->dst_skip;
    415    SDL_PixelFormat *srcfmt = info->src_fmt;
    416    SDL_PixelFormat *dstfmt = info->dst_fmt;
    417    unsigned alpha;
    418    vector unsigned char valpha;
    419    vector unsigned char vpermute;
    420    vector unsigned short vf800;
    421    vector unsigned int v8 = vec_splat_u32(8);
    422    vector unsigned int v16 = vec_add(v8, v8);
    423    vector unsigned short v1 = vec_splat_u16(1);
    424    vector unsigned short v3 = vec_splat_u16(3);
    425    /*
    426       0x10 - 0x1f is the alpha
    427       0x00 - 0x0e evens are the red
    428       0x01 - 0x0f odds are zero
    429     */
    430    vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
    431                                                       0x10, 0x02, 0x01, 0x01,
    432                                                       0x10, 0x04, 0x01, 0x01,
    433                                                       0x10, 0x06, 0x01,
    434                                                       0x01);
    435    vector unsigned char vredalpha2 =
    436        (vector unsigned
    437         char) (vec_add((vector unsigned int) vredalpha1, vec_sl(v8, v16))
    438        );
    439    /*
    440       0x00 - 0x0f is ARxx ARxx ARxx ARxx
    441       0x11 - 0x0f odds are blue
    442     */
    443    vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
    444                                                   0x04, 0x05, 0x06, 0x13,
    445                                                   0x08, 0x09, 0x0a, 0x15,
    446                                                   0x0c, 0x0d, 0x0e, 0x17);
    447    vector unsigned char vblue2 =
    448        (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8)
    449        );
    450    /*
    451       0x00 - 0x0f is ARxB ARxB ARxB ARxB
    452       0x10 - 0x0e evens are green
    453     */
    454    vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
    455                                                    0x04, 0x05, 0x12, 0x07,
    456                                                    0x08, 0x09, 0x14, 0x0b,
    457                                                    0x0c, 0x0d, 0x16, 0x0f);
    458    vector unsigned char vgreen2 =
    459        (vector unsigned
    460         char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8, v8))
    461        );
    462
    463    SDL_assert(srcfmt->BytesPerPixel == 2);
    464    SDL_assert(dstfmt->BytesPerPixel == 4);
    465
    466    vf800 = (vector unsigned short) vec_splat_u8(-7);
    467    vf800 = vec_sl(vf800, vec_splat_u16(8));
    468
    469    if (dstfmt->Amask && info->a) {
    470        ((unsigned char *) &valpha)[0] = alpha = info->a;
    471        valpha = vec_splat(valpha, 0);
    472    } else {
    473        alpha = 0;
    474        valpha = vec_splat_u8(0);
    475    }
    476
    477    vpermute = calc_swizzle32(NULL, dstfmt);
    478    while (height--) {
    479        vector unsigned char valigner;
    480        vector unsigned char voverflow;
    481        vector unsigned char vsrc;
    482
    483        int width = info->dst_w;
    484        int extrawidth;
    485
    486        /* do scalar until we can align... */
    487#define ONE_PIXEL_BLEND(condition, widthvar) \
    488        while (condition) { \
    489            unsigned sR, sG, sB; \
    490            unsigned short Pixel = *((unsigned short *)src); \
    491            sR = (Pixel >> 7) & 0xf8; \
    492            sG = (Pixel >> 2) & 0xf8; \
    493            sB = (Pixel << 3) & 0xf8; \
    494            ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
    495            src += 2; \
    496            dst += 4; \
    497            widthvar--; \
    498        }
    499        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
    500
    501        /* After all that work, here's the vector part! */
    502        extrawidth = (width % 8);       /* trailing unaligned stores */
    503        width -= extrawidth;
    504        vsrc = vec_ld(0, src);
    505        valigner = VEC_ALIGNER(src);
    506
    507        while (width) {
    508            vector unsigned short vR, vG, vB;
    509            vector unsigned char vdst1, vdst2;
    510
    511            voverflow = vec_ld(15, src);
    512            vsrc = vec_perm(vsrc, voverflow, valigner);
    513
    514            vR = vec_and(vec_sl((vector unsigned short) vsrc, v1), vf800);
    515            vB = vec_sl((vector unsigned short) vsrc, v3);
    516            vG = vec_sl(vB, v3);
    517
    518            vdst1 =
    519                (vector unsigned char) vec_perm((vector unsigned char) vR,
    520                                                valpha, vredalpha1);
    521            vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
    522            vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
    523            vdst1 = vec_perm(vdst1, valpha, vpermute);
    524            vec_st(vdst1, 0, dst);
    525
    526            vdst2 =
    527                (vector unsigned char) vec_perm((vector unsigned char) vR,
    528                                                valpha, vredalpha2);
    529            vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
    530            vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
    531            vdst2 = vec_perm(vdst2, valpha, vpermute);
    532            vec_st(vdst2, 16, dst);
    533
    534            width -= 8;
    535            dst += 32;
    536            src += 16;
    537            vsrc = voverflow;
    538        }
    539
    540        SDL_assert(width == 0);
    541
    542
    543        /* do scalar until we can align... */
    544        ONE_PIXEL_BLEND((extrawidth), extrawidth);
    545#undef ONE_PIXEL_BLEND
    546
    547        src += srcskip;         /* move to next row, accounting for pitch. */
    548        dst += dstskip;
    549    }
    550
    551}
    552
    553static void BlitNtoNKey(SDL_BlitInfo * info);
    554static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo * info);
    555static void
    556Blit32to32KeyAltivec(SDL_BlitInfo * info)
    557{
    558    int height = info->dst_h;
    559    Uint32 *srcp = (Uint32 *) info->src;
    560    int srcskip = info->src_skip / 4;
    561    Uint32 *dstp = (Uint32 *) info->dst;
    562    int dstskip = info->dst_skip / 4;
    563    SDL_PixelFormat *srcfmt = info->src_fmt;
    564    int srcbpp = srcfmt->BytesPerPixel;
    565    SDL_PixelFormat *dstfmt = info->dst_fmt;
    566    int dstbpp = dstfmt->BytesPerPixel;
    567    int copy_alpha = (srcfmt->Amask && dstfmt->Amask);
    568    unsigned alpha = dstfmt->Amask ? info->a : 0;
    569    Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
    570    Uint32 ckey = info->colorkey;
    571    vector unsigned int valpha;
    572    vector unsigned char vpermute;
    573    vector unsigned char vzero;
    574    vector unsigned int vckey;
    575    vector unsigned int vrgbmask;
    576    vpermute = calc_swizzle32(srcfmt, dstfmt);
    577    if (info->dst_w < 16) {
    578        if (copy_alpha) {
    579            BlitNtoNKeyCopyAlpha(info);
    580        } else {
    581            BlitNtoNKey(info);
    582        }
    583        return;
    584    }
    585    vzero = vec_splat_u8(0);
    586    if (alpha) {
    587        ((unsigned char *) &valpha)[0] = (unsigned char) alpha;
    588        valpha =
    589            (vector unsigned int) vec_splat((vector unsigned char) valpha, 0);
    590    } else {
    591        valpha = (vector unsigned int) vzero;
    592    }
    593    ckey &= rgbmask;
    594    ((unsigned int *) (char *) &vckey)[0] = ckey;
    595    vckey = vec_splat(vckey, 0);
    596    ((unsigned int *) (char *) &vrgbmask)[0] = rgbmask;
    597    vrgbmask = vec_splat(vrgbmask, 0);
    598
    599    while (height--) {
    600#define ONE_PIXEL_BLEND(condition, widthvar) \
    601        if (copy_alpha) { \
    602            while (condition) { \
    603                Uint32 Pixel; \
    604                unsigned sR, sG, sB, sA; \
    605                DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt, Pixel, \
    606                          sR, sG, sB, sA); \
    607                if ( (Pixel & rgbmask) != ckey ) { \
    608                      ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
    609                            sR, sG, sB, sA); \
    610                } \
    611                dstp = (Uint32 *) (((Uint8 *) dstp) + dstbpp); \
    612                srcp = (Uint32 *) (((Uint8 *) srcp) + srcbpp); \
    613                widthvar--; \
    614            } \
    615        } else { \
    616            while (condition) { \
    617                Uint32 Pixel; \
    618                unsigned sR, sG, sB; \
    619                RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, Pixel); \
    620                if ( Pixel != ckey ) { \
    621                    RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
    622                    ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
    623                              sR, sG, sB, alpha); \
    624                } \
    625                dstp = (Uint32 *) (((Uint8 *)dstp) + dstbpp); \
    626                srcp = (Uint32 *) (((Uint8 *)srcp) + srcbpp); \
    627                widthvar--; \
    628            } \
    629        }
    630        int width = info->dst_w;
    631        ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
    632        SDL_assert(width > 0);
    633        if (width > 0) {
    634            int extrawidth = (width % 4);
    635            vector unsigned char valigner = VEC_ALIGNER(srcp);
    636            vector unsigned int vs = vec_ld(0, srcp);
    637            width -= extrawidth;
    638            SDL_assert(width >= 4);
    639            while (width) {
    640                vector unsigned char vsel;
    641                vector unsigned int vd;
    642                vector unsigned int voverflow = vec_ld(15, srcp);
    643                /* load the source vec */
    644                vs = vec_perm(vs, voverflow, valigner);
    645                /* vsel is set for items that match the key */
    646                vsel = (vector unsigned char) vec_and(vs, vrgbmask);
    647                vsel = (vector unsigned char) vec_cmpeq(vs, vckey);
    648                /* permute the src vec to the dest format */
    649                vs = vec_perm(vs, valpha, vpermute);
    650                /* load the destination vec */
    651                vd = vec_ld(0, dstp);
    652                /* select the source and dest into vs */
    653                vd = (vector unsigned int) vec_sel((vector unsigned char) vs,
    654                                                   (vector unsigned char) vd,
    655                                                   vsel);
    656
    657                vec_st(vd, 0, dstp);
    658                srcp += 4;
    659                width -= 4;
    660                dstp += 4;
    661                vs = voverflow;
    662            }
    663            ONE_PIXEL_BLEND((extrawidth), extrawidth);
    664#undef ONE_PIXEL_BLEND
    665            srcp += srcskip;
    666            dstp += dstskip;
    667        }
    668    }
    669}
    670
    671/* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
    672/* Use this on a G5 */
    673static void
    674ConvertAltivec32to32_noprefetch(SDL_BlitInfo * info)
    675{
    676    int height = info->dst_h;
    677    Uint32 *src = (Uint32 *) info->src;
    678    int srcskip = info->src_skip / 4;
    679    Uint32 *dst = (Uint32 *) info->dst;
    680    int dstskip = info->dst_skip / 4;
    681    SDL_PixelFormat *srcfmt = info->src_fmt;
    682    SDL_PixelFormat *dstfmt = info->dst_fmt;
    683    vector unsigned int vzero = vec_splat_u32(0);
    684    vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
    685    if (dstfmt->Amask && !srcfmt->Amask) {
    686        if (info->a) {
    687            vector unsigned char valpha;
    688            ((unsigned char *) &valpha)[0] = info->a;
    689            vzero = (vector unsigned int) vec_splat(valpha, 0);
    690        }
    691    }
    692
    693    SDL_assert(srcfmt->BytesPerPixel == 4);
    694    SDL_assert(dstfmt->BytesPerPixel == 4);
    695
    696    while (height--) {
    697        vector unsigned char valigner;
    698        vector unsigned int vbits;
    699        vector unsigned int voverflow;
    700        Uint32 bits;
    701        Uint8 r, g, b, a;
    702
    703        int width = info->dst_w;
    704        int extrawidth;
    705
    706        /* do scalar until we can align... */
    707        while ((UNALIGNED_PTR(dst)) && (width)) {
    708            bits = *(src++);
    709            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
    710            if(!srcfmt->Amask)
    711              a = info->a;
    712            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
    713            width--;
    714        }
    715
    716        /* After all that work, here's the vector part! */
    717        extrawidth = (width % 4);
    718        width -= extrawidth;
    719        valigner = VEC_ALIGNER(src);
    720        vbits = vec_ld(0, src);
    721
    722        while (width) {
    723            voverflow = vec_ld(15, src);
    724            src += 4;
    725            width -= 4;
    726            vbits = vec_perm(vbits, voverflow, valigner);       /* src is ready. */
    727            vbits = vec_perm(vbits, vzero, vpermute);   /* swizzle it. */
    728            vec_st(vbits, 0, dst);      /* store it back out. */
    729            dst += 4;
    730            vbits = voverflow;
    731        }
    732
    733        SDL_assert(width == 0);
    734
    735        /* cover pixels at the end of the row that didn't fit in 16 bytes. */
    736        while (extrawidth) {
    737            bits = *(src++);    /* max 7 pixels, don't bother with prefetch. */
    738            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
    739            if(!srcfmt->Amask)
    740              a = info->a;
    741            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
    742            extrawidth--;
    743        }
    744
    745        src += srcskip;
    746        dst += dstskip;
    747    }
    748
    749}
    750
    751/* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
    752/* Use this on a G4 */
    753static void
    754ConvertAltivec32to32_prefetch(SDL_BlitInfo * info)
    755{
    756    const int scalar_dst_lead = sizeof(Uint32) * 4;
    757    const int vector_dst_lead = sizeof(Uint32) * 16;
    758
    759    int height = info->dst_h;
    760    Uint32 *src = (Uint32 *) info->src;
    761    int srcskip = info->src_skip / 4;
    762    Uint32 *dst = (Uint32 *) info->dst;
    763    int dstskip = info->dst_skip / 4;
    764    SDL_PixelFormat *srcfmt = info->src_fmt;
    765    SDL_PixelFormat *dstfmt = info->dst_fmt;
    766    vector unsigned int vzero = vec_splat_u32(0);
    767    vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
    768    if (dstfmt->Amask && !srcfmt->Amask) {
    769        if (info->a) {
    770            vector unsigned char valpha;
    771            ((unsigned char *) &valpha)[0] = info->a;
    772            vzero = (vector unsigned int) vec_splat(valpha, 0);
    773        }
    774    }
    775
    776    SDL_assert(srcfmt->BytesPerPixel == 4);
    777    SDL_assert(dstfmt->BytesPerPixel == 4);
    778
    779    while (height--) {
    780        vector unsigned char valigner;
    781        vector unsigned int vbits;
    782        vector unsigned int voverflow;
    783        Uint32 bits;
    784        Uint8 r, g, b, a;
    785
    786        int width = info->dst_w;
    787        int extrawidth;
    788
    789        /* do scalar until we can align... */
    790        while ((UNALIGNED_PTR(dst)) && (width)) {
    791            vec_dstt(src + scalar_dst_lead, DST_CTRL(2, 32, 1024),
    792                     DST_CHAN_SRC);
    793            vec_dstst(dst + scalar_dst_lead, DST_CTRL(2, 32, 1024),
    794                      DST_CHAN_DEST);
    795            bits = *(src++);
    796            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
    797            if(!srcfmt->Amask)
    798              a = info->a;
    799            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
    800            width--;
    801        }
    802
    803        /* After all that work, here's the vector part! */
    804        extrawidth = (width % 4);
    805        width -= extrawidth;
    806        valigner = VEC_ALIGNER(src);
    807        vbits = vec_ld(0, src);
    808
    809        while (width) {
    810            vec_dstt(src + vector_dst_lead, DST_CTRL(2, 32, 1024),
    811                     DST_CHAN_SRC);
    812            vec_dstst(dst + vector_dst_lead, DST_CTRL(2, 32, 1024),
    813                      DST_CHAN_DEST);
    814            voverflow = vec_ld(15, src);
    815            src += 4;
    816            width -= 4;
    817            vbits = vec_perm(vbits, voverflow, valigner);       /* src is ready. */
    818            vbits = vec_perm(vbits, vzero, vpermute);   /* swizzle it. */
    819            vec_st(vbits, 0, dst);      /* store it back out. */
    820            dst += 4;
    821            vbits = voverflow;
    822        }
    823
    824        SDL_assert(width == 0);
    825
    826        /* cover pixels at the end of the row that didn't fit in 16 bytes. */
    827        while (extrawidth) {
    828            bits = *(src++);    /* max 7 pixels, don't bother with prefetch. */
    829            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
    830            if(!srcfmt->Amask)
    831              a = info->a;
    832            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
    833            extrawidth--;
    834        }
    835
    836        src += srcskip;
    837        dst += dstskip;
    838    }
    839
    840    vec_dss(DST_CHAN_SRC);
    841    vec_dss(DST_CHAN_DEST);
    842}
    843
    844static Uint32
    845GetBlitFeatures(void)
    846{
    847    static Uint32 features = 0xffffffff;
    848    if (features == 0xffffffff) {
    849        /* Provide an override for testing .. */
    850        char *override = SDL_getenv("SDL_ALTIVEC_BLIT_FEATURES");
    851        if (override) {
    852            features = 0;
    853            SDL_sscanf(override, "%u", &features);
    854        } else {
    855            features = (0
    856                        /* Feature 1 is has-MMX */
    857                        | ((SDL_HasMMX())? 1 : 0)
    858                        /* Feature 2 is has-AltiVec */
    859                        | ((SDL_HasAltiVec())? 2 : 0)
    860                        /* Feature 4 is dont-use-prefetch */
    861                        /* !!!! FIXME: Check for G5 or later, not the cache size! Always prefetch on a G4. */
    862                        | ((GetL3CacheSize() == 0) ? 4 : 0)
    863                );
    864        }
    865    }
    866    return features;
    867}
    868
    869#if __MWERKS__
    870#pragma altivec_model off
    871#endif
    872#else
    873/* Feature 1 is has-MMX */
    874#define GetBlitFeatures() ((Uint32)(SDL_HasMMX() ? 1 : 0))
    875#endif
    876
    877/* This is now endian dependent */
    878#if SDL_BYTEORDER == SDL_LIL_ENDIAN
    879#define HI  1
    880#define LO  0
    881#else /* SDL_BYTEORDER == SDL_BIG_ENDIAN */
    882#define HI  0
    883#define LO  1
    884#endif
    885
    886/* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
    887#define RGB888_RGB332(dst, src) { \
    888    dst = (Uint8)((((src)&0x00E00000)>>16)| \
    889                  (((src)&0x0000E000)>>11)| \
    890                  (((src)&0x000000C0)>>6)); \
    891}
    892static void
    893Blit_RGB888_index8(SDL_BlitInfo * info)
    894{
    895#ifndef USE_DUFFS_LOOP
    896    int c;
    897#endif
    898    int width, height;
    899    Uint32 *src;
    900    const Uint8 *map;
    901    Uint8 *dst;
    902    int srcskip, dstskip;
    903
    904    /* Set up some basic variables */
    905    width = info->dst_w;
    906    height = info->dst_h;
    907    src = (Uint32 *) info->src;
    908    srcskip = info->src_skip / 4;
    909    dst = info->dst;
    910    dstskip = info->dst_skip;
    911    map = info->table;
    912
    913    if (map == NULL) {
    914        while (height--) {
    915#ifdef USE_DUFFS_LOOP
    916            /* *INDENT-OFF* */
    917            DUFFS_LOOP(
    918                RGB888_RGB332(*dst++, *src);
    919            , width);
    920            /* *INDENT-ON* */
    921#else
    922            for (c = width / 4; c; --c) {
    923                /* Pack RGB into 8bit pixel */
    924                ++src;
    925                RGB888_RGB332(*dst++, *src);
    926                ++src;
    927                RGB888_RGB332(*dst++, *src);
    928                ++src;
    929                RGB888_RGB332(*dst++, *src);
    930                ++src;
    931            }
    932            switch (width & 3) {
    933            case 3:
    934                RGB888_RGB332(*dst++, *src);
    935                ++src;
    936            case 2:
    937                RGB888_RGB332(*dst++, *src);
    938                ++src;
    939            case 1:
    940                RGB888_RGB332(*dst++, *src);
    941                ++src;
    942            }
    943#endif /* USE_DUFFS_LOOP */
    944            src += srcskip;
    945            dst += dstskip;
    946        }
    947    } else {
    948        int Pixel;
    949
    950        while (height--) {
    951#ifdef USE_DUFFS_LOOP
    952            /* *INDENT-OFF* */
    953            DUFFS_LOOP(
    954                RGB888_RGB332(Pixel, *src);
    955                *dst++ = map[Pixel];
    956                ++src;
    957            , width);
    958            /* *INDENT-ON* */
    959#else
    960            for (c = width / 4; c; --c) {
    961                /* Pack RGB into 8bit pixel */
    962                RGB888_RGB332(Pixel, *src);
    963                *dst++ = map[Pixel];
    964                ++src;
    965                RGB888_RGB332(Pixel, *src);
    966                *dst++ = map[Pixel];
    967                ++src;
    968                RGB888_RGB332(Pixel, *src);
    969                *dst++ = map[Pixel];
    970                ++src;
    971                RGB888_RGB332(Pixel, *src);
    972                *dst++ = map[Pixel];
    973                ++src;
    974            }
    975            switch (width & 3) {
    976            case 3:
    977                RGB888_RGB332(Pixel, *src);
    978                *dst++ = map[Pixel];
    979                ++src;
    980            case 2:
    981                RGB888_RGB332(Pixel, *src);
    982                *dst++ = map[Pixel];
    983                ++src;
    984            case 1:
    985                RGB888_RGB332(Pixel, *src);
    986                *dst++ = map[Pixel];
    987                ++src;
    988            }
    989#endif /* USE_DUFFS_LOOP */
    990            src += srcskip;
    991            dst += dstskip;
    992        }
    993    }
    994}
    995
    996/* Special optimized blit for RGB 10-10-10 --> RGB 3-3-2 */
    997#define RGB101010_RGB332(dst, src) { \
    998    dst = (Uint8)((((src)&0x38000000)>>22)| \
    999                  (((src)&0x000E0000)>>15)| \
   1000                  (((src)&0x00000300)>>8)); \
   1001}
   1002static void
   1003Blit_RGB101010_index8(SDL_BlitInfo * info)
   1004{
   1005#ifndef USE_DUFFS_LOOP
   1006    int c;
   1007#endif
   1008    int width, height;
   1009    Uint32 *src;
   1010    const Uint8 *map;
   1011    Uint8 *dst;
   1012    int srcskip, dstskip;
   1013
   1014    /* Set up some basic variables */
   1015    width = info->dst_w;
   1016    height = info->dst_h;
   1017    src = (Uint32 *) info->src;
   1018    srcskip = info->src_skip / 4;
   1019    dst = info->dst;
   1020    dstskip = info->dst_skip;
   1021    map = info->table;
   1022
   1023    if (map == NULL) {
   1024        while (height--) {
   1025#ifdef USE_DUFFS_LOOP
   1026            /* *INDENT-OFF* */
   1027            DUFFS_LOOP(
   1028                RGB101010_RGB332(*dst++, *src);
   1029            , width);
   1030            /* *INDENT-ON* */
   1031#else
   1032            for (c = width / 4; c; --c) {
   1033                /* Pack RGB into 8bit pixel */
   1034                ++src;
   1035                RGB101010_RGB332(*dst++, *src);
   1036                ++src;
   1037                RGB101010_RGB332(*dst++, *src);
   1038                ++src;
   1039                RGB101010_RGB332(*dst++, *src);
   1040                ++src;
   1041            }
   1042            switch (width & 3) {
   1043            case 3:
   1044                RGB101010_RGB332(*dst++, *src);
   1045                ++src;
   1046            case 2:
   1047                RGB101010_RGB332(*dst++, *src);
   1048                ++src;
   1049            case 1:
   1050                RGB101010_RGB332(*dst++, *src);
   1051                ++src;
   1052            }
   1053#endif /* USE_DUFFS_LOOP */
   1054            src += srcskip;
   1055            dst += dstskip;
   1056        }
   1057    } else {
   1058        int Pixel;
   1059
   1060        while (height--) {
   1061#ifdef USE_DUFFS_LOOP
   1062            /* *INDENT-OFF* */
   1063            DUFFS_LOOP(
   1064                RGB101010_RGB332(Pixel, *src);
   1065                *dst++ = map[Pixel];
   1066                ++src;
   1067            , width);
   1068            /* *INDENT-ON* */
   1069#else
   1070            for (c = width / 4; c; --c) {
   1071                /* Pack RGB into 8bit pixel */
   1072                RGB101010_RGB332(Pixel, *src);
   1073                *dst++ = map[Pixel];
   1074                ++src;
   1075                RGB101010_RGB332(Pixel, *src);
   1076                *dst++ = map[Pixel];
   1077                ++src;
   1078                RGB101010_RGB332(Pixel, *src);
   1079                *dst++ = map[Pixel];
   1080                ++src;
   1081                RGB101010_RGB332(Pixel, *src);
   1082                *dst++ = map[Pixel];
   1083                ++src;
   1084            }
   1085            switch (width & 3) {
   1086            case 3:
   1087                RGB101010_RGB332(Pixel, *src);
   1088                *dst++ = map[Pixel];
   1089                ++src;
   1090            case 2:
   1091                RGB101010_RGB332(Pixel, *src);
   1092                *dst++ = map[Pixel];
   1093                ++src;
   1094            case 1:
   1095                RGB101010_RGB332(Pixel, *src);
   1096                *dst++ = map[Pixel];
   1097                ++src;
   1098            }
   1099#endif /* USE_DUFFS_LOOP */
   1100            src += srcskip;
   1101            dst += dstskip;
   1102        }
   1103    }
   1104}
   1105
   1106/* Special optimized blit for RGB 8-8-8 --> RGB 5-5-5 */
   1107#define RGB888_RGB555(dst, src) { \
   1108    *(Uint16 *)(dst) = (Uint16)((((*src)&0x00F80000)>>9)| \
   1109                                (((*src)&0x0000F800)>>6)| \
   1110                                (((*src)&0x000000F8)>>3)); \
   1111}
   1112#define RGB888_RGB555_TWO(dst, src) { \
   1113    *(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>9)| \
   1114                         (((src[HI])&0x0000F800)>>6)| \
   1115                         (((src[HI])&0x000000F8)>>3))<<16)| \
   1116                         (((src[LO])&0x00F80000)>>9)| \
   1117                         (((src[LO])&0x0000F800)>>6)| \
   1118                         (((src[LO])&0x000000F8)>>3); \
   1119}
   1120static void
   1121Blit_RGB888_RGB555(SDL_BlitInfo * info)
   1122{
   1123#ifndef USE_DUFFS_LOOP
   1124    int c;
   1125#endif
   1126    int width, height;
   1127    Uint32 *src;
   1128    Uint16 *dst;
   1129    int srcskip, dstskip;
   1130
   1131    /* Set up some basic variables */
   1132    width = info->dst_w;
   1133    height = info->dst_h;
   1134    src = (Uint32 *) info->src;
   1135    srcskip = info->src_skip / 4;
   1136    dst = (Uint16 *) info->dst;
   1137    dstskip = info->dst_skip / 2;
   1138
   1139#ifdef USE_DUFFS_LOOP
   1140    while (height--) {
   1141        /* *INDENT-OFF* */
   1142        DUFFS_LOOP(
   1143            RGB888_RGB555(dst, src);
   1144            ++src;
   1145            ++dst;
   1146        , width);
   1147        /* *INDENT-ON* */
   1148        src += srcskip;
   1149        dst += dstskip;
   1150    }
   1151#else
   1152    /* Memory align at 4-byte boundary, if necessary */
   1153    if ((long) dst & 0x03) {
   1154        /* Don't do anything if width is 0 */
   1155        if (width == 0) {
   1156            return;
   1157        }
   1158        --width;
   1159
   1160        while (height--) {
   1161            /* Perform copy alignment */
   1162            RGB888_RGB555(dst, src);
   1163            ++src;
   1164            ++dst;
   1165
   1166            /* Copy in 4 pixel chunks */
   1167            for (c = width / 4; c; --c) {
   1168                RGB888_RGB555_TWO(dst, src);
   1169                src += 2;
   1170                dst += 2;
   1171                RGB888_RGB555_TWO(dst, src);
   1172                src += 2;
   1173                dst += 2;
   1174            }
   1175            /* Get any leftovers */
   1176            switch (width & 3) {
   1177            case 3:
   1178                RGB888_RGB555(dst, src);
   1179                ++src;
   1180                ++dst;
   1181            case 2:
   1182                RGB888_RGB555_TWO(dst, src);
   1183                src += 2;
   1184                dst += 2;
   1185                break;
   1186            case 1:
   1187                RGB888_RGB555(dst, src);
   1188                ++src;
   1189                ++dst;
   1190                break;
   1191            }
   1192            src += srcskip;
   1193            dst += dstskip;
   1194        }
   1195    } else {
   1196        while (height--) {
   1197            /* Copy in 4 pixel chunks */
   1198            for (c = width / 4; c; --c) {
   1199                RGB888_RGB555_TWO(dst, src);
   1200                src += 2;
   1201                dst += 2;
   1202                RGB888_RGB555_TWO(dst, src);
   1203                src += 2;
   1204                dst += 2;
   1205            }
   1206            /* Get any leftovers */
   1207            switch (width & 3) {
   1208            case 3:
   1209                RGB888_RGB555(dst, src);
   1210                ++src;
   1211                ++dst;
   1212            case 2:
   1213                RGB888_RGB555_TWO(dst, src);
   1214                src += 2;
   1215                dst += 2;
   1216                break;
   1217            case 1:
   1218                RGB888_RGB555(dst, src);
   1219                ++src;
   1220                ++dst;
   1221                break;
   1222            }
   1223            src += srcskip;
   1224            dst += dstskip;
   1225        }
   1226    }
   1227#endif /* USE_DUFFS_LOOP */
   1228}
   1229
   1230/* Special optimized blit for RGB 8-8-8 --> RGB 5-6-5 */
   1231#define RGB888_RGB565(dst, src) { \
   1232    *(Uint16 *)(dst) = (Uint16)((((*src)&0x00F80000)>>8)| \
   1233                                (((*src)&0x0000FC00)>>5)| \
   1234                                (((*src)&0x000000F8)>>3)); \
   1235}
   1236#define RGB888_RGB565_TWO(dst, src) { \
   1237    *(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>8)| \
   1238                         (((src[HI])&0x0000FC00)>>5)| \
   1239                         (((src[HI])&0x000000F8)>>3))<<16)| \
   1240                         (((src[LO])&0x00F80000)>>8)| \
   1241                         (((src[LO])&0x0000FC00)>>5)| \
   1242                         (((src[LO])&0x000000F8)>>3); \
   1243}
   1244static void
   1245Blit_RGB888_RGB565(SDL_BlitInfo * info)
   1246{
   1247#ifndef USE_DUFFS_LOOP
   1248    int c;
   1249#endif
   1250    int width, height;
   1251    Uint32 *src;
   1252    Uint16 *dst;
   1253    int srcskip, dstskip;
   1254
   1255    /* Set up some basic variables */
   1256    width = info->dst_w;
   1257    height = info->dst_h;
   1258    src = (Uint32 *) info->src;
   1259    srcskip = info->src_skip / 4;
   1260    dst = (Uint16 *) info->dst;
   1261    dstskip = info->dst_skip / 2;
   1262
   1263#ifdef USE_DUFFS_LOOP
   1264    while (height--) {
   1265        /* *INDENT-OFF* */
   1266        DUFFS_LOOP(
   1267            RGB888_RGB565(dst, src);
   1268            ++src;
   1269            ++dst;
   1270        , width);
   1271        /* *INDENT-ON* */
   1272        src += srcskip;
   1273        dst += dstskip;
   1274    }
   1275#else
   1276    /* Memory align at 4-byte boundary, if necessary */
   1277    if ((long) dst & 0x03) {
   1278        /* Don't do anything if width is 0 */
   1279        if (width == 0) {
   1280            return;
   1281        }
   1282        --width;
   1283
   1284        while (height--) {
   1285            /* Perform copy alignment */
   1286            RGB888_RGB565(dst, src);
   1287            ++src;
   1288            ++dst;
   1289
   1290            /* Copy in 4 pixel chunks */
   1291            for (c = width / 4; c; --c) {
   1292                RGB888_RGB565_TWO(dst, src);
   1293                src += 2;
   1294                dst += 2;
   1295                RGB888_RGB565_TWO(dst, src);
   1296                src += 2;
   1297                dst += 2;
   1298            }
   1299            /* Get any leftovers */
   1300            switch (width & 3) {
   1301            case 3:
   1302                RGB888_RGB565(dst, src);
   1303                ++src;
   1304                ++dst;
   1305            case 2:
   1306                RGB888_RGB565_TWO(dst, src);
   1307                src += 2;
   1308                dst += 2;
   1309                break;
   1310            case 1:
   1311                RGB888_RGB565(dst, src);
   1312                ++src;
   1313                ++dst;
   1314                break;
   1315            }
   1316            src += srcskip;
   1317            dst += dstskip;
   1318        }
   1319    } else {
   1320        while (height--) {
   1321            /* Copy in 4 pixel chunks */
   1322            for (c = width / 4; c; --c) {
   1323                RGB888_RGB565_TWO(dst, src);
   1324                src += 2;
   1325                dst += 2;
   1326                RGB888_RGB565_TWO(dst, src);
   1327                src += 2;
   1328                dst += 2;
   1329            }
   1330            /* Get any leftovers */
   1331            switch (width & 3) {
   1332            case 3:
   1333                RGB888_RGB565(dst, src);
   1334                ++src;
   1335                ++dst;
   1336            case 2:
   1337                RGB888_RGB565_TWO(dst, src);
   1338                src += 2;
   1339                dst += 2;
   1340                break;
   1341            case 1:
   1342                RGB888_RGB565(dst, src);
   1343                ++src;
   1344                ++dst;
   1345                break;
   1346            }
   1347            src += srcskip;
   1348            dst += dstskip;
   1349        }
   1350    }
   1351#endif /* USE_DUFFS_LOOP */
   1352}
   1353
   1354
   1355/* Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces */
   1356#define RGB565_32(dst, src, map) (map[src[LO]*2] + map[src[HI]*2+1])
   1357static void
   1358Blit_RGB565_32(SDL_BlitInfo * info, const Uint32 * map)
   1359{
   1360#ifndef USE_DUFFS_LOOP
   1361    int c;
   1362#endif
   1363    int width, height;
   1364    Uint8 *src;
   1365    Uint32 *dst;
   1366    int srcskip, dstskip;
   1367
   1368    /* Set up some basic variables */
   1369    width = info->dst_w;
   1370    height = info->dst_h;
   1371    src = (Uint8 *) info->src;
   1372    srcskip = info->src_skip;
   1373    dst = (Uint32 *) info->dst;
   1374    dstskip = info->dst_skip / 4;
   1375
   1376#ifdef USE_DUFFS_LOOP
   1377    while (height--) {
   1378        /* *INDENT-OFF* */
   1379        DUFFS_LOOP(
   1380        {
   1381            *dst++ = RGB565_32(dst, src, map);
   1382            src += 2;
   1383        },
   1384        width);
   1385        /* *INDENT-ON* */
   1386        src += srcskip;
   1387        dst += dstskip;
   1388    }
   1389#else
   1390    while (height--) {
   1391        /* Copy in 4 pixel chunks */
   1392        for (c = width / 4; c; --c) {
   1393            *dst++ = RGB565_32(dst, src, map);
   1394            src += 2;
   1395            *dst++ = RGB565_32(dst, src, map);
   1396            src += 2;
   1397            *dst++ = RGB565_32(dst, src, map);
   1398            src += 2;
   1399            *dst++ = RGB565_32(dst, src, map);
   1400            src += 2;
   1401        }
   1402        /* Get any leftovers */
   1403        switch (width & 3) {
   1404        case 3:
   1405            *dst++ = RGB565_32(dst, src, map);
   1406            src += 2;
   1407        case 2:
   1408            *dst++ = RGB565_32(dst, src, map);
   1409            src += 2;
   1410        case 1:
   1411            *dst++ = RGB565_32(dst, src, map);
   1412            src += 2;
   1413            break;
   1414        }
   1415        src += srcskip;
   1416        dst += dstskip;
   1417    }
   1418#endif /* USE_DUFFS_LOOP */
   1419}
   1420
    1421/* Special optimized blit for RGB 5-6-5 --> ARGB 8-8-8-8
         *
         * Lookup table that Blit_RGB565_ARGB8888 hands to Blit_RGB565_32.
         * The RGB565_32 macro indexes it with the two bytes of a source pixel:
         * entry [2*b] is read for the byte at src[LO], entry [2*b + 1] for the
         * byte at src[HI], and the two entries are added to form the output
         * pixel.  In the values below every odd entry carries 0xff in its top
         * byte and has its low byte clear, while the even entries only occupy
         * the low 13 bits, so the addition never carries between the two.
         */
    1422static const Uint32 RGB565_ARGB8888_LUT[512] = {
    1423    0x00000000, 0xff000000, 0x00000008, 0xff002000,
    1424    0x00000010, 0xff004000, 0x00000018, 0xff006100,
    1425    0x00000020, 0xff008100, 0x00000029, 0xff00a100,
    1426    0x00000031, 0xff00c200, 0x00000039, 0xff00e200,
    1427    0x00000041, 0xff080000, 0x0000004a, 0xff082000,
    1428    0x00000052, 0xff084000, 0x0000005a, 0xff086100,
    1429    0x00000062, 0xff088100, 0x0000006a, 0xff08a100,
    1430    0x00000073, 0xff08c200, 0x0000007b, 0xff08e200,
    1431    0x00000083, 0xff100000, 0x0000008b, 0xff102000,
    1432    0x00000094, 0xff104000, 0x0000009c, 0xff106100,
    1433    0x000000a4, 0xff108100, 0x000000ac, 0xff10a100,
    1434    0x000000b4, 0xff10c200, 0x000000bd, 0xff10e200,
    1435    0x000000c5, 0xff180000, 0x000000cd, 0xff182000,
    1436    0x000000d5, 0xff184000, 0x000000de, 0xff186100,
    1437    0x000000e6, 0xff188100, 0x000000ee, 0xff18a100,
    1438    0x000000f6, 0xff18c200, 0x000000ff, 0xff18e200,
    1439    0x00000400, 0xff200000, 0x00000408, 0xff202000,
    1440    0x00000410, 0xff204000, 0x00000418, 0xff206100,
    1441    0x00000420, 0xff208100, 0x00000429, 0xff20a100,
    1442    0x00000431, 0xff20c200, 0x00000439, 0xff20e200,
    1443    0x00000441, 0xff290000, 0x0000044a, 0xff292000,
    1444    0x00000452, 0xff294000, 0x0000045a, 0xff296100,
    1445    0x00000462, 0xff298100, 0x0000046a, 0xff29a100,
    1446    0x00000473, 0xff29c200, 0x0000047b, 0xff29e200,
    1447    0x00000483, 0xff310000, 0x0000048b, 0xff312000,
    1448    0x00000494, 0xff314000, 0x0000049c, 0xff316100,
    1449    0x000004a4, 0xff318100, 0x000004ac, 0xff31a100,
    1450    0x000004b4, 0xff31c200, 0x000004bd, 0xff31e200,
    1451    0x000004c5, 0xff390000, 0x000004cd, 0xff392000,
    1452    0x000004d5, 0xff394000, 0x000004de, 0xff396100,
    1453    0x000004e6, 0xff398100, 0x000004ee, 0xff39a100,
    1454    0x000004f6, 0xff39c200, 0x000004ff, 0xff39e200,
    1455    0x00000800, 0xff410000, 0x00000808, 0xff412000,
    1456    0x00000810, 0xff414000, 0x00000818, 0xff416100,
    1457    0x00000820, 0xff418100, 0x00000829, 0xff41a100,
    1458    0x00000831, 0xff41c200, 0x00000839, 0xff41e200,
    1459    0x00000841, 0xff4a0000, 0x0000084a, 0xff4a2000,
    1460    0x00000852, 0xff4a4000, 0x0000085a, 0xff4a6100,
    1461    0x00000862, 0xff4a8100, 0x0000086a, 0xff4aa100,
    1462    0x00000873, 0xff4ac200, 0x0000087b, 0xff4ae200,
    1463    0x00000883, 0xff520000, 0x0000088b, 0xff522000,
    1464    0x00000894, 0xff524000, 0x0000089c, 0xff526100,
    1465    0x000008a4, 0xff528100, 0x000008ac, 0xff52a100,
    1466    0x000008b4, 0xff52c200, 0x000008bd, 0xff52e200,
    1467    0x000008c5, 0xff5a0000, 0x000008cd, 0xff5a2000,
    1468    0x000008d5, 0xff5a4000, 0x000008de, 0xff5a6100,
    1469    0x000008e6, 0xff5a8100, 0x000008ee, 0xff5aa100,
    1470    0x000008f6, 0xff5ac200, 0x000008ff, 0xff5ae200,
    1471    0x00000c00, 0xff620000, 0x00000c08, 0xff622000,
    1472    0x00000c10, 0xff624000, 0x00000c18, 0xff626100,
    1473    0x00000c20, 0xff628100, 0x00000c29, 0xff62a100,
    1474    0x00000c31, 0xff62c200, 0x00000c39, 0xff62e200,
    1475    0x00000c41, 0xff6a0000, 0x00000c4a, 0xff6a2000,
    1476    0x00000c52, 0xff6a4000, 0x00000c5a, 0xff6a6100,
    1477    0x00000c62, 0xff6a8100, 0x00000c6a, 0xff6aa100,
    1478    0x00000c73, 0xff6ac200, 0x00000c7b, 0xff6ae200,
    1479    0x00000c83, 0xff730000, 0x00000c8b, 0xff732000,
    1480    0x00000c94, 0xff734000, 0x00000c9c, 0xff736100,
    1481    0x00000ca4, 0xff738100, 0x00000cac, 0xff73a100,
    1482    0x00000cb4, 0xff73c200, 0x00000cbd, 0xff73e200,
    1483    0x00000cc5, 0xff7b0000, 0x00000ccd, 0xff7b2000,
    1484    0x00000cd5, 0xff7b4000, 0x00000cde, 0xff7b6100,
    1485    0x00000ce6, 0xff7b8100, 0x00000cee, 0xff7ba100,
    1486    0x00000cf6, 0xff7bc200, 0x00000cff, 0xff7be200,
    1487    0x00001000, 0xff830000, 0x00001008, 0xff832000,
    1488    0x00001010, 0xff834000, 0x00001018, 0xff836100,
    1489    0x00001020, 0xff838100, 0x00001029, 0xff83a100,
    1490    0x00001031, 0xff83c200, 0x00001039, 0xff83e200,
    1491    0x00001041, 0xff8b0000, 0x0000104a, 0xff8b2000,
    1492    0x00001052, 0xff8b4000, 0x0000105a, 0xff8b6100,
    1493    0x00001062, 0xff8b8100, 0x0000106a, 0xff8ba100,
    1494    0x00001073, 0xff8bc200, 0x0000107b, 0xff8be200,
    1495    0x00001083, 0xff940000, 0x0000108b, 0xff942000,
    1496    0x00001094, 0xff944000, 0x0000109c, 0xff946100,
    1497    0x000010a4, 0xff948100, 0x000010ac, 0xff94a100,
    1498    0x000010b4, 0xff94c200, 0x000010bd, 0xff94e200,
    1499    0x000010c5, 0xff9c0000, 0x000010cd, 0xff9c2000,
    1500    0x000010d5, 0xff9c4000, 0x000010de, 0xff9c6100,
    1501    0x000010e6, 0xff9c8100, 0x000010ee, 0xff9ca100,
    1502    0x000010f6, 0xff9cc200, 0x000010ff, 0xff9ce200,
    1503    0x00001400, 0xffa40000, 0x00001408, 0xffa42000,
    1504    0x00001410, 0xffa44000, 0x00001418, 0xffa46100,
    1505    0x00001420, 0xffa48100, 0x00001429, 0xffa4a100,
    1506    0x00001431, 0xffa4c200, 0x00001439, 0xffa4e200,
    1507    0x00001441, 0xffac0000, 0x0000144a, 0xffac2000,
    1508    0x00001452, 0xffac4000, 0x0000145a, 0xffac6100,
    1509    0x00001462, 0xffac8100, 0x0000146a, 0xffaca100,
    1510    0x00001473, 0xffacc200, 0x0000147b, 0xfface200,
    1511    0x00001483, 0xffb40000, 0x0000148b, 0xffb42000,
    1512    0x00001494, 0xffb44000, 0x0000149c, 0xffb46100,
    1513    0x000014a4, 0xffb48100, 0x000014ac, 0xffb4a100,
    1514    0x000014b4, 0xffb4c200, 0x000014bd, 0xffb4e200,
    1515    0x000014c5, 0xffbd0000, 0x000014cd, 0xffbd2000,
    1516    0x000014d5, 0xffbd4000, 0x000014de, 0xffbd6100,
    1517    0x000014e6, 0xffbd8100, 0x000014ee, 0xffbda100,
    1518    0x000014f6, 0xffbdc200, 0x000014ff, 0xffbde200,
    1519    0x00001800, 0xffc50000, 0x00001808, 0xffc52000,
    1520    0x00001810, 0xffc54000, 0x00001818, 0xffc56100,
    1521    0x00001820, 0xffc58100, 0x00001829, 0xffc5a100,
    1522    0x00001831, 0xffc5c200, 0x00001839, 0xffc5e200,
    1523    0x00001841, 0xffcd0000, 0x0000184a, 0xffcd2000,
    1524    0x00001852, 0xffcd4000, 0x0000185a, 0xffcd6100,
    1525    0x00001862, 0xffcd8100, 0x0000186a, 0xffcda100,
    1526    0x00001873, 0xffcdc200, 0x0000187b, 0xffcde200,
    1527    0x00001883, 0xffd50000, 0x0000188b, 0xffd52000,
    1528    0x00001894, 0xffd54000, 0x0000189c, 0xffd56100,
    1529    0x000018a4, 0xffd58100, 0x000018ac, 0xffd5a100,
    1530    0x000018b4, 0xffd5c200, 0x000018bd, 0xffd5e200,
    1531    0x000018c5, 0xffde0000, 0x000018cd, 0xffde2000,
    1532    0x000018d5, 0xffde4000, 0x000018de, 0xffde6100,
    1533    0x000018e6, 0xffde8100, 0x000018ee, 0xffdea100,
    1534    0x000018f6, 0xffdec200, 0x000018ff, 0xffdee200,
    1535    0x00001c00, 0xffe60000, 0x00001c08, 0xffe62000,
    1536    0x00001c10, 0xffe64000, 0x00001c18, 0xffe66100,
    1537    0x00001c20, 0xffe68100, 0x00001c29, 0xffe6a100,
    1538    0x00001c31, 0xffe6c200, 0x00001c39, 0xffe6e200,
    1539    0x00001c41, 0xffee0000, 0x00001c4a, 0xffee2000,
    1540    0x00001c52, 0xffee4000, 0x00001c5a, 0xffee6100,
    1541    0x00001c62, 0xffee8100, 0x00001c6a, 0xffeea100,
    1542    0x00001c73, 0xffeec200, 0x00001c7b, 0xffeee200,
    1543    0x00001c83, 0xfff60000, 0x00001c8b, 0xfff62000,
    1544    0x00001c94, 0xfff64000, 0x00001c9c, 0xfff66100,
    1545    0x00001ca4, 0xfff68100, 0x00001cac, 0xfff6a100,
    1546    0x00001cb4, 0xfff6c200, 0x00001cbd, 0xfff6e200,
    1547    0x00001cc5, 0xffff0000, 0x00001ccd, 0xffff2000,
    1548    0x00001cd5, 0xffff4000, 0x00001cde, 0xffff6100,
    1549    0x00001ce6, 0xffff8100, 0x00001cee, 0xffffa100,
    1550    0x00001cf6, 0xffffc200, 0x00001cff, 0xffffe200
    1551};
   1552
   1553static void
   1554Blit_RGB565_ARGB8888(SDL_BlitInfo * info)
   1555{
   1556    Blit_RGB565_32(info, RGB565_ARGB8888_LUT);
   1557}
   1558
   1559/* Special optimized blit for RGB 5-6-5 --> ABGR 8-8-8-8 */
   1560static const Uint32 RGB565_ABGR8888_LUT[512] = {
   1561    0xff000000, 0x00000000, 0xff080000, 0x00002000,
   1562    0xff100000, 0x00004000, 0xff180000, 0x00006100,
   1563    0xff200000, 0x00008100, 0xff290000, 0x0000a100,
   1564    0xff310000, 0x0000c200, 0xff390000, 0x0000e200,
   1565    0xff410000, 0x00000008, 0xff4a0000, 0x00002008,
   1566    0xff520000, 0x00004008, 0xff5a0000, 0x00006108,
   1567    0xff620000, 0x00008108, 0xff6a0000, 0x0000a108,
   1568    0xff730000, 0x0000c208, 0xff7b0000, 0x0000e208,
   1569    0xff830000, 0x00000010, 0xff8b0000, 0x00002010,
   1570    0xff940000, 0x00004010, 0xff9c0000, 0x00006110,
   1571    0xffa40000, 0x00008110, 0xffac0000, 0x0000a110,
   1572    0xffb40000, 0x0000c210, 0xffbd0000, 0x0000e210,
   1573    0xffc50000, 0x00000018, 0xffcd0000, 0x00002018,
   1574    0xffd50000, 0x00004018, 0xffde0000, 0x00006118,
   1575    0xffe60000, 0x00008118, 0xffee0000, 0x0000a118,
   1576    0xfff60000, 0x0000c218, 0xffff0000, 0x0000e218,
   1577    0xff000400, 0x00000020, 0xff080400, 0x00002020,
   1578    0xff100400, 0x00004020, 0xff180400, 0x00006120,
   1579    0xff200400, 0x00008120, 0xff290400, 0x0000a120,
   1580    0xff310400, 0x0000c220, 0xff390400, 0x0000e220,
   1581    0xff410400, 0x00000029, 0xff4a0400, 0x00002029,
   1582    0xff520400, 0x00004029, 0xff5a0400, 0x00006129,
   1583    0xff620400, 0x00008129, 0xff6a0400, 0x0000a129,
   1584    0xff730400, 0x0000c229, 0xff7b0400, 0x0000e229,
   1585    0xff830400, 0x00000031, 0xff8b0400, 0x00002031,
   1586    0xff940400, 0x00004031, 0xff9c0400, 0x00006131,
   1587    0xffa40400, 0x00008131, 0xffac0400, 0x0000a131,
   1588    0xffb40400, 0x0000c231, 0xffbd0400, 0x0000e231,
   1589    0xffc50400, 0x00000039, 0xffcd0400, 0x00002039,
   1590    0xffd50400, 0x00004039, 0xffde0400, 0x00006139,
   1591    0xffe60400, 0x00008139, 0xffee0400, 0x0000a139,
   1592    0xfff60400, 0x0000c239, 0xffff0400, 0x0000e239,
   1593    0xff000800, 0x00000041, 0xff080800, 0x00002041,
   1594    0xff100800, 0x00004041, 0xff180800, 0x00006141,
   1595    0xff200800, 0x00008141, 0xff290800, 0x0000a141,
   1596    0xff310800, 0x0000c241, 0xff390800, 0x0000e241,
   1597    0xff410800, 0x0000004a, 0xff4a0800, 0x0000204a,
   1598    0xff520800, 0x0000404a, 0xff5a0800, 0x0000614a,
   1599    0xff620800, 0x0000814a, 0xff6a0800, 0x0000a14a,
   1600    0xff730800, 0x0000c24a, 0xff7b0800, 0x0000e24a,
   1601    0xff830800, 0x00000052, 0xff8b0800, 0x00002052,
   1602    0xff940800, 0x00004052, 0xff9c0800, 0x00006152,
   1603    0xffa40800, 0x00008152, 0xffac0800, 0x0000a152,
   1604    0xffb40800, 0x0000c252, 0xffbd0800, 0x0000e252,
   1605    0xffc50800, 0x0000005a, 0xffcd0800, 0x0000205a,
   1606    0xffd50800, 0x0000405a, 0xffde0800, 0x0000615a,
   1607    0xffe60800, 0x0000815a, 0xffee0800, 0x0000a15a,
   1608    0xfff60800, 0x0000c25a, 0xffff0800, 0x0000e25a,
   1609    0xff000c00, 0x00000062, 0xff080c00, 0x00002062,
   1610    0xff100c00, 0x00004062, 0xff180c00, 0x00006162,
   1611    0xff200c00, 0x00008162, 0xff290c00, 0x0000a162,
   1612    0xff310c00, 0x0000c262, 0xff390c00, 0x0000e262,
   1613    0xff410c00, 0x0000006a, 0xff4a0c00, 0x0000206a,
   1614    0xff520c00, 0x0000406a, 0xff5a0c00, 0x0000616a,
   1615    0xff620c00, 0x0000816a, 0xff6a0c00, 0x0000a16a,
   1616    0xff730c00, 0x0000c26a, 0xff7b0c00, 0x0000e26a,
   1617    0xff830c00, 0x00000073, 0xff8b0c00, 0x00002073,
   1618    0xff940c00, 0x00004073, 0xff9c0c00, 0x00006173,
   1619    0xffa40c00, 0x00008173, 0xffac0c00, 0x0000a173,
   1620    0xffb40c00, 0x0000c273, 0xffbd0c00, 0x0000e273,
   1621    0xffc50c00, 0x0000007b, 0xffcd0c00, 0x0000207b,
   1622    0xffd50c00, 0x0000407b, 0xffde0c00, 0x0000617b,
   1623    0xffe60c00, 0x0000817b, 0xffee0c00, 0x0000a17b,
   1624    0xfff60c00, 0x0000c27b, 0xffff0c00, 0x0000e27b,
   1625    0xff001000, 0x00000083, 0xff081000, 0x00002083,
   1626    0xff101000, 0x00004083, 0xff181000, 0x00006183,
   1627    0xff201000, 0x00008183, 0xff291000, 0x0000a183,
   1628    0xff311000, 0x0000c283, 0xff391000, 0x0000e283,
   1629    0xff411000, 0x0000008b, 0xff4a1000, 0x0000208b,
   1630    0xff521000, 0x0000408b, 0xff5a1000, 0x0000618b,
   1631    0xff621000, 0x0000818b, 0xff6a1000, 0x0000a18b,
   1632    0xff731000, 0x0000c28b, 0xff7b1000, 0x0000e28b,
   1633    0xff831000, 0x00000094, 0xff8b1000, 0x00002094,
   1634    0xff941000, 0x00004094, 0xff9c1000, 0x00006194,
   1635    0xffa41000, 0x00008194, 0xffac1000, 0x0000a194,
   1636    0xffb41000, 0x0000c294, 0xffbd1000, 0x0000e294,
   1637    0xffc51000, 0x0000009c, 0xffcd1000, 0x0000209c,
   1638    0xffd51000, 0x0000409c, 0xffde1000, 0x0000619c,
   1639    0xffe61000, 0x0000819c, 0xffee1000, 0x0000a19c,
   1640    0xfff61000, 0x0000c29c, 0xffff1000, 0x0000e29c,
   1641    0xff001400, 0x000000a4, 0xff081400, 0x000020a4,
   1642    0xff101400, 0x000040a4, 0xff181400, 0x000061a4,
   1643    0xff201400, 0x000081a4, 0xff291400, 0x0000a1a4,
   1644    0xff311400, 0x0000c2a4, 0xff391400, 0x0000e2a4,
   1645    0xff411400, 0x000000ac, 0xff4a1400, 0x000020ac,
   1646    0xff521400, 0x000040ac, 0xff5a1400, 0x000061ac,
   1647    0xff621400, 0x000081ac, 0xff6a1400, 0x0000a1ac,
   1648    0xff731400, 0x0000c2ac, 0xff7b1400, 0x0000e2ac,
   1649    0xff831400, 0x000000b4, 0xff8b1400, 0x000020b4,
   1650    0xff941400, 0x000040b4, 0xff9c1400, 0x000061b4,
   1651    0xffa41400, 0x000081b4, 0xffac1400, 0x0000a1b4,
   1652    0xffb41400, 0x0000c2b4, 0xffbd1400, 0x0000e2b4,
   1653    0xffc51400, 0x000000bd, 0xffcd1400, 0x000020bd,
   1654    0xffd51400, 0x000040bd, 0xffde1400, 0x000061bd,
   1655    0xffe61400, 0x000081bd, 0xffee1400, 0x0000a1bd,
   1656    0xfff61400, 0x0000c2bd, 0xffff1400, 0x0000e2bd,
   1657    0xff001800, 0x000000c5, 0xff081800, 0x000020c5,
   1658    0xff101800, 0x000040c5, 0xff181800, 0x000061c5,
   1659    0xff201800, 0x000081c5, 0xff291800, 0x0000a1c5,
   1660    0xff311800, 0x0000c2c5, 0xff391800, 0x0000e2c5,
   1661    0xff411800, 0x000000cd, 0xff4a1800, 0x000020cd,
   1662    0xff521800, 0x000040cd, 0xff5a1800, 0x000061cd,
   1663    0xff621800, 0x000081cd, 0xff6a1800, 0x0000a1cd,
   1664    0xff731800, 0x0000c2cd, 0xff7b1800, 0x0000e2cd,
   1665    0xff831800, 0x000000d5, 0xff8b1800, 0x000020d5,
   1666    0xff941800, 0x000040d5, 0xff9c1800, 0x000061d5,
   1667    0xffa41800, 0x000081d5, 0xffac1800, 0x0000a1d5,
   1668    0xffb41800, 0x0000c2d5, 0xffbd1800, 0x0000e2d5,
   1669    0xffc51800, 0x000000de, 0xffcd1800, 0x000020de,
   1670    0xffd51800, 0x000040de, 0xffde1800, 0x000061de,
   1671    0xffe61800, 0x000081de, 0xffee1800, 0x0000a1de,
   1672    0xfff61800, 0x0000c2de, 0xffff1800, 0x0000e2de,
   1673    0xff001c00, 0x000000e6, 0xff081c00, 0x000020e6,
   1674    0xff101c00, 0x000040e6, 0xff181c00, 0x000061e6,
   1675    0xff201c00, 0x000081e6, 0xff291c00, 0x0000a1e6,
   1676    0xff311c00, 0x0000c2e6, 0xff391c00, 0x0000e2e6,
   1677    0xff411c00, 0x000000ee, 0xff4a1c00, 0x000020ee,
   1678    0xff521c00, 0x000040ee, 0xff5a1c00, 0x000061ee,
   1679    0xff621c00, 0x000081ee, 0xff6a1c00, 0x0000a1ee,
   1680    0xff731c00, 0x0000c2ee, 0xff7b1c00, 0x0000e2ee,
   1681    0xff831c00, 0x000000f6, 0xff8b1c00, 0x000020f6,
   1682    0xff941c00, 0x000040f6, 0xff9c1c00, 0x000061f6,
   1683    0xffa41c00, 0x000081f6, 0xffac1c00, 0x0000a1f6,
   1684    0xffb41c00, 0x0000c2f6, 0xffbd1c00, 0x0000e2f6,
   1685    0xffc51c00, 0x000000ff, 0xffcd1c00, 0x000020ff,
   1686    0xffd51c00, 0x000040ff, 0xffde1c00, 0x000061ff,
   1687    0xffe61c00, 0x000081ff, 0xffee1c00, 0x0000a1ff,
   1688    0xfff61c00, 0x0000c2ff, 0xffff1c00, 0x0000e2ff
   1689};
   1690
   1691static void
   1692Blit_RGB565_ABGR8888(SDL_BlitInfo * info)
   1693{
   1694    Blit_RGB565_32(info, RGB565_ABGR8888_LUT);
   1695}
   1696
   1697/* Special optimized blit for RGB 5-6-5 --> RGBA 8-8-8-8 */
   1698static const Uint32 RGB565_RGBA8888_LUT[512] = {
   1699    0x000000ff, 0x00000000, 0x000008ff, 0x00200000,
   1700    0x000010ff, 0x00400000, 0x000018ff, 0x00610000,
   1701    0x000020ff, 0x00810000, 0x000029ff, 0x00a10000,
   1702    0x000031ff, 0x00c20000, 0x000039ff, 0x00e20000,
   1703    0x000041ff, 0x08000000, 0x00004aff, 0x08200000,
   1704    0x000052ff, 0x08400000, 0x00005aff, 0x08610000,
   1705    0x000062ff, 0x08810000, 0x00006aff, 0x08a10000,
   1706    0x000073ff, 0x08c20000, 0x00007bff, 0x08e20000,
   1707    0x000083ff, 0x10000000, 0x00008bff, 0x10200000,
   1708    0x000094ff, 0x10400000, 0x00009cff, 0x10610000,
   1709    0x0000a4ff, 0x10810000, 0x0000acff, 0x10a10000,
   1710    0x0000b4ff, 0x10c20000, 0x0000bdff, 0x10e20000,
   1711    0x0000c5ff, 0x18000000, 0x0000cdff, 0x18200000,
   1712    0x0000d5ff, 0x18400000, 0x0000deff, 0x18610000,
   1713    0x0000e6ff, 0x18810000, 0x0000eeff, 0x18a10000,
   1714    0x0000f6ff, 0x18c20000, 0x0000ffff, 0x18e20000,
   1715    0x000400ff, 0x20000000, 0x000408ff, 0x20200000,
   1716    0x000410ff, 0x20400000, 0x000418ff, 0x20610000,
   1717    0x000420ff, 0x20810000, 0x000429ff, 0x20a10000,
   1718    0x000431ff, 0x20c20000, 0x000439ff, 0x20e20000,
   1719    0x000441ff, 0x29000000, 0x00044aff, 0x29200000,
   1720    0x000452ff, 0x29400000, 0x00045aff, 0x29610000,
   1721    0x000462ff, 0x29810000, 0x00046aff, 0x29a10000,
   1722    0x000473ff, 0x29c20000, 0x00047bff, 0x29e20000,
   1723    0x000483ff, 0x31000000, 0x00048bff, 0x31200000,
   1724    0x000494ff, 0x31400000, 0x00049cff, 0x31610000,
   1725    0x0004a4ff, 0x31810000, 0x0004acff, 0x31a10000,
   1726    0x0004b4ff, 0x31c20000, 0x0004bdff, 0x31e20000,
   1727    0x0004c5ff, 0x39000000, 0x0004cdff, 0x39200000,
   1728    0x0004d5ff, 0x39400000, 0x0004deff, 0x39610000,
   1729    0x0004e6ff, 0x39810000, 0x0004eeff, 0x39a10000,
   1730    0x0004f6ff, 0x39c20000, 0x0004ffff, 0x39e20000,
   1731    0x000800ff, 0x41000000, 0x000808ff, 0x41200000,
   1732    0x000810ff, 0x41400000, 0x000818ff, 0x41610000,
   1733    0x000820ff, 0x41810000, 0x000829ff, 0x41a10000,
   1734    0x000831ff, 0x41c20000, 0x000839ff, 0x41e20000,
   1735    0x000841ff, 0x4a000000, 0x00084aff, 0x4a200000,
   1736    0x000852ff, 0x4a400000, 0x00085aff, 0x4a610000,
   1737    0x000862ff, 0x4a810000, 0x00086aff, 0x4aa10000,
   1738    0x000873ff, 0x4ac20000, 0x00087bff, 0x4ae20000,
   1739    0x000883ff, 0x52000000, 0x00088bff, 0x52200000,
   1740    0x000894ff, 0x52400000, 0x00089cff, 0x52610000,
   1741    0x0008a4ff, 0x52810000, 0x0008acff, 0x52a10000,
   1742    0x0008b4ff, 0x52c20000, 0x0008bdff, 0x52e20000,
   1743    0x0008c5ff, 0x5a000000, 0x0008cdff, 0x5a200000,
   1744    0x0008d5ff, 0x5a400000, 0x0008deff, 0x5a610000,
   1745    0x0008e6ff, 0x5a810000, 0x0008eeff, 0x5aa10000,
   1746    0x0008f6ff, 0x5ac20000, 0x0008ffff, 0x5ae20000,
   1747    0x000c00ff, 0x62000000, 0x000c08ff, 0x62200000,
   1748    0x000c10ff, 0x62400000, 0x000c18ff, 0x62610000,
   1749    0x000c20ff, 0x62810000, 0x000c29ff, 0x62a10000,
   1750    0x000c31ff, 0x62c20000, 0x000c39ff, 0x62e20000,
   1751    0x000c41ff, 0x6a000000, 0x000c4aff, 0x6a200000,
   1752    0x000c52ff, 0x6a400000, 0x000c5aff, 0x6a610000,
   1753    0x000c62ff, 0x6a810000, 0x000c6aff, 0x6aa10000,
   1754    0x000c73ff, 0x6ac20000, 0x000c7bff, 0x6ae20000,
   1755    0x000c83ff, 0x73000000, 0x000c8bff, 0x73200000,
   1756    0x000c94ff, 0x73400000, 0x000c9cff, 0x73610000,
   1757    0x000ca4ff, 0x73810000, 0x000cacff, 0x73a10000,
   1758    0x000cb4ff, 0x73c20000, 0x000cbdff, 0x73e20000,
   1759    0x000cc5ff, 0x7b000000, 0x000ccdff, 0x7b200000,
   1760    0x000cd5ff, 0x7b400000, 0x000cdeff, 0x7b610000,
   1761    0x000ce6ff, 0x7b810000, 0x000ceeff, 0x7ba10000,
   1762    0x000cf6ff, 0x7bc20000, 0x000cffff, 0x7be20000,
   1763    0x001000ff, 0x83000000, 0x001008ff, 0x83200000,
   1764    0x001010ff, 0x83400000, 0x001018ff, 0x83610000,
   1765    0x001020ff, 0x83810000, 0x001029ff, 0x83a10000,
   1766    0x001031ff, 0x83c20000, 0x001039ff, 0x83e20000,
   1767    0x001041ff, 0x8b000000, 0x00104aff, 0x8b200000,
   1768    0x001052ff, 0x8b400000, 0x00105aff, 0x8b610000,
   1769    0x001062ff, 0x8b810000, 0x00106aff, 0x8ba10000,
   1770    0x001073ff, 0x8bc20000, 0x00107bff, 0x8be20000,
   1771    0x001083ff, 0x94000000, 0x00108bff, 0x94200000,
   1772    0x001094ff, 0x94400000, 0x00109cff, 0x94610000,
   1773    0x0010a4ff, 0x94810000, 0x0010acff, 0x94a10000,
   1774    0x0010b4ff, 0x94c20000, 0x0010bdff, 0x94e20000,
   1775    0x0010c5ff, 0x9c000000, 0x0010cdff, 0x9c200000,
   1776    0x0010d5ff, 0x9c400000, 0x0010deff, 0x9c610000,
   1777    0x0010e6ff, 0x9c810000, 0x0010eeff, 0x9ca10000,
   1778    0x0010f6ff, 0x9cc20000, 0x0010ffff, 0x9ce20000,
   1779    0x001400ff, 0xa4000000, 0x001408ff, 0xa4200000,
   1780    0x001410ff, 0xa4400000, 0x001418ff, 0xa4610000,
   1781    0x001420ff, 0xa4810000, 0x001429ff, 0xa4a10000,
   1782    0x001431ff, 0xa4c20000, 0x001439ff, 0xa4e20000,
   1783    0x001441ff, 0xac000000, 0x00144aff, 0xac200000,
   1784    0x001452ff, 0xac400000, 0x00145aff, 0xac610000,
   1785    0x001462ff, 0xac810000, 0x00146aff, 0xaca10000,
   1786    0x001473ff, 0xacc20000, 0x00147bff, 0xace20000,
   1787    0x001483ff, 0xb4000000, 0x00148bff, 0xb4200000,
   1788    0x001494ff, 0xb4400000, 0x00149cff, 0xb4610000,
   1789    0x0014a4ff, 0xb4810000, 0x0014acff, 0xb4a10000,
   1790    0x0014b4ff, 0xb4c20000, 0x0014bdff, 0xb4e20000,
   1791    0x0014c5ff, 0xbd000000, 0x0014cdff, 0xbd200000,
   1792    0x0014d5ff, 0xbd400000, 0x0014deff, 0xbd610000,
   1793    0x0014e6ff, 0xbd810000, 0x0014eeff, 0xbda10000,
   1794    0x0014f6ff, 0xbdc20000, 0x0014ffff, 0xbde20000,
   1795    0x001800ff, 0xc5000000, 0x001808ff, 0xc5200000,
   1796    0x001810ff, 0xc5400000, 0x001818ff, 0xc5610000,
   1797    0x001820ff, 0xc5810000, 0x001829ff, 0xc5a10000,
   1798    0x001831ff, 0xc5c20000, 0x001839ff, 0xc5e20000,
   1799    0x001841ff, 0xcd000000, 0x00184aff, 0xcd200000,
   1800    0x001852ff, 0xcd400000, 0x00185aff, 0xcd610000,
   1801    0x001862ff, 0xcd810000, 0x00186aff, 0xcda10000,
   1802    0x001873ff, 0xcdc20000, 0x00187bff, 0xcde20000,
   1803    0x001883ff, 0xd5000000, 0x00188bff, 0xd5200000,
   1804    0x001894ff, 0xd5400000, 0x00189cff, 0xd5610000,
   1805    0x0018a4ff, 0xd5810000, 0x0018acff, 0xd5a10000,
   1806    0x0018b4ff, 0xd5c20000, 0x0018bdff, 0xd5e20000,
   1807    0x0018c5ff, 0xde000000, 0x0018cdff, 0xde200000,
   1808    0x0018d5ff, 0xde400000, 0x0018deff, 0xde610000,
   1809    0x0018e6ff, 0xde810000, 0x0018eeff, 0xdea10000,
   1810    0x0018f6ff, 0xdec20000, 0x0018ffff, 0xdee20000,
   1811    0x001c00ff, 0xe6000000, 0x001c08ff, 0xe6200000,
   1812    0x001c10ff, 0xe6400000, 0x001c18ff, 0xe6610000,
   1813    0x001c20ff, 0xe6810000, 0x001c29ff, 0xe6a10000,
   1814    0x001c31ff, 0xe6c20000, 0x001c39ff, 0xe6e20000,
   1815    0x001c41ff, 0xee000000, 0x001c4aff, 0xee200000,
   1816    0x001c52ff, 0xee400000, 0x001c5aff, 0xee610000,
   1817    0x001c62ff, 0xee810000, 0x001c6aff, 0xeea10000,
   1818    0x001c73ff, 0xeec20000, 0x001c7bff, 0xeee20000,
   1819    0x001c83ff, 0xf6000000, 0x001c8bff, 0xf6200000,
   1820    0x001c94ff, 0xf6400000, 0x001c9cff, 0xf6610000,
   1821    0x001ca4ff, 0xf6810000, 0x001cacff, 0xf6a10000,
   1822    0x001cb4ff, 0xf6c20000, 0x001cbdff, 0xf6e20000,
   1823    0x001cc5ff, 0xff000000, 0x001ccdff, 0xff200000,
   1824    0x001cd5ff, 0xff400000, 0x001cdeff, 0xff610000,
   1825    0x001ce6ff, 0xff810000, 0x001ceeff, 0xffa10000,
   1826    0x001cf6ff, 0xffc20000, 0x001cffff, 0xffe20000,
   1827};
   1828
   1829static void
   1830Blit_RGB565_RGBA8888(SDL_BlitInfo * info)
   1831{
   1832    Blit_RGB565_32(info, RGB565_RGBA8888_LUT);
   1833}
   1834
   1835/* Special optimized blit for RGB 5-6-5 --> BGRA 8-8-8-8 */
   1836static const Uint32 RGB565_BGRA8888_LUT[512] = {
   1837    0x00000000, 0x000000ff, 0x08000000, 0x002000ff,
   1838    0x10000000, 0x004000ff, 0x18000000, 0x006100ff,
   1839    0x20000000, 0x008100ff, 0x29000000, 0x00a100ff,
   1840    0x31000000, 0x00c200ff, 0x39000000, 0x00e200ff,
   1841    0x41000000, 0x000008ff, 0x4a000000, 0x002008ff,
   1842    0x52000000, 0x004008ff, 0x5a000000, 0x006108ff,
   1843    0x62000000, 0x008108ff, 0x6a000000, 0x00a108ff,
   1844    0x73000000, 0x00c208ff, 0x7b000000, 0x00e208ff,
   1845    0x83000000, 0x000010ff, 0x8b000000, 0x002010ff,
   1846    0x94000000, 0x004010ff, 0x9c000000, 0x006110ff,
   1847    0xa4000000, 0x008110ff, 0xac000000, 0x00a110ff,
   1848    0xb4000000, 0x00c210ff, 0xbd000000, 0x00e210ff,
   1849    0xc5000000, 0x000018ff, 0xcd000000, 0x002018ff,
   1850    0xd5000000, 0x004018ff, 0xde000000, 0x006118ff,
   1851    0xe6000000, 0x008118ff, 0xee000000, 0x00a118ff,
   1852    0xf6000000, 0x00c218ff, 0xff000000, 0x00e218ff,
   1853    0x00040000, 0x000020ff, 0x08040000, 0x002020ff,
   1854    0x10040000, 0x004020ff, 0x18040000, 0x006120ff,
   1855    0x20040000, 0x008120ff, 0x29040000, 0x00a120ff,
   1856    0x31040000, 0x00c220ff, 0x39040000, 0x00e220ff,
   1857    0x41040000, 0x000029ff, 0x4a040000, 0x002029ff,
   1858    0x52040000, 0x004029ff, 0x5a040000, 0x006129ff,
   1859    0x62040000, 0x008129ff, 0x6a040000, 0x00a129ff,
   1860    0x73040000, 0x00c229ff, 0x7b040000, 0x00e229ff,
   1861    0x83040000, 0x000031ff, 0x8b040000, 0x002031ff,
   1862    0x94040000, 0x004031ff, 0x9c040000, 0x006131ff,
   1863    0xa4040000, 0x008131ff, 0xac040000, 0x00a131ff,
   1864    0xb4040000, 0x00c231ff, 0xbd040000, 0x00e231ff,
   1865    0xc5040000, 0x000039ff, 0xcd040000, 0x002039ff,
   1866    0xd5040000, 0x004039ff, 0xde040000, 0x006139ff,
   1867    0xe6040000, 0x008139ff, 0xee040000, 0x00a139ff,
   1868    0xf6040000, 0x00c239ff, 0xff040000, 0x00e239ff,
   1869    0x00080000, 0x000041ff, 0x08080000, 0x002041ff,
   1870    0x10080000, 0x004041ff, 0x18080000, 0x006141ff,
   1871    0x20080000, 0x008141ff, 0x29080000, 0x00a141ff,
   1872    0x31080000, 0x00c241ff, 0x39080000, 0x00e241ff,
   1873    0x41080000, 0x00004aff, 0x4a080000, 0x00204aff,
   1874    0x52080000, 0x00404aff, 0x5a080000, 0x00614aff,
   1875    0x62080000, 0x00814aff, 0x6a080000, 0x00a14aff,
   1876    0x73080000, 0x00c24aff, 0x7b080000, 0x00e24aff,
   1877    0x83080000, 0x000052ff, 0x8b080000, 0x002052ff,
   1878    0x94080000, 0x004052ff, 0x9c080000, 0x006152ff,
   1879    0xa4080000, 0x008152ff, 0xac080000, 0x00a152ff,
   1880    0xb4080000, 0x00c252ff, 0xbd080000, 0x00e252ff,
   1881    0xc5080000, 0x00005aff, 0xcd080000, 0x00205aff,
   1882    0xd5080000, 0x00405aff, 0xde080000, 0x00615aff,
   1883    0xe6080000, 0x00815aff, 0xee080000, 0x00a15aff,
   1884    0xf6080000, 0x00c25aff, 0xff080000, 0x00e25aff,
   1885    0x000c0000, 0x000062ff, 0x080c0000, 0x002062ff,
   1886    0x100c0000, 0x004062ff, 0x180c0000, 0x006162ff,
   1887    0x200c0000, 0x008162ff, 0x290c0000, 0x00a162ff,
   1888    0x310c0000, 0x00c262ff, 0x390c0000, 0x00e262ff,
   1889    0x410c0000, 0x00006aff, 0x4a0c0000, 0x00206aff,
   1890    0x520c0000, 0x00406aff, 0x5a0c0000, 0x00616aff,
   1891    0x620c0000, 0x00816aff, 0x6a0c0000, 0x00a16aff,
   1892    0x730c0000, 0x00c26aff, 0x7b0c0000, 0x00e26aff,
   1893    0x830c0000, 0x000073ff, 0x8b0c0000, 0x002073ff,
   1894    0x940c0000, 0x004073ff, 0x9c0c0000, 0x006173ff,
   1895    0xa40c0000, 0x008173ff, 0xac0c0000, 0x00a173ff,
   1896    0xb40c0000, 0x00c273ff, 0xbd0c0000, 0x00e273ff,
   1897    0xc50c0000, 0x00007bff, 0xcd0c0000, 0x00207bff,
   1898    0xd50c0000, 0x00407bff, 0xde0c0000, 0x00617bff,
   1899    0xe60c0000, 0x00817bff, 0xee0c0000, 0x00a17bff,
   1900    0xf60c0000, 0x00c27bff, 0xff0c0000, 0x00e27bff,
   1901    0x00100000, 0x000083ff, 0x08100000, 0x002083ff,
   1902    0x10100000, 0x004083ff, 0x18100000, 0x006183ff,
   1903    0x20100000, 0x008183ff, 0x29100000, 0x00a183ff,
   1904    0x31100000, 0x00c283ff, 0x39100000, 0x00e283ff,
   1905    0x41100000, 0x00008bff, 0x4a100000, 0x00208bff,
   1906    0x52100000, 0x00408bff, 0x5a100000, 0x00618bff,
   1907    0x62100000, 0x00818bff, 0x6a100000, 0x00a18bff,
   1908    0x73100000, 0x00c28bff, 0x7b100000, 0x00e28bff,
   1909    0x83100000, 0x000094ff, 0x8b100000, 0x002094ff,
   1910    0x94100000, 0x004094ff, 0x9c100000, 0x006194ff,
   1911    0xa4100000, 0x008194ff, 0xac100000, 0x00a194ff,
   1912    0xb4100000, 0x00c294ff, 0xbd100000, 0x00e294ff,
   1913    0xc5100000, 0x00009cff, 0xcd100000, 0x00209cff,
   1914    0xd5100000, 0x00409cff, 0xde100000, 0x00619cff,
   1915    0xe6100000, 0x00819cff, 0xee100000, 0x00a19cff,
   1916    0xf6100000, 0x00c29cff, 0xff100000, 0x00e29cff,
   1917    0x00140000, 0x0000a4ff, 0x08140000, 0x0020a4ff,
   1918    0x10140000, 0x0040a4ff, 0x18140000, 0x0061a4ff,
   1919    0x20140000, 0x0081a4ff, 0x29140000, 0x00a1a4ff,
   1920    0x31140000, 0x00c2a4ff, 0x39140000, 0x00e2a4ff,
   1921    0x41140000, 0x0000acff, 0x4a140000, 0x0020acff,
   1922    0x52140000, 0x0040acff, 0x5a140000, 0x0061acff,
   1923    0x62140000, 0x0081acff, 0x6a140000, 0x00a1acff,
   1924    0x73140000, 0x00c2acff, 0x7b140000, 0x00e2acff,
   1925    0x83140000, 0x0000b4ff, 0x8b140000, 0x0020b4ff,
   1926    0x94140000, 0x0040b4ff, 0x9c140000, 0x0061b4ff,
   1927    0xa4140000, 0x0081b4ff, 0xac140000, 0x00a1b4ff,
   1928    0xb4140000, 0x00c2b4ff, 0xbd140000, 0x00e2b4ff,
   1929    0xc5140000, 0x0000bdff, 0xcd140000, 0x0020bdff,
   1930    0xd5140000, 0x0040bdff, 0xde140000, 0x0061bdff,
   1931    0xe6140000, 0x0081bdff, 0xee140000, 0x00a1bdff,
   1932    0xf6140000, 0x00c2bdff, 0xff140000, 0x00e2bdff,
   1933    0x00180000, 0x0000c5ff, 0x08180000, 0x0020c5ff,
   1934    0x10180000, 0x0040c5ff, 0x18180000, 0x0061c5ff,
   1935    0x20180000, 0x0081c5ff, 0x29180000, 0x00a1c5ff,
   1936    0x31180000, 0x00c2c5ff, 0x39180000, 0x00e2c5ff,
   1937    0x41180000, 0x0000cdff, 0x4a180000, 0x0020cdff,
   1938    0x52180000, 0x0040cdff, 0x5a180000, 0x0061cdff,
   1939    0x62180000, 0x0081cdff, 0x6a180000, 0x00a1cdff,
   1940    0x73180000, 0x00c2cdff, 0x7b180000, 0x00e2cdff,
   1941    0x83180000, 0x0000d5ff, 0x8b180000, 0x0020d5ff,
   1942    0x94180000, 0x0040d5ff, 0x9c180000, 0x0061d5ff,
   1943    0xa4180000, 0x0081d5ff, 0xac180000, 0x00a1d5ff,
   1944    0xb4180000, 0x00c2d5ff, 0xbd180000, 0x00e2d5ff,
   1945    0xc5180000, 0x0000deff, 0xcd180000, 0x0020deff,
   1946    0xd5180000, 0x0040deff, 0xde180000, 0x0061deff,
   1947    0xe6180000, 0x0081deff, 0xee180000, 0x00a1deff,
   1948    0xf6180000, 0x00c2deff, 0xff180000, 0x00e2deff,
   1949    0x001c0000, 0x0000e6ff, 0x081c0000, 0x0020e6ff,
   1950    0x101c0000, 0x0040e6ff, 0x181c0000, 0x0061e6ff,
   1951    0x201c0000, 0x0081e6ff, 0x291c0000, 0x00a1e6ff,
   1952    0x311c0000, 0x00c2e6ff, 0x391c0000, 0x00e2e6ff,
   1953    0x411c0000, 0x0000eeff, 0x4a1c0000, 0x0020eeff,
   1954    0x521c0000, 0x0040eeff, 0x5a1c0000, 0x0061eeff,
   1955    0x621c0000, 0x0081eeff, 0x6a1c0000, 0x00a1eeff,
   1956    0x731c0000, 0x00c2eeff, 0x7b1c0000, 0x00e2eeff,
   1957    0x831c0000, 0x0000f6ff, 0x8b1c0000, 0x0020f6ff,
   1958    0x941c0000, 0x0040f6ff, 0x9c1c0000, 0x0061f6ff,
   1959    0xa41c0000, 0x0081f6ff, 0xac1c0000, 0x00a1f6ff,
   1960    0xb41c0000, 0x00c2f6ff, 0xbd1c0000, 0x00e2f6ff,
   1961    0xc51c0000, 0x0000ffff, 0xcd1c0000, 0x0020ffff,
   1962    0xd51c0000, 0x0040ffff, 0xde1c0000, 0x0061ffff,
   1963    0xe61c0000, 0x0081ffff, 0xee1c0000, 0x00a1ffff,
   1964    0xf61c0000, 0x00c2ffff, 0xff1c0000, 0x00e2ffff
   1965};
   1966
   1967static void
   1968Blit_RGB565_BGRA8888(SDL_BlitInfo * info)
   1969{
   1970    Blit_RGB565_32(info, RGB565_BGRA8888_LUT);
   1971}
   1972
   1973static void
   1974BlitNto1(SDL_BlitInfo * info)
   1975{
   1976#ifndef USE_DUFFS_LOOP
   1977    int c;
   1978#endif
   1979    int width, height;
   1980    Uint8 *src;
   1981    const Uint8 *map;
   1982    Uint8 *dst;
   1983    int srcskip, dstskip;
   1984    int srcbpp;
   1985    Uint32 Pixel;
   1986    int sR, sG, sB;
   1987    SDL_PixelFormat *srcfmt;
   1988
   1989    /* Set up some basic variables */
   1990    width = info->dst_w;
   1991    height = info->dst_h;
   1992    src = info->src;
   1993    srcskip = info->src_skip;
   1994    dst = info->dst;
   1995    dstskip = info->dst_skip;
   1996    map = info->table;
   1997    srcfmt = info->src_fmt;
   1998    srcbpp = srcfmt->BytesPerPixel;
   1999
   2000    if (map == NULL) {
   2001        while (height--) {
   2002#ifdef USE_DUFFS_LOOP
   2003            /* *INDENT-OFF* */
   2004            DUFFS_LOOP(
   2005                DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
   2006                                sR, sG, sB);
   2007                if ( 1 ) {
   2008                    /* Pack RGB into 8bit pixel */
   2009                    *dst = ((sR>>5)<<(3+2))|
   2010                            ((sG>>5)<<(2)) |
   2011                            ((sB>>6)<<(0)) ;
   2012                }
   2013                dst++;
   2014                src += srcbpp;
   2015            , width);
   2016            /* *INDENT-ON* */
   2017#else
   2018            for (c = width; c; --c) {
   2019                DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
   2020                if (1) {
   2021                    /* Pack RGB into 8bit pixel */
   2022                    *dst = ((sR >> 5) << (3 + 2)) |
   2023                        ((sG >> 5) << (2)) | ((sB >> 6) << (0));
   2024                }
   2025                dst++;
   2026                src += srcbpp;
   2027            }
   2028#endif
   2029            src += srcskip;
   2030            dst += dstskip;
   2031        }
   2032    } else {
   2033        while (height--) {
   2034#ifdef USE_DUFFS_LOOP
   2035            /* *INDENT-OFF* */
   2036            DUFFS_LOOP(
   2037                DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
   2038                                sR, sG, sB);
   2039                if ( 1 ) {
   2040                    /* Pack RGB into 8bit pixel */
   2041                    *dst = map[((sR>>5)<<(3+2))|
   2042                           ((sG>>5)<<(2))  |
   2043                           ((sB>>6)<<(0))  ];
   2044                }
   2045                dst++;
   2046                src += srcbpp;
   2047            , width);
   2048            /* *INDENT-ON* */
   2049#else
   2050            for (c = width; c; --c) {
   2051                DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
   2052                if (1) {
   2053                    /* Pack RGB into 8bit pixel */
   2054                    *dst = map[((sR >> 5) << (3 + 2)) |
   2055                               ((sG >> 5) << (2)) | ((sB >> 6) << (0))];
   2056                }
   2057                dst++;
   2058                src += srcbpp;
   2059            }
   2060#endif /* USE_DUFFS_LOOP */
   2061            src += srcskip;
   2062            dst += dstskip;
   2063        }
   2064    }
   2065}
   2066
   2067/* blits 32 bit RGB<->RGBA with both surfaces having the same R,G,B fields */
   2068static void
   2069Blit4to4MaskAlpha(SDL_BlitInfo * info)
   2070{
   2071    int width = info->dst_w;
   2072    int height = info->dst_h;
   2073    Uint32 *src = (Uint32 *) info->src;
   2074    int srcskip = info->src_skip;
   2075    Uint32 *dst = (Uint32 *) info->dst;
   2076    int dstskip = info->dst_skip;
   2077    SDL_PixelFormat *srcfmt = info->src_fmt;
   2078    SDL_PixelFormat *dstfmt = info->dst_fmt;
   2079
   2080    if (dstfmt->Amask) {
   2081        /* RGB->RGBA, SET_ALPHA */
   2082        Uint32 mask = (info->a >> dstfmt->Aloss) << dstfmt->Ashift;
   2083
   2084        while (height--) {
   2085            /* *INDENT-OFF* */
   2086            DUFFS_LOOP(
   2087            {
   2088                *dst = *src | mask;
   2089                ++dst;
   2090                ++src;
   2091            },
   2092            width);
   2093            /* *INDENT-ON* */
   2094            src = (Uint32 *) ((Uint8 *) src + srcskip);
   2095            dst = (Uint32 *) ((Uint8 *) dst + dstskip);
   2096        }
   2097    } else {
   2098        /* RGBA->RGB, NO_ALPHA */
   2099        Uint32 mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
   2100
   2101        while (height--) {
   2102            /* *INDENT-OFF* */
   2103            DUFFS_LOOP(
   2104            {
   2105                *dst = *src & mask;
   2106                ++dst;
   2107                ++src;
   2108            },
   2109            width);
   2110            /* *INDENT-ON* */
   2111            src = (Uint32 *) ((Uint8 *) src + srcskip);
   2112            dst = (Uint32 *) ((Uint8 *) dst + dstskip);
   2113        }
   2114    }
   2115}
   2116
   2117static void
   2118BlitNtoN(SDL_BlitInfo * info)
   2119{
   2120    int width = info->dst_w;
   2121    int height = info->dst_h;
   2122    Uint8 *src = info->src;
   2123    int srcskip = info->src_skip;
   2124    Uint8 *dst = info->dst;
   2125    int dstskip = info->dst_skip;
   2126    SDL_PixelFormat *srcfmt = info->src_fmt;
   2127    int srcbpp = srcfmt->BytesPerPixel;
   2128    SDL_PixelFormat *dstfmt = info->dst_fmt;
   2129    int dstbpp = dstfmt->BytesPerPixel;
   2130    unsigned alpha = dstfmt->Amask ? info->a : 0;
   2131
   2132    while (height--) {
   2133        /* *INDENT-OFF* */
   2134        DUFFS_LOOP(
   2135        {
   2136            Uint32 Pixel;
   2137            unsigned sR;
   2138            unsigned sG;
   2139            unsigned sB;
   2140            DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
   2141            ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
   2142            dst += dstbpp;
   2143            src += srcbpp;
   2144        },
   2145        width);
   2146        /* *INDENT-ON* */
   2147        src += srcskip;
   2148        dst += dstskip;
   2149    }
   2150}
   2151
   2152static void
   2153BlitNtoNCopyAlpha(SDL_BlitInfo * info)
   2154{
   2155    int width = info->dst_w;
   2156    int height = info->dst_h;
   2157    Uint8 *src = info->src;
   2158    int srcskip = info->src_skip;
   2159    Uint8 *dst = info->dst;
   2160    int dstskip = info->dst_skip;
   2161    SDL_PixelFormat *srcfmt = info->src_fmt;
   2162    int srcbpp = srcfmt->BytesPerPixel;
   2163    SDL_PixelFormat *dstfmt = info->dst_fmt;
   2164    int dstbpp = dstfmt->BytesPerPixel;
   2165    int c;
   2166
   2167    while (height--) {
   2168        for (c = width; c; --c) {
   2169            Uint32 Pixel;
   2170            unsigned sR, sG, sB, sA;
   2171            DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
   2172            ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, sA);
   2173            dst += dstbpp;
   2174            src += srcbpp;
   2175        }
   2176        src += srcskip;
   2177        dst += dstskip;
   2178    }
   2179}
   2180
   2181static void
   2182BlitNto1Key(SDL_BlitInfo * info)
   2183{
   2184    int width = info->dst_w;
   2185    int height = info->dst_h;
   2186    Uint8 *src = info->src;
   2187    int srcskip = info->src_skip;
   2188    Uint8 *dst = info->dst;
   2189    int dstskip = info->dst_skip;
   2190    SDL_PixelFormat *srcfmt = info->src_fmt;
   2191    const Uint8 *palmap = info->table;
   2192    Uint32 ckey = info->colorkey;
   2193    Uint32 rgbmask = ~srcfmt->Amask;
   2194    int srcbpp;
   2195    Uint32 Pixel;
   2196    unsigned sR, sG, sB;
   2197
   2198    /* Set up some basic variables */
   2199    srcbpp = srcfmt->BytesPerPixel;
   2200    ckey &= rgbmask;
   2201
   2202    if (palmap == NULL) {
   2203        while (height--) {
   2204            /* *INDENT-OFF* */
   2205            DUFFS_LOOP(
   2206            {
   2207                DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
   2208                                sR, sG, sB);
   2209                if ( (Pixel & rgbmask) != ckey ) {
   2210                    /* Pack RGB into 8bit pixel */
   2211                    *dst = (Uint8)(((sR>>5)<<(3+2))|
   2212                                   ((sG>>5)<<(2)) |
   2213                                   ((sB>>6)<<(0)));
   2214                }
   2215                dst++;
   2216                src += srcbpp;
   2217            },
   2218            width);
   2219            /* *INDENT-ON* */
   2220            src += srcskip;
   2221            dst += dstskip;
   2222        }
   2223    } else {
   2224        while (height--) {
   2225            /* *INDENT-OFF* */
   2226            DUFFS_LOOP(
   2227            {
   2228                DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
   2229                                sR, sG, sB);
   2230                if ( (Pixel & rgbmask) != ckey ) {
   2231                    /* Pack RGB into 8bit pixel */
   2232                    *dst = (Uint8)palmap[((sR>>5)<<(3+2))|
   2233                                         ((sG>>5)<<(2))  |
   2234                                         ((sB>>6)<<(0))  ];
   2235                }
   2236                dst++;
   2237                src += srcbpp;
   2238            },
   2239            width);
   2240            /* *INDENT-ON* */
   2241            src += srcskip;
   2242            dst += dstskip;
   2243        }
   2244    }
   2245}
   2246
   2247static void
   2248Blit2to2Key(SDL_BlitInfo * info)
   2249{
   2250    int width = info->dst_w;
   2251    int height = info->dst_h;
   2252    Uint16 *srcp = (Uint16 *) info->src;
   2253    int srcskip = info->src_skip;
   2254    Uint16 *dstp = (Uint16 *) info->dst;
   2255    int dstskip = info->dst_skip;
   2256    Uint32 ckey = info->colorkey;
   2257    Uint32 rgbmask = ~info->src_fmt->Amask;
   2258
   2259    /* Set up some basic variables */
   2260    srcskip /= 2;
   2261    dstskip /= 2;
   2262    ckey &= rgbmask;
   2263
   2264    while (height--) {
   2265        /* *INDENT-OFF* */
   2266        DUFFS_LOOP(
   2267        {
   2268            if ( (*srcp & rgbmask) != ckey ) {
   2269                *dstp = *srcp;
   2270            }
   2271            dstp++;
   2272            srcp++;
   2273        },
   2274        width);
   2275        /* *INDENT-ON* */
   2276        srcp += srcskip;
   2277        dstp += dstskip;
   2278    }
   2279}
   2280
   2281static void
   2282BlitNtoNKey(SDL_BlitInfo * info)
   2283{
   2284    int width = info->dst_w;
   2285    int height = info->dst_h;
   2286    Uint8 *src = info->src;
   2287    int srcskip = info->src_skip;
   2288    Uint8 *dst = info->dst;
   2289    int dstskip = info->dst_skip;
   2290    Uint32 ckey = info->colorkey;
   2291    SDL_PixelFormat *srcfmt = info->src_fmt;
   2292    SDL_PixelFormat *dstfmt = info->dst_fmt;
   2293    int srcbpp = srcfmt->BytesPerPixel;
   2294    int dstbpp = dstfmt->BytesPerPixel;
   2295    unsigned alpha = dstfmt->Amask ? info->a : 0;
   2296    Uint32 rgbmask = ~srcfmt->Amask;
   2297
   2298    /* Set up some basic variables */
   2299    ckey &= rgbmask;
   2300
   2301    while (height--) {
   2302        /* *INDENT-OFF* */
   2303        DUFFS_LOOP(
   2304        {
   2305            Uint32 Pixel;
   2306            unsigned sR;
   2307            unsigned sG;
   2308            unsigned sB;
   2309            RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel);
   2310            if ( (Pixel & rgbmask) != ckey ) {
   2311                RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB);
   2312                ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
   2313            }
   2314            dst += dstbpp;
   2315            src += srcbpp;
   2316        },
   2317        width);
   2318        /* *INDENT-ON* */
   2319        src += srcskip;
   2320        dst += dstskip;
   2321    }
   2322}
   2323
   2324static void
   2325BlitNtoNKeyCopyAlpha(SDL_BlitInfo * info)
   2326{
   2327    int width = info->dst_w;
   2328    int height = info->dst_h;
   2329    Uint8 *src = info->src;
   2330    int srcskip = info->src_skip;
   2331    Uint8 *dst = info->dst;
   2332    int dstskip = info->dst_skip;
   2333    Uint32 ckey = info->colorkey;
   2334    SDL_PixelFormat *srcfmt = info->src_fmt;
   2335    SDL_PixelFormat *dstfmt = info->dst_fmt;
   2336    Uint32 rgbmask = ~srcfmt->Amask;
   2337
   2338    Uint8 srcbpp;
   2339    Uint8 dstbpp;
   2340    Uint32 Pixel;
   2341    unsigned sR, sG, sB, sA;
   2342
   2343    /* Set up some basic variables */
   2344    srcbpp = srcfmt->BytesPerPixel;
   2345    dstbpp = dstfmt->BytesPerPixel;
   2346    ckey &= rgbmask;
   2347
   2348    while (height--) {
   2349        /* *INDENT-OFF* */
   2350        DUFFS_LOOP(
   2351        {
   2352            DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
   2353            if ( (Pixel & rgbmask) != ckey ) {
   2354                  ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, sA);
   2355            }
   2356            dst += dstbpp;
   2357            src += srcbpp;
   2358        },
   2359        width);
   2360        /* *INDENT-ON* */
   2361        src += srcskip;
   2362        dst += dstskip;
   2363    }
   2364}
   2365
   2366/* Special optimized blit for ARGB 2-10-10-10 --> RGBA */
   2367static void
   2368Blit2101010toN(SDL_BlitInfo * info)
   2369{
   2370    int width = info->dst_w;
   2371    int height = info->dst_h;
   2372    Uint8 *src = info->src;
   2373    int srcskip = info->src_skip;
   2374    Uint8 *dst = info->dst;
   2375    int dstskip = info->dst_skip;
   2376    SDL_PixelFormat *dstfmt = info->dst_fmt;
   2377    int dstbpp = dstfmt->BytesPerPixel;
   2378    Uint32 Pixel;
   2379    unsigned sR, sG, sB, sA;
   2380
   2381    while (height--) {
   2382        /* *INDENT-OFF* */
   2383        DUFFS_LOOP(
   2384        {
   2385            Pixel = *(Uint32 *)src;
   2386            RGBA_FROM_ARGB2101010(Pixel, sR, sG, sB, sA);
   2387            ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, sA);
   2388            dst += dstbpp;
   2389            src += 4;
   2390        },
   2391        width);
   2392        /* *INDENT-ON* */
   2393        src += srcskip;
   2394        dst += dstskip;
   2395    }
   2396}
   2397
   2398/* Special optimized blit for RGBA --> ARGB 2-10-10-10 */
   2399static void
   2400BlitNto2101010(SDL_BlitInfo * info)
   2401{
   2402    int width = info->dst_w;
   2403    int height = info->dst_h;
   2404    Uint8 *src = info->src;
   2405    int srcskip = info->src_skip;
   2406    Uint8 *dst = info->dst;
   2407    int dstskip = info->dst_skip;
   2408    SDL_PixelFormat *srcfmt = info->src_fmt;
   2409    int srcbpp = srcfmt->BytesPerPixel;
   2410    Uint32 Pixel;
   2411    unsigned sR, sG, sB, sA;
   2412
   2413    while (height--) {
   2414        /* *INDENT-OFF* */
   2415        DUFFS_LOOP(
   2416        {
   2417            DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
   2418            ARGB2101010_FROM_RGBA(Pixel, sR, sG, sB, sA);
   2419            *(Uint32 *)dst = Pixel;
   2420            dst += 4;
   2421            src += srcbpp;
   2422        },
   2423        width);
   2424        /* *INDENT-ON* */
   2425        src += srcskip;
   2426        dst += dstskip;
   2427    }
   2428}
   2429
   2430/* Normal N to N optimized blitters */
   2431struct blit_table
   2432{
   2433    Uint32 srcR, srcG, srcB;
   2434    int dstbpp;
   2435    Uint32 dstR, dstG, dstB;
   2436    Uint32 blit_features;
   2437    SDL_BlitFunc blitfunc;
   2438    enum
   2439    { NO_ALPHA = 1, SET_ALPHA = 2, COPY_ALPHA = 4 } alpha;
   2440};
   2441static const struct blit_table normal_blit_1[] = {
   2442    /* Default for 8-bit RGB source, never optimized */
   2443    {0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
   2444};
   2445
   2446static const struct blit_table normal_blit_2[] = {
   2447#if SDL_ALTIVEC_BLITTERS
   2448    /* has-altivec */
   2449    {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00000000, 0x00000000, 0x00000000,
   2450     2, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
   2451    {0x00007C00, 0x000003E0, 0x0000001F, 4, 0x00000000, 0x00000000, 0x00000000,
   2452     2, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
   2453#endif
   2454    {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
   2455     0, Blit_RGB565_ARGB8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
   2456    {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x000000FF, 0x0000FF00, 0x00FF0000,
   2457     0, Blit_RGB565_ABGR8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
   2458    {0x0000F800, 0x000007E0, 0x0000001F, 4, 0xFF000000, 0x00FF0000, 0x0000FF00,
   2459     0, Blit_RGB565_RGBA8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
   2460    {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x0000FF00, 0x00FF0000, 0xFF000000,
   2461     0, Blit_RGB565_BGRA8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
   2462
   2463    /* Default for 16-bit RGB source, used if no other blitter matches */
   2464    {0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
   2465};
   2466
   2467static const struct blit_table normal_blit_3[] = {
   2468    /* Default for 24-bit RGB source, never optimized */
   2469    {0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
   2470};
   2471
   2472static const struct blit_table normal_blit_4[] = {
   2473#if SDL_ALTIVEC_BLITTERS
   2474    /* has-altivec | dont-use-prefetch */
   2475    {0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000, 0x00000000,
   2476     6, ConvertAltivec32to32_noprefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
   2477    /* has-altivec */
   2478    {0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000, 0x00000000,
   2479     2, ConvertAltivec32to32_prefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
   2480    /* has-altivec */
   2481    {0x00000000, 0x00000000, 0x00000000, 2, 0x0000F800, 0x000007E0, 0x0000001F,
   2482     2, Blit_RGB888_RGB565Altivec, NO_ALPHA},
   2483#endif
   2484    {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000F800, 0x000007E0, 0x0000001F,
   2485     0, Blit_RGB888_RGB565, NO_ALPHA},
   2486    {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x00007C00, 0x000003E0, 0x0000001F,
   2487     0, Blit_RGB888_RGB555, NO_ALPHA},
   2488    /* Default for 32-bit RGB source, used if no other blitter matches */
   2489    {0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
   2490};
   2491
   2492static const struct blit_table *const normal_blit[] = {
   2493    normal_blit_1, normal_blit_2, normal_blit_3, normal_blit_4
   2494};
   2495
   2496/* Mask matches table, or table entry is zero */
   2497#define MASKOK(x, y) (((x) == (y)) || ((y) == 0x00000000))
   2498
   2499SDL_BlitFunc
   2500SDL_CalculateBlitN(SDL_Surface * surface)
   2501{
   2502    SDL_PixelFormat *srcfmt;
   2503    SDL_PixelFormat *dstfmt;
   2504    const struct blit_table *table;
   2505    int which;
   2506    SDL_BlitFunc blitfun;
   2507
   2508    /* Set up data for choosing the blit */
   2509    srcfmt = surface->format;
   2510    dstfmt = surface->map->dst->format;
   2511
   2512    /* We don't support destinations less than 8-bits */
   2513    if (dstfmt->BitsPerPixel < 8) {
   2514        return (NULL);
   2515    }
   2516
   2517    switch (surface->map->info.flags & ~SDL_COPY_RLE_MASK) {
   2518    case 0:
   2519        blitfun = NULL;
   2520        if (dstfmt->BitsPerPixel == 8) {
   2521            if ((srcfmt->BytesPerPixel == 4) &&
   2522                (srcfmt->Rmask == 0x00FF0000) &&
   2523                (srcfmt->Gmask == 0x0000FF00) &&
   2524                (srcfmt->Bmask == 0x000000FF)) {
   2525                blitfun = Blit_RGB888_index8;
   2526            } else if ((srcfmt->BytesPerPixel == 4) &&
   2527                (srcfmt->Rmask == 0x3FF00000) &&
   2528                (srcfmt->Gmask == 0x000FFC00) &&
   2529                (srcfmt->Bmask == 0x000003FF)) {
   2530                blitfun = Blit_RGB101010_index8;
   2531            } else {
   2532                blitfun = BlitNto1;
   2533            }
   2534        } else {
   2535            /* Now the meat, choose the blitter we want */
   2536            int a_need = NO_ALPHA;
   2537            if (dstfmt->Amask)
   2538                a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA;
   2539            table = normal_blit[srcfmt->BytesPerPixel - 1];
   2540            for (which = 0; table[which].dstbpp; ++which) {
   2541                if (MASKOK(srcfmt->Rmask, table[which].srcR) &&
   2542                    MASKOK(srcfmt->Gmask, table[which].srcG) &&
   2543                    MASKOK(srcfmt->Bmask, table[which].srcB) &&
   2544                    MASKOK(dstfmt->Rmask, table[which].dstR) &&
   2545                    MASKOK(dstfmt->Gmask, table[which].dstG) &&
   2546                    MASKOK(dstfmt->Bmask, table[which].dstB) &&
   2547                    dstfmt->BytesPerPixel == table[which].dstbpp &&
   2548                    (a_need & table[which].alpha) == a_need &&
   2549                    ((table[which].blit_features & GetBlitFeatures()) ==
   2550                     table[which].blit_features))
   2551                    break;
   2552            }
   2553            blitfun = table[which].blitfunc;
   2554
   2555            if (blitfun == BlitNtoN) {  /* default C fallback catch-all. Slow! */
   2556                if (srcfmt->format == SDL_PIXELFORMAT_ARGB2101010) {
   2557                    blitfun = Blit2101010toN;
   2558                } else if (dstfmt->format == SDL_PIXELFORMAT_ARGB2101010) {
   2559                    blitfun = BlitNto2101010;
   2560                } else if (srcfmt->BytesPerPixel == 4 &&
   2561                            dstfmt->BytesPerPixel == 4 &&
   2562                            srcfmt->Rmask == dstfmt->Rmask &&
   2563                            srcfmt->Gmask == dstfmt->Gmask &&
   2564                            srcfmt->Bmask == dstfmt->Bmask) {
   2565                    /* Fastpath C fallback: 32bit RGB<->RGBA blit with matching RGB */
   2566                    blitfun = Blit4to4MaskAlpha;
   2567                } else if (a_need == COPY_ALPHA) {
   2568                    blitfun = BlitNtoNCopyAlpha;
   2569                }
   2570            }
   2571        }
   2572        return (blitfun);
   2573
   2574    case SDL_COPY_COLORKEY:
   2575        /* colorkey blit: Here we don't have too many options, mostly
   2576           because RLE is the preferred fast way to deal with this.
   2577           If a particular case turns out to be useful we'll add it. */
   2578
   2579        if (srcfmt->BytesPerPixel == 2 && surface->map->identity)
   2580            return Blit2to2Key;
   2581        else if (dstfmt->BytesPerPixel == 1)
   2582            return BlitNto1Key;
   2583        else {
   2584#if SDL_ALTIVEC_BLITTERS
   2585            if ((srcfmt->BytesPerPixel == 4) && (dstfmt->BytesPerPixel == 4)
   2586                && SDL_HasAltiVec()) {
   2587                return Blit32to32KeyAltivec;
   2588            } else
   2589#endif
   2590            if (srcfmt->Amask && dstfmt->Amask) {
   2591                return BlitNtoNKeyCopyAlpha;
   2592            } else {
   2593                return BlitNtoNKey;
   2594            }
   2595        }
   2596    }
   2597
   2598    return NULL;
   2599}
   2600
   2601/* vi: set ts=4 sw=4 expandtab: */