cscg22-gearboy

CSCG 2022 Challenge 'Gearboy'
git clone https://git.sinitax.com/sinitax/cscg22-gearboy
Log | Files | Refs | sfeed.txt

SDL_blit_copy.c (3836B)


      1/*
      2  Simple DirectMedia Layer
      3  Copyright (C) 1997-2014 Sam Lantinga <slouken@libsdl.org>
      4
      5  This software is provided 'as-is', without any express or implied
      6  warranty.  In no event will the authors be held liable for any damages
      7  arising from the use of this software.
      8
      9  Permission is granted to anyone to use this software for any purpose,
     10  including commercial applications, and to alter it and redistribute it
     11  freely, subject to the following restrictions:
     12
     13  1. The origin of this software must not be misrepresented; you must not
     14     claim that you wrote the original software. If you use this software
     15     in a product, an acknowledgment in the product documentation would be
     16     appreciated but is not required.
     17  2. Altered source versions must be plainly marked as such, and must not be
     18     misrepresented as being the original software.
     19  3. This notice may not be removed or altered from any source distribution.
     20*/
     21#include "../SDL_internal.h"
     22
     23#include "SDL_video.h"
     24#include "SDL_blit.h"
     25#include "SDL_blit_copy.h"
     26
     27
     28#ifdef __SSE__
     29/* This assumes 16-byte aligned src and dst */
     30static SDL_INLINE void
     31SDL_memcpySSE(Uint8 * dst, const Uint8 * src, int len)
     32{
     33    int i;
     34
     35    __m128 values[4];
     36    for (i = len / 64; i--;) {
     37        _mm_prefetch(src, _MM_HINT_NTA);
     38        values[0] = *(__m128 *) (src + 0);
     39        values[1] = *(__m128 *) (src + 16);
     40        values[2] = *(__m128 *) (src + 32);
     41        values[3] = *(__m128 *) (src + 48);
     42        _mm_stream_ps((float *) (dst + 0), values[0]);
     43        _mm_stream_ps((float *) (dst + 16), values[1]);
     44        _mm_stream_ps((float *) (dst + 32), values[2]);
     45        _mm_stream_ps((float *) (dst + 48), values[3]);
     46        src += 64;
     47        dst += 64;
     48    }
     49
     50    if (len & 63)
     51        SDL_memcpy(dst, src, len & 63);
     52}
     53#endif /* __SSE__ */
     54
     55#ifdef __MMX__
     56#ifdef _MSC_VER
     57#pragma warning(disable:4799)
     58#endif
     59static SDL_INLINE void
     60SDL_memcpyMMX(Uint8 * dst, const Uint8 * src, int len)
     61{
     62    const int remain = (len & 63);
     63    int i;
     64
     65    __m64* d64 = (__m64*)dst;
     66    __m64* s64 = (__m64*)src;
     67
     68    for(i= len / 64; i--;) {
     69        d64[0] = s64[0];
     70        d64[1] = s64[1];
     71        d64[2] = s64[2];
     72        d64[3] = s64[3];
     73        d64[4] = s64[4];
     74        d64[5] = s64[5];
     75        d64[6] = s64[6];
     76        d64[7] = s64[7];
     77
     78        d64 += 8;
     79        s64 += 8;
     80    }
     81
     82    if (remain)
     83    {
     84        const int skip = len - remain;
     85        SDL_memcpy(dst + skip, src + skip, remain);
     86    }
     87}
     88#endif /* __MMX__ */
     89
     90void
     91SDL_BlitCopy(SDL_BlitInfo * info)
     92{
     93    SDL_bool overlap;
     94    Uint8 *src, *dst;
     95    int w, h;
     96    int srcskip, dstskip;
     97
     98    w = info->dst_w * info->dst_fmt->BytesPerPixel;
     99    h = info->dst_h;
    100    src = info->src;
    101    dst = info->dst;
    102    srcskip = info->src_pitch;
    103    dstskip = info->dst_pitch;
    104
    105    /* Properly handle overlapping blits */
    106    if (src < dst) {
    107        overlap = (dst < (src + h*srcskip));
    108    } else {
    109        overlap = (src < (dst + h*dstskip));
    110    }
    111    if (overlap) {
    112        while (h--) {
    113            SDL_memmove(dst, src, w);
    114            src += srcskip;
    115            dst += dstskip;
    116        }
    117        return;
    118    }
    119
    120#ifdef __SSE__
    121    if (SDL_HasSSE() &&
    122        !((uintptr_t) src & 15) && !(srcskip & 15) &&
    123        !((uintptr_t) dst & 15) && !(dstskip & 15)) {
    124        while (h--) {
    125            SDL_memcpySSE(dst, src, w);
    126            src += srcskip;
    127            dst += dstskip;
    128        }
    129        return;
    130    }
    131#endif
    132
    133#ifdef __MMX__
    134    if (SDL_HasMMX() && !(srcskip & 7) && !(dstskip & 7)) {
    135        while (h--) {
    136            SDL_memcpyMMX(dst, src, w);
    137            src += srcskip;
    138            dst += dstskip;
    139        }
    140        _mm_empty();
    141        return;
    142    }
    143#endif
    144
    145    while (h--) {
    146        SDL_memcpy(dst, src, w);
    147        src += srcskip;
    148        dst += dstskip;
    149    }
    150}
    151
    152/* vi: set ts=4 sw=4 expandtab: */