SDL_yuv_sw.c - cscg22-gearboy - CSCG 2022 Challenge 'Gearboy'

	cscg22-gearboy CSCG 2022 Challenge 'Gearboy'
	git clone https://git.sinitax.com/sinitax/cscg22-gearboy
	Log \| Files \| Refs \| sfeed.txt
SDL_yuv_sw.c (44792B)
      1/*
      2  Simple DirectMedia Layer
      3  Copyright (C) 1997-2014 Sam Lantinga <slouken@libsdl.org>
      4
      5  This software is provided 'as-is', without any express or implied
      6  warranty.  In no event will the authors be held liable for any damages
      7  arising from the use of this software.
      8
      9  Permission is granted to anyone to use this software for any purpose,
     10  including commercial applications, and to alter it and redistribute it
     11  freely, subject to the following restrictions:
     12
     13  1. The origin of this software must not be misrepresented; you must not
     14     claim that you wrote the original software. If you use this software
     15     in a product, an acknowledgment in the product documentation would be
     16     appreciated but is not required.
     17  2. Altered source versions must be plainly marked as such, and must not be
     18     misrepresented as being the original software.
     19  3. This notice may not be removed or altered from any source distribution.
     20*/
     21#include "../SDL_internal.h"
     22
     23/* This is the software implementation of the YUV texture support */
     24
     25/* This code was derived from code carrying the following copyright notices:
     26
     27 * Copyright (c) 1995 The Regents of the University of California.
     28 * All rights reserved.
     29 *
     30 * Permission to use, copy, modify, and distribute this software and its
     31 * documentation for any purpose, without fee, and without written agreement is
     32 * hereby granted, provided that the above copyright notice and the following
     33 * two paragraphs appear in all copies of this software.
     34 *
     35 * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
     36 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
     37 * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
     38 * CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     39 *
     40 * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
     41 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
     42 * AND FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
     43 * ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO
     44 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
     45
     46 * Copyright (c) 1995 Erik Corry
     47 * All rights reserved.
     48 *
     49 * Permission to use, copy, modify, and distribute this software and its
     50 * documentation for any purpose, without fee, and without written agreement is
     51 * hereby granted, provided that the above copyright notice and the following
     52 * two paragraphs appear in all copies of this software.
     53 *
     54 * IN NO EVENT SHALL ERIK CORRY BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
     55 * SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF
     56 * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF ERIK CORRY HAS BEEN ADVISED
     57 * OF THE POSSIBILITY OF SUCH DAMAGE.
     58 *
     59 * ERIK CORRY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
     60 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
     61 * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
     62 * BASIS, AND ERIK CORRY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
     63 * UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
     64
     65 * Portions of this software Copyright (c) 1995 Brown University.
     66 * All rights reserved.
     67 *
     68 * Permission to use, copy, modify, and distribute this software and its
     69 * documentation for any purpose, without fee, and without written agreement
     70 * is hereby granted, provided that the above copyright notice and the
     71 * following two paragraphs appear in all copies of this software.
     72 *
     73 * IN NO EVENT SHALL BROWN UNIVERSITY BE LIABLE TO ANY PARTY FOR
     74 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
     75 * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF BROWN
     76 * UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     77 *
     78 * BROWN UNIVERSITY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
     79 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
     80 * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
     81 * BASIS, AND BROWN UNIVERSITY HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
     82 * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
     83 */
     84
     85#include "SDL_assert.h"
     86#include "SDL_video.h"
     87#include "SDL_cpuinfo.h"
     88#include "SDL_yuv_sw_c.h"
     89
     90
     91/* The colorspace conversion functions */
     92
     93#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
     94extern void Color565DitherYV12MMX1X(int *colortab, Uint32 * rgb_2_pix,
     95                                    unsigned char *lum, unsigned char *cr,
     96                                    unsigned char *cb, unsigned char *out,
     97                                    int rows, int cols, int mod);
     98extern void ColorRGBDitherYV12MMX1X(int *colortab, Uint32 * rgb_2_pix,
     99                                    unsigned char *lum, unsigned char *cr,
    100                                    unsigned char *cb, unsigned char *out,
    101                                    int rows, int cols, int mod);
    102#endif
    103
    104static void
    105Color16DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
    106                       unsigned char *lum, unsigned char *cr,
    107                       unsigned char *cb, unsigned char *out,
    108                       int rows, int cols, int mod)
    109{
    110    unsigned short *row1;
    111    unsigned short *row2;
    112    unsigned char *lum2;
    113    int x, y;
    114    int cr_r;
    115    int crb_g;
    116    int cb_b;
    117    int cols_2 = cols / 2;
    118
    119    row1 = (unsigned short *) out;
    120    row2 = row1 + cols + mod;
    121    lum2 = lum + cols;
    122
    123    mod += cols + mod;
    124
    125    y = rows / 2;
    126    while (y--) {
    127        x = cols_2;
    128        while (x--) {
    129            register int L;
    130
    131            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
    132            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
    133                + colortab[*cb + 2 * 256];
    134            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
    135            ++cr;
    136            ++cb;
    137
    138            L = *lum++;
    139            *row1++ = (unsigned short) (rgb_2_pix[L + cr_r] |
    140                                        rgb_2_pix[L + crb_g] |
    141                                        rgb_2_pix[L + cb_b]);
    142
    143            L = *lum++;
    144            *row1++ = (unsigned short) (rgb_2_pix[L + cr_r] |
    145                                        rgb_2_pix[L + crb_g] |
    146                                        rgb_2_pix[L + cb_b]);
    147
    148
    149            /* Now, do second row.  */
    150
    151            L = *lum2++;
    152            *row2++ = (unsigned short) (rgb_2_pix[L + cr_r] |
    153                                        rgb_2_pix[L + crb_g] |
    154                                        rgb_2_pix[L + cb_b]);
    155
    156            L = *lum2++;
    157            *row2++ = (unsigned short) (rgb_2_pix[L + cr_r] |
    158                                        rgb_2_pix[L + crb_g] |
    159                                        rgb_2_pix[L + cb_b]);
    160        }
    161
    162        /*
    163         * These values are at the start of the next line, (due
    164         * to the ++'s above),but they need to be at the start
    165         * of the line after that.
    166         */
    167        lum += cols;
    168        lum2 += cols;
    169        row1 += mod;
    170        row2 += mod;
    171    }
    172}
    173
    174static void
    175Color24DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
    176                       unsigned char *lum, unsigned char *cr,
    177                       unsigned char *cb, unsigned char *out,
    178                       int rows, int cols, int mod)
    179{
    180    unsigned int value;
    181    unsigned char *row1;
    182    unsigned char *row2;
    183    unsigned char *lum2;
    184    int x, y;
    185    int cr_r;
    186    int crb_g;
    187    int cb_b;
    188    int cols_2 = cols / 2;
    189
    190    row1 = out;
    191    row2 = row1 + cols * 3 + mod * 3;
    192    lum2 = lum + cols;
    193
    194    mod += cols + mod;
    195    mod *= 3;
    196
    197    y = rows / 2;
    198    while (y--) {
    199        x = cols_2;
    200        while (x--) {
    201            register int L;
    202
    203            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
    204            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
    205                + colortab[*cb + 2 * 256];
    206            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
    207            ++cr;
    208            ++cb;
    209
    210            L = *lum++;
    211            value = (rgb_2_pix[L + cr_r] |
    212                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
    213            *row1++ = (value) & 0xFF;
    214            *row1++ = (value >> 8) & 0xFF;
    215            *row1++ = (value >> 16) & 0xFF;
    216
    217            L = *lum++;
    218            value = (rgb_2_pix[L + cr_r] |
    219                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
    220            *row1++ = (value) & 0xFF;
    221            *row1++ = (value >> 8) & 0xFF;
    222            *row1++ = (value >> 16) & 0xFF;
    223
    224
    225            /* Now, do second row.  */
    226
    227            L = *lum2++;
    228            value = (rgb_2_pix[L + cr_r] |
    229                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
    230            *row2++ = (value) & 0xFF;
    231            *row2++ = (value >> 8) & 0xFF;
    232            *row2++ = (value >> 16) & 0xFF;
    233
    234            L = *lum2++;
    235            value = (rgb_2_pix[L + cr_r] |
    236                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
    237            *row2++ = (value) & 0xFF;
    238            *row2++ = (value >> 8) & 0xFF;
    239            *row2++ = (value >> 16) & 0xFF;
    240        }
    241
    242        /*
    243         * These values are at the start of the next line, (due
    244         * to the ++'s above),but they need to be at the start
    245         * of the line after that.
    246         */
    247        lum += cols;
    248        lum2 += cols;
    249        row1 += mod;
    250        row2 += mod;
    251    }
    252}
    253
    254static void
    255Color32DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
    256                       unsigned char *lum, unsigned char *cr,
    257                       unsigned char *cb, unsigned char *out,
    258                       int rows, int cols, int mod)
    259{
    260    unsigned int *row1;
    261    unsigned int *row2;
    262    unsigned char *lum2;
    263    int x, y;
    264    int cr_r;
    265    int crb_g;
    266    int cb_b;
    267    int cols_2 = cols / 2;
    268
    269    row1 = (unsigned int *) out;
    270    row2 = row1 + cols + mod;
    271    lum2 = lum + cols;
    272
    273    mod += cols + mod;
    274
    275    y = rows / 2;
    276    while (y--) {
    277        x = cols_2;
    278        while (x--) {
    279            register int L;
    280
    281            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
    282            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
    283                + colortab[*cb + 2 * 256];
    284            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
    285            ++cr;
    286            ++cb;
    287
    288            L = *lum++;
    289            *row1++ = (rgb_2_pix[L + cr_r] |
    290                       rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
    291
    292            L = *lum++;
    293            *row1++ = (rgb_2_pix[L + cr_r] |
    294                       rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
    295
    296
    297            /* Now, do second row.  */
    298
    299            L = *lum2++;
    300            *row2++ = (rgb_2_pix[L + cr_r] |
    301                       rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
    302
    303            L = *lum2++;
    304            *row2++ = (rgb_2_pix[L + cr_r] |
    305                       rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
    306        }
    307
    308        /*
    309         * These values are at the start of the next line, (due
    310         * to the ++'s above),but they need to be at the start
    311         * of the line after that.
    312         */
    313        lum += cols;
    314        lum2 += cols;
    315        row1 += mod;
    316        row2 += mod;
    317    }
    318}
    319
    320/*
    321 * In this function I make use of a nasty trick. The tables have the lower
    322 * 16 bits replicated in the upper 16. This means I can write ints and get
    323 * the horisontal doubling for free (almost).
    324 */
    325static void
    326Color16DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
    327                       unsigned char *lum, unsigned char *cr,
    328                       unsigned char *cb, unsigned char *out,
    329                       int rows, int cols, int mod)
    330{
    331    unsigned int *row1 = (unsigned int *) out;
    332    const int next_row = cols + (mod / 2);
    333    unsigned int *row2 = row1 + 2 * next_row;
    334    unsigned char *lum2;
    335    int x, y;
    336    int cr_r;
    337    int crb_g;
    338    int cb_b;
    339    int cols_2 = cols / 2;
    340
    341    lum2 = lum + cols;
    342
    343    mod = (next_row * 3) + (mod / 2);
    344
    345    y = rows / 2;
    346    while (y--) {
    347        x = cols_2;
    348        while (x--) {
    349            register int L;
    350
    351            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
    352            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
    353                + colortab[*cb + 2 * 256];
    354            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
    355            ++cr;
    356            ++cb;
    357
    358            L = *lum++;
    359            row1[0] = row1[next_row] = (rgb_2_pix[L + cr_r] |
    360                                        rgb_2_pix[L + crb_g] |
    361                                        rgb_2_pix[L + cb_b]);
    362            row1++;
    363
    364            L = *lum++;
    365            row1[0] = row1[next_row] = (rgb_2_pix[L + cr_r] |
    366                                        rgb_2_pix[L + crb_g] |
    367                                        rgb_2_pix[L + cb_b]);
    368            row1++;
    369
    370
    371            /* Now, do second row. */
    372
    373            L = *lum2++;
    374            row2[0] = row2[next_row] = (rgb_2_pix[L + cr_r] |
    375                                        rgb_2_pix[L + crb_g] |
    376                                        rgb_2_pix[L + cb_b]);
    377            row2++;
    378
    379            L = *lum2++;
    380            row2[0] = row2[next_row] = (rgb_2_pix[L + cr_r] |
    381                                        rgb_2_pix[L + crb_g] |
    382                                        rgb_2_pix[L + cb_b]);
    383            row2++;
    384        }
    385
    386        /*
    387         * These values are at the start of the next line, (due
    388         * to the ++'s above),but they need to be at the start
    389         * of the line after that.
    390         */
    391        lum += cols;
    392        lum2 += cols;
    393        row1 += mod;
    394        row2 += mod;
    395    }
    396}
    397
    398static void
    399Color24DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
    400                       unsigned char *lum, unsigned char *cr,
    401                       unsigned char *cb, unsigned char *out,
    402                       int rows, int cols, int mod)
    403{
    404    unsigned int value;
    405    unsigned char *row1 = out;
    406    const int next_row = (cols * 2 + mod) * 3;
    407    unsigned char *row2 = row1 + 2 * next_row;
    408    unsigned char *lum2;
    409    int x, y;
    410    int cr_r;
    411    int crb_g;
    412    int cb_b;
    413    int cols_2 = cols / 2;
    414
    415    lum2 = lum + cols;
    416
    417    mod = next_row * 3 + mod * 3;
    418
    419    y = rows / 2;
    420    while (y--) {
    421        x = cols_2;
    422        while (x--) {
    423            register int L;
    424
    425            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
    426            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
    427                + colortab[*cb + 2 * 256];
    428            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
    429            ++cr;
    430            ++cb;
    431
    432            L = *lum++;
    433            value = (rgb_2_pix[L + cr_r] |
    434                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
    435            row1[0 + 0] = row1[3 + 0] = row1[next_row + 0] =
    436                row1[next_row + 3 + 0] = (value) & 0xFF;
    437            row1[0 + 1] = row1[3 + 1] = row1[next_row + 1] =
    438                row1[next_row + 3 + 1] = (value >> 8) & 0xFF;
    439            row1[0 + 2] = row1[3 + 2] = row1[next_row + 2] =
    440                row1[next_row + 3 + 2] = (value >> 16) & 0xFF;
    441            row1 += 2 * 3;
    442
    443            L = *lum++;
    444            value = (rgb_2_pix[L + cr_r] |
    445                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
    446            row1[0 + 0] = row1[3 + 0] = row1[next_row + 0] =
    447                row1[next_row + 3 + 0] = (value) & 0xFF;
    448            row1[0 + 1] = row1[3 + 1] = row1[next_row + 1] =
    449                row1[next_row + 3 + 1] = (value >> 8) & 0xFF;
    450            row1[0 + 2] = row1[3 + 2] = row1[next_row + 2] =
    451                row1[next_row + 3 + 2] = (value >> 16) & 0xFF;
    452            row1 += 2 * 3;
    453
    454
    455            /* Now, do second row. */
    456
    457            L = *lum2++;
    458            value = (rgb_2_pix[L + cr_r] |
    459                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
    460            row2[0 + 0] = row2[3 + 0] = row2[next_row + 0] =
    461                row2[next_row + 3 + 0] = (value) & 0xFF;
    462            row2[0 + 1] = row2[3 + 1] = row2[next_row + 1] =
    463                row2[next_row + 3 + 1] = (value >> 8) & 0xFF;
    464            row2[0 + 2] = row2[3 + 2] = row2[next_row + 2] =
    465                row2[next_row + 3 + 2] = (value >> 16) & 0xFF;
    466            row2 += 2 * 3;
    467
    468            L = *lum2++;
    469            value = (rgb_2_pix[L + cr_r] |
    470                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
    471            row2[0 + 0] = row2[3 + 0] = row2[next_row + 0] =
    472                row2[next_row + 3 + 0] = (value) & 0xFF;
    473            row2[0 + 1] = row2[3 + 1] = row2[next_row + 1] =
    474                row2[next_row + 3 + 1] = (value >> 8) & 0xFF;
    475            row2[0 + 2] = row2[3 + 2] = row2[next_row + 2] =
    476                row2[next_row + 3 + 2] = (value >> 16) & 0xFF;
    477            row2 += 2 * 3;
    478        }
    479
    480        /*
    481         * These values are at the start of the next line, (due
    482         * to the ++'s above),but they need to be at the start
    483         * of the line after that.
    484         */
    485        lum += cols;
    486        lum2 += cols;
    487        row1 += mod;
    488        row2 += mod;
    489    }
    490}
    491
    492static void
    493Color32DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
    494                       unsigned char *lum, unsigned char *cr,
    495                       unsigned char *cb, unsigned char *out,
    496                       int rows, int cols, int mod)
    497{
    498    unsigned int *row1 = (unsigned int *) out;
    499    const int next_row = cols * 2 + mod;
    500    unsigned int *row2 = row1 + 2 * next_row;
    501    unsigned char *lum2;
    502    int x, y;
    503    int cr_r;
    504    int crb_g;
    505    int cb_b;
    506    int cols_2 = cols / 2;
    507
    508    lum2 = lum + cols;
    509
    510    mod = (next_row * 3) + mod;
    511
    512    y = rows / 2;
    513    while (y--) {
    514        x = cols_2;
    515        while (x--) {
    516            register int L;
    517
    518            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
    519            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
    520                + colortab[*cb + 2 * 256];
    521            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
    522            ++cr;
    523            ++cb;
    524
    525            L = *lum++;
    526            row1[0] = row1[1] = row1[next_row] = row1[next_row + 1] =
    527                (rgb_2_pix[L + cr_r] |
    528                 rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
    529            row1 += 2;
    530
    531            L = *lum++;
    532            row1[0] = row1[1] = row1[next_row] = row1[next_row + 1] =
    533                (rgb_2_pix[L + cr_r] |
    534                 rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
    535            row1 += 2;
    536
    537
    538            /* Now, do second row. */
    539
    540            L = *lum2++;
    541            row2[0] = row2[1] = row2[next_row] = row2[next_row + 1] =
    542                (rgb_2_pix[L + cr_r] |
    543                 rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
    544            row2 += 2;
    545
    546            L = *lum2++;
    547            row2[0] = row2[1] = row2[next_row] = row2[next_row + 1] =
    548                (rgb_2_pix[L + cr_r] |
    549                 rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
    550            row2 += 2;
    551        }
    552
    553        /*
    554         * These values are at the start of the next line, (due
    555         * to the ++'s above),but they need to be at the start
    556         * of the line after that.
    557         */
    558        lum += cols;
    559        lum2 += cols;
    560        row1 += mod;
    561        row2 += mod;
    562    }
    563}
    564
    565static void
    566Color16DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
    567                       unsigned char *lum, unsigned char *cr,
    568                       unsigned char *cb, unsigned char *out,
    569                       int rows, int cols, int mod)
    570{
    571    unsigned short *row;
    572    int x, y;
    573    int cr_r;
    574    int crb_g;
    575    int cb_b;
    576    int cols_2 = cols / 2;
    577
    578    row = (unsigned short *) out;
    579
    580    y = rows;
    581    while (y--) {
    582        x = cols_2;
    583        while (x--) {
    584            register int L;
    585
    586            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
    587            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
    588                + colortab[*cb + 2 * 256];
    589            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
    590            cr += 4;
    591            cb += 4;
    592
    593            L = *lum;
    594            lum += 2;
    595            *row++ = (unsigned short) (rgb_2_pix[L + cr_r] |
    596                                       rgb_2_pix[L + crb_g] |
    597                                       rgb_2_pix[L + cb_b]);
    598
    599            L = *lum;
    600            lum += 2;
    601            *row++ = (unsigned short) (rgb_2_pix[L + cr_r] |
    602                                       rgb_2_pix[L + crb_g] |
    603                                       rgb_2_pix[L + cb_b]);
    604
    605        }
    606
    607        row += mod;
    608    }
    609}
    610
    611static void
    612Color24DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
    613                       unsigned char *lum, unsigned char *cr,
    614                       unsigned char *cb, unsigned char *out,
    615                       int rows, int cols, int mod)
    616{
    617    unsigned int value;
    618    unsigned char *row;
    619    int x, y;
    620    int cr_r;
    621    int crb_g;
    622    int cb_b;
    623    int cols_2 = cols / 2;
    624
    625    row = (unsigned char *) out;
    626    mod *= 3;
    627    y = rows;
    628    while (y--) {
    629        x = cols_2;
    630        while (x--) {
    631            register int L;
    632
    633            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
    634            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
    635                + colortab[*cb + 2 * 256];
    636            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
    637            cr += 4;
    638            cb += 4;
    639
    640            L = *lum;
    641            lum += 2;
    642            value = (rgb_2_pix[L + cr_r] |
    643                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
    644            *row++ = (value) & 0xFF;
    645            *row++ = (value >> 8) & 0xFF;
    646            *row++ = (value >> 16) & 0xFF;
    647
    648            L = *lum;
    649            lum += 2;
    650            value = (rgb_2_pix[L + cr_r] |
    651                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
    652            *row++ = (value) & 0xFF;
    653            *row++ = (value >> 8) & 0xFF;
    654            *row++ = (value >> 16) & 0xFF;
    655
    656        }
    657        row += mod;
    658    }
    659}
    660
    661static void
    662Color32DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
    663                       unsigned char *lum, unsigned char *cr,
    664                       unsigned char *cb, unsigned char *out,
    665                       int rows, int cols, int mod)
    666{
    667    unsigned int *row;
    668    int x, y;
    669    int cr_r;
    670    int crb_g;
    671    int cb_b;
    672    int cols_2 = cols / 2;
    673
    674    row = (unsigned int *) out;
    675    y = rows;
    676    while (y--) {
    677        x = cols_2;
    678        while (x--) {
    679            register int L;
    680
    681            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
    682            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
    683                + colortab[*cb + 2 * 256];
    684            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
    685            cr += 4;
    686            cb += 4;
    687
    688            L = *lum;
    689            lum += 2;
    690            *row++ = (rgb_2_pix[L + cr_r] |
    691                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
    692
    693            L = *lum;
    694            lum += 2;
    695            *row++ = (rgb_2_pix[L + cr_r] |
    696                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
    697
    698
    699        }
    700        row += mod;
    701    }
    702}
    703
    704/*
    705 * In this function I make use of a nasty trick. The tables have the lower
    706 * 16 bits replicated in the upper 16. This means I can write ints and get
    707 * the horisontal doubling for free (almost).
    708 */
    709static void
    710Color16DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
    711                       unsigned char *lum, unsigned char *cr,
    712                       unsigned char *cb, unsigned char *out,
    713                       int rows, int cols, int mod)
    714{
    715    unsigned int *row = (unsigned int *) out;
    716    const int next_row = cols + (mod / 2);
    717    int x, y;
    718    int cr_r;
    719    int crb_g;
    720    int cb_b;
    721    int cols_2 = cols / 2;
    722
    723    y = rows;
    724    while (y--) {
    725        x = cols_2;
    726        while (x--) {
    727            register int L;
    728
    729            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
    730            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
    731                + colortab[*cb + 2 * 256];
    732            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
    733            cr += 4;
    734            cb += 4;
    735
    736            L = *lum;
    737            lum += 2;
    738            row[0] = row[next_row] = (rgb_2_pix[L + cr_r] |
    739                                      rgb_2_pix[L + crb_g] |
    740                                      rgb_2_pix[L + cb_b]);
    741            row++;
    742
    743            L = *lum;
    744            lum += 2;
    745            row[0] = row[next_row] = (rgb_2_pix[L + cr_r] |
    746                                      rgb_2_pix[L + crb_g] |
    747                                      rgb_2_pix[L + cb_b]);
    748            row++;
    749
    750        }
    751        row += next_row;
    752    }
    753}
    754
    755static void
    756Color24DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
    757                       unsigned char *lum, unsigned char *cr,
    758                       unsigned char *cb, unsigned char *out,
    759                       int rows, int cols, int mod)
    760{
    761    unsigned int value;
    762    unsigned char *row = out;
    763    const int next_row = (cols * 2 + mod) * 3;
    764    int x, y;
    765    int cr_r;
    766    int crb_g;
    767    int cb_b;
    768    int cols_2 = cols / 2;
    769    y = rows;
    770    while (y--) {
    771        x = cols_2;
    772        while (x--) {
    773            register int L;
    774
    775            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
    776            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
    777                + colortab[*cb + 2 * 256];
    778            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
    779            cr += 4;
    780            cb += 4;
    781
    782            L = *lum;
    783            lum += 2;
    784            value = (rgb_2_pix[L + cr_r] |
    785                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
    786            row[0 + 0] = row[3 + 0] = row[next_row + 0] =
    787                row[next_row + 3 + 0] = (value) & 0xFF;
    788            row[0 + 1] = row[3 + 1] = row[next_row + 1] =
    789                row[next_row + 3 + 1] = (value >> 8) & 0xFF;
    790            row[0 + 2] = row[3 + 2] = row[next_row + 2] =
    791                row[next_row + 3 + 2] = (value >> 16) & 0xFF;
    792            row += 2 * 3;
    793
    794            L = *lum;
    795            lum += 2;
    796            value = (rgb_2_pix[L + cr_r] |
    797                     rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
    798            row[0 + 0] = row[3 + 0] = row[next_row + 0] =
    799                row[next_row + 3 + 0] = (value) & 0xFF;
    800            row[0 + 1] = row[3 + 1] = row[next_row + 1] =
    801                row[next_row + 3 + 1] = (value >> 8) & 0xFF;
    802            row[0 + 2] = row[3 + 2] = row[next_row + 2] =
    803                row[next_row + 3 + 2] = (value >> 16) & 0xFF;
    804            row += 2 * 3;
    805
    806        }
    807        row += next_row;
    808    }
    809}
    810
    811static void
    812Color32DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
    813                       unsigned char *lum, unsigned char *cr,
    814                       unsigned char *cb, unsigned char *out,
    815                       int rows, int cols, int mod)
    816{
    817    unsigned int *row = (unsigned int *) out;
    818    const int next_row = cols * 2 + mod;
    819    int x, y;
    820    int cr_r;
    821    int crb_g;
    822    int cb_b;
    823    int cols_2 = cols / 2;
    824    mod += mod;
    825    y = rows;
    826    while (y--) {
    827        x = cols_2;
    828        while (x--) {
    829            register int L;
    830
    831            cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
    832            crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
    833                + colortab[*cb + 2 * 256];
    834            cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
    835            cr += 4;
    836            cb += 4;
    837
    838            L = *lum;
    839            lum += 2;
    840            row[0] = row[1] = row[next_row] = row[next_row + 1] =
    841                (rgb_2_pix[L + cr_r] |
    842                 rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
    843            row += 2;
    844
    845            L = *lum;
    846            lum += 2;
    847            row[0] = row[1] = row[next_row] = row[next_row + 1] =
    848                (rgb_2_pix[L + cr_r] |
    849                 rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
    850            row += 2;
    851
    852
    853        }
    854
    855        row += next_row;
    856    }
    857}
    858
    859/*
    860 * How many 1 bits are there in the Uint32.
    861 * Low performance, do not call often.
    862 */
    863static int
    864number_of_bits_set(Uint32 a)
    865{
    866    if (!a)
    867        return 0;
    868    if (a & 1)
    869        return 1 + number_of_bits_set(a >> 1);
    870    return (number_of_bits_set(a >> 1));
    871}
    872
    873/*
    874 * How many 0 bits are there at least significant end of Uint32.
    875 * Low performance, do not call often.
    876 */
    877static int
    878free_bits_at_bottom(Uint32 a)
    879{
    880    /* assume char is 8 bits */
    881    if (!a)
    882        return sizeof(Uint32) * 8;
    883    if (((Sint32) a) & 1l)
    884        return 0;
    885    return 1 + free_bits_at_bottom(a >> 1);
    886}
    887
    888static int
    889SDL_SW_SetupYUVDisplay(SDL_SW_YUVTexture * swdata, Uint32 target_format)
    890{
    891    Uint32 *r_2_pix_alloc;
    892    Uint32 *g_2_pix_alloc;
    893    Uint32 *b_2_pix_alloc;
    894    int i;
    895    int bpp;
    896    Uint32 Rmask, Gmask, Bmask, Amask;
    897
    898    if (!SDL_PixelFormatEnumToMasks
    899        (target_format, &bpp, &Rmask, &Gmask, &Bmask, &Amask) || bpp < 15) {
    900        return SDL_SetError("Unsupported YUV destination format");
    901    }
    902
    903    swdata->target_format = target_format;
    904    r_2_pix_alloc = &swdata->rgb_2_pix[0 * 768];
    905    g_2_pix_alloc = &swdata->rgb_2_pix[1 * 768];
    906    b_2_pix_alloc = &swdata->rgb_2_pix[2 * 768];
    907
    908    /*
    909     * Set up entries 0-255 in rgb-to-pixel value tables.
    910     */
    911    for (i = 0; i < 256; ++i) {
    912        r_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Rmask));
    913        r_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Rmask);
    914        r_2_pix_alloc[i + 256] |= Amask;
    915        g_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Gmask));
    916        g_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Gmask);
    917        g_2_pix_alloc[i + 256] |= Amask;
    918        b_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Bmask));
    919        b_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Bmask);
    920        b_2_pix_alloc[i + 256] |= Amask;
    921    }
    922
    923    /*
    924     * If we have 16-bit output depth, then we double the value
    925     * in the top word. This means that we can write out both
    926     * pixels in the pixel doubling mode with one op. It is
    927     * harmless in the normal case as storing a 32-bit value
    928     * through a short pointer will lose the top bits anyway.
    929     */
    930    if (SDL_BYTESPERPIXEL(target_format) == 2) {
    931        for (i = 0; i < 256; ++i) {
    932            r_2_pix_alloc[i + 256] |= (r_2_pix_alloc[i + 256]) << 16;
    933            g_2_pix_alloc[i + 256] |= (g_2_pix_alloc[i + 256]) << 16;
    934            b_2_pix_alloc[i + 256] |= (b_2_pix_alloc[i + 256]) << 16;
    935        }
    936    }
    937
    938    /*
    939     * Spread out the values we have to the rest of the array so that
    940     * we do not need to check for overflow.
    941     */
    942    for (i = 0; i < 256; ++i) {
    943        r_2_pix_alloc[i] = r_2_pix_alloc[256];
    944        r_2_pix_alloc[i + 512] = r_2_pix_alloc[511];
    945        g_2_pix_alloc[i] = g_2_pix_alloc[256];
    946        g_2_pix_alloc[i + 512] = g_2_pix_alloc[511];
    947        b_2_pix_alloc[i] = b_2_pix_alloc[256];
    948        b_2_pix_alloc[i + 512] = b_2_pix_alloc[511];
    949    }
    950
    951    /* You have chosen wisely... */
    952    switch (swdata->format) {
    953    case SDL_PIXELFORMAT_YV12:
    954    case SDL_PIXELFORMAT_IYUV:
    955        if (SDL_BYTESPERPIXEL(target_format) == 2) {
    956#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
    957            /* inline assembly functions */
    958            if (SDL_HasMMX() && (Rmask == 0xF800) &&
    959                (Gmask == 0x07E0) && (Bmask == 0x001F)
    960                && (swdata->w & 15) == 0) {
    961/* printf("Using MMX 16-bit 565 dither\n"); */
    962                swdata->Display1X = Color565DitherYV12MMX1X;
    963            } else {
    964/* printf("Using C 16-bit dither\n"); */
    965                swdata->Display1X = Color16DitherYV12Mod1X;
    966            }
    967#else
    968            swdata->Display1X = Color16DitherYV12Mod1X;
    969#endif
    970            swdata->Display2X = Color16DitherYV12Mod2X;
    971        }
    972        if (SDL_BYTESPERPIXEL(target_format) == 3) {
    973            swdata->Display1X = Color24DitherYV12Mod1X;
    974            swdata->Display2X = Color24DitherYV12Mod2X;
    975        }
    976        if (SDL_BYTESPERPIXEL(target_format) == 4) {
    977#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
    978            /* inline assembly functions */
    979            if (SDL_HasMMX() && (Rmask == 0x00FF0000) &&
    980                (Gmask == 0x0000FF00) &&
    981                (Bmask == 0x000000FF) && (swdata->w & 15) == 0) {
    982/* printf("Using MMX 32-bit dither\n"); */
    983                swdata->Display1X = ColorRGBDitherYV12MMX1X;
    984            } else {
    985/* printf("Using C 32-bit dither\n"); */
    986                swdata->Display1X = Color32DitherYV12Mod1X;
    987            }
    988#else
    989            swdata->Display1X = Color32DitherYV12Mod1X;
    990#endif
    991            swdata->Display2X = Color32DitherYV12Mod2X;
    992        }
    993        break;
    994    case SDL_PIXELFORMAT_YUY2:
    995    case SDL_PIXELFORMAT_UYVY:
    996    case SDL_PIXELFORMAT_YVYU:
    997        if (SDL_BYTESPERPIXEL(target_format) == 2) {
    998            swdata->Display1X = Color16DitherYUY2Mod1X;
    999            swdata->Display2X = Color16DitherYUY2Mod2X;
   1000        }
   1001        if (SDL_BYTESPERPIXEL(target_format) == 3) {
   1002            swdata->Display1X = Color24DitherYUY2Mod1X;
   1003            swdata->Display2X = Color24DitherYUY2Mod2X;
   1004        }
   1005        if (SDL_BYTESPERPIXEL(target_format) == 4) {
   1006            swdata->Display1X = Color32DitherYUY2Mod1X;
   1007            swdata->Display2X = Color32DitherYUY2Mod2X;
   1008        }
   1009        break;
   1010    default:
   1011        /* We should never get here (caught above) */
   1012        break;
   1013    }
   1014
   1015    SDL_FreeSurface(swdata->display);
   1016    swdata->display = NULL;
   1017    return 0;
   1018}
   1019
   1020SDL_SW_YUVTexture *
   1021SDL_SW_CreateYUVTexture(Uint32 format, int w, int h)
   1022{
   1023    SDL_SW_YUVTexture *swdata;
   1024    int *Cr_r_tab;
   1025    int *Cr_g_tab;
   1026    int *Cb_g_tab;
   1027    int *Cb_b_tab;
   1028    int i;
   1029    int CR, CB;
   1030
   1031    switch (format) {
   1032    case SDL_PIXELFORMAT_YV12:
   1033    case SDL_PIXELFORMAT_IYUV:
   1034    case SDL_PIXELFORMAT_YUY2:
   1035    case SDL_PIXELFORMAT_UYVY:
   1036    case SDL_PIXELFORMAT_YVYU:
   1037        break;
   1038    default:
   1039        SDL_SetError("Unsupported YUV format");
   1040        return NULL;
   1041    }
   1042
   1043    swdata = (SDL_SW_YUVTexture *) SDL_calloc(1, sizeof(*swdata));
   1044    if (!swdata) {
   1045        SDL_OutOfMemory();
   1046        return NULL;
   1047    }
   1048
   1049    swdata->format = format;
   1050    swdata->target_format = SDL_PIXELFORMAT_UNKNOWN;
   1051    swdata->w = w;
   1052    swdata->h = h;
   1053    swdata->pixels = (Uint8 *) SDL_malloc(w * h * 2);
   1054    swdata->colortab = (int *) SDL_malloc(4 * 256 * sizeof(int));
   1055    swdata->rgb_2_pix = (Uint32 *) SDL_malloc(3 * 768 * sizeof(Uint32));
   1056    if (!swdata->pixels || !swdata->colortab || !swdata->rgb_2_pix) {
   1057        SDL_SW_DestroyYUVTexture(swdata);
   1058        SDL_OutOfMemory();
   1059        return NULL;
   1060    }
   1061
   1062    /* Generate the tables for the display surface */
   1063    Cr_r_tab = &swdata->colortab[0 * 256];
   1064    Cr_g_tab = &swdata->colortab[1 * 256];
   1065    Cb_g_tab = &swdata->colortab[2 * 256];
   1066    Cb_b_tab = &swdata->colortab[3 * 256];
   1067    for (i = 0; i < 256; i++) {
   1068        /* Gamma correction (luminescence table) and chroma correction
   1069           would be done here.  See the Berkeley mpeg_play sources.
   1070         */
   1071        CB = CR = (i - 128);
   1072        Cr_r_tab[i] = (int) ((0.419 / 0.299) * CR);
   1073        Cr_g_tab[i] = (int) (-(0.299 / 0.419) * CR);
   1074        Cb_g_tab[i] = (int) (-(0.114 / 0.331) * CB);
   1075        Cb_b_tab[i] = (int) ((0.587 / 0.331) * CB);
   1076    }
   1077
   1078    /* Find the pitch and offset values for the overlay */
   1079    switch (format) {
   1080    case SDL_PIXELFORMAT_YV12:
   1081    case SDL_PIXELFORMAT_IYUV:
   1082        swdata->pitches[0] = w;
   1083        swdata->pitches[1] = swdata->pitches[0] / 2;
   1084        swdata->pitches[2] = swdata->pitches[0] / 2;
   1085        swdata->planes[0] = swdata->pixels;
   1086        swdata->planes[1] = swdata->planes[0] + swdata->pitches[0] * h;
   1087        swdata->planes[2] = swdata->planes[1] + swdata->pitches[1] * h / 2;
   1088        break;
   1089    case SDL_PIXELFORMAT_YUY2:
   1090    case SDL_PIXELFORMAT_UYVY:
   1091    case SDL_PIXELFORMAT_YVYU:
   1092        swdata->pitches[0] = w * 2;
   1093        swdata->planes[0] = swdata->pixels;
   1094        break;
   1095    default:
   1096        SDL_assert(0 && "We should never get here (caught above)");
   1097        break;
   1098    }
   1099
   1100    /* We're all done.. */
   1101    return (swdata);
   1102}
   1103
   1104int
   1105SDL_SW_QueryYUVTexturePixels(SDL_SW_YUVTexture * swdata, void **pixels,
   1106                             int *pitch)
   1107{
   1108    *pixels = swdata->planes[0];
   1109    *pitch = swdata->pitches[0];
   1110    return 0;
   1111}
   1112
   1113int
   1114SDL_SW_UpdateYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
   1115                        const void *pixels, int pitch)
   1116{
   1117    switch (swdata->format) {
   1118    case SDL_PIXELFORMAT_YV12:
   1119    case SDL_PIXELFORMAT_IYUV:
   1120        if (rect->x == 0 && rect->y == 0 &&
   1121            rect->w == swdata->w && rect->h == swdata->h) {
   1122                SDL_memcpy(swdata->pixels, pixels,
   1123                           (swdata->h * swdata->w) + (swdata->h * swdata->w) / 2);
   1124        } else {
   1125            Uint8 *src, *dst;
   1126            int row;
   1127            size_t length;
   1128
   1129            /* Copy the Y plane */
   1130            src = (Uint8 *) pixels;
   1131            dst = swdata->pixels + rect->y * swdata->w + rect->x;
   1132            length = rect->w;
   1133            for (row = 0; row < rect->h; ++row) {
   1134                SDL_memcpy(dst, src, length);
   1135                src += pitch;
   1136                dst += swdata->w;
   1137            }
   1138
   1139            /* Copy the next plane */
   1140            src = (Uint8 *) pixels + rect->h * pitch;
   1141            dst = swdata->pixels + swdata->h * swdata->w;
   1142            dst += rect->y/2 * swdata->w/2 + rect->x/2;
   1143            length = rect->w / 2;
   1144            for (row = 0; row < rect->h/2; ++row) {
   1145                SDL_memcpy(dst, src, length);
   1146                src += pitch/2;
   1147                dst += swdata->w/2;
   1148            }
   1149
   1150            /* Copy the next plane */
   1151            src = (Uint8 *) pixels + rect->h * pitch + (rect->h * pitch) / 4;
   1152            dst = swdata->pixels + swdata->h * swdata->w +
   1153                  (swdata->h * swdata->w) / 4;
   1154            dst += rect->y/2 * swdata->w/2 + rect->x/2;
   1155            length = rect->w / 2;
   1156            for (row = 0; row < rect->h/2; ++row) {
   1157                SDL_memcpy(dst, src, length);
   1158                src += pitch/2;
   1159                dst += swdata->w/2;
   1160            }
   1161        }
   1162        break;
   1163    case SDL_PIXELFORMAT_YUY2:
   1164    case SDL_PIXELFORMAT_UYVY:
   1165    case SDL_PIXELFORMAT_YVYU:
   1166        {
   1167            Uint8 *src, *dst;
   1168            int row;
   1169            size_t length;
   1170
   1171            src = (Uint8 *) pixels;
   1172            dst =
   1173                swdata->planes[0] + rect->y * swdata->pitches[0] +
   1174                rect->x * 2;
   1175            length = rect->w * 2;
   1176            for (row = 0; row < rect->h; ++row) {
   1177                SDL_memcpy(dst, src, length);
   1178                src += pitch;
   1179                dst += swdata->pitches[0];
   1180            }
   1181        }
   1182        break;
   1183    }
   1184    return 0;
   1185}
   1186
   1187int
   1188SDL_SW_UpdateYUVTexturePlanar(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
   1189                              const Uint8 *Yplane, int Ypitch,
   1190                              const Uint8 *Uplane, int Upitch,
   1191                              const Uint8 *Vplane, int Vpitch)
   1192{
   1193    const Uint8 *src;
   1194    Uint8 *dst;
   1195    int row;
   1196    size_t length;
   1197
   1198    /* Copy the Y plane */
   1199    src = Yplane;
   1200    dst = swdata->pixels + rect->y * swdata->w + rect->x;
   1201    length = rect->w;
   1202    for (row = 0; row < rect->h; ++row) {
   1203        SDL_memcpy(dst, src, length);
   1204        src += Ypitch;
   1205        dst += swdata->w;
   1206    }
   1207
   1208    /* Copy the U plane */
   1209    src = Uplane;
   1210    if (swdata->format == SDL_PIXELFORMAT_IYUV) {
   1211        dst = swdata->pixels + swdata->h * swdata->w;
   1212    } else {
   1213        dst = swdata->pixels + swdata->h * swdata->w +
   1214              (swdata->h * swdata->w) / 4;
   1215    }
   1216    dst += rect->y/2 * swdata->w/2 + rect->x/2;
   1217    length = rect->w / 2;
   1218    for (row = 0; row < rect->h/2; ++row) {
   1219        SDL_memcpy(dst, src, length);
   1220        src += Upitch;
   1221        dst += swdata->w/2;
   1222    }
   1223
   1224    /* Copy the V plane */
   1225    src = Vplane;
   1226    if (swdata->format == SDL_PIXELFORMAT_YV12) {
   1227        dst = swdata->pixels + swdata->h * swdata->w;
   1228    } else {
   1229        dst = swdata->pixels + swdata->h * swdata->w +
   1230              (swdata->h * swdata->w) / 4;
   1231    }
   1232    dst += rect->y/2 * swdata->w/2 + rect->x/2;
   1233    length = rect->w / 2;
   1234    for (row = 0; row < rect->h/2; ++row) {
   1235        SDL_memcpy(dst, src, length);
   1236        src += Vpitch;
   1237        dst += swdata->w/2;
   1238    }
   1239    return 0;
   1240}
   1241
   1242int
   1243SDL_SW_LockYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
   1244                      void **pixels, int *pitch)
   1245{
   1246    switch (swdata->format) {
   1247    case SDL_PIXELFORMAT_YV12:
   1248    case SDL_PIXELFORMAT_IYUV:
   1249        if (rect
   1250            && (rect->x != 0 || rect->y != 0 || rect->w != swdata->w
   1251                || rect->h != swdata->h)) {
   1252            return SDL_SetError
   1253                ("YV12 and IYUV textures only support full surface locks");
   1254        }
   1255        break;
   1256    }
   1257
   1258    if (rect) {
   1259        *pixels = swdata->planes[0] + rect->y * swdata->pitches[0] + rect->x * 2;
   1260    } else {
   1261        *pixels = swdata->planes[0];
   1262    }
   1263    *pitch = swdata->pitches[0];
   1264    return 0;
   1265}
   1266
   1267void
   1268SDL_SW_UnlockYUVTexture(SDL_SW_YUVTexture * swdata)
   1269{
   1270}
   1271
   1272int
   1273SDL_SW_CopyYUVToRGB(SDL_SW_YUVTexture * swdata, const SDL_Rect * srcrect,
   1274                    Uint32 target_format, int w, int h, void *pixels,
   1275                    int pitch)
   1276{
   1277    const int targetbpp = SDL_BYTESPERPIXEL(target_format);
   1278    int stretch;
   1279    int scale_2x;
   1280    Uint8 *lum, *Cr, *Cb;
   1281    int mod;
   1282
   1283    if (targetbpp == 0) {
   1284        return SDL_SetError("Invalid target pixel format");
   1285    }
   1286
   1287    /* Make sure we're set up to display in the desired format */
   1288    if (target_format != swdata->target_format) {
   1289        if (SDL_SW_SetupYUVDisplay(swdata, target_format) < 0) {
   1290            return -1;
   1291        }
   1292    }
   1293
   1294    stretch = 0;
   1295    scale_2x = 0;
   1296    if (srcrect->x || srcrect->y || srcrect->w < swdata->w
   1297        || srcrect->h < swdata->h) {
   1298        /* The source rectangle has been clipped.
   1299           Using a scratch surface is easier than adding clipped
   1300           source support to all the blitters, plus that would
   1301           slow them down in the general unclipped case.
   1302         */
   1303        stretch = 1;
   1304    } else if ((srcrect->w != w) || (srcrect->h != h)) {
   1305        if ((w == 2 * srcrect->w) && (h == 2 * srcrect->h)) {
   1306            scale_2x = 1;
   1307        } else {
   1308            stretch = 1;
   1309        }
   1310    }
   1311    if (stretch) {
   1312        int bpp;
   1313        Uint32 Rmask, Gmask, Bmask, Amask;
   1314
   1315        if (swdata->display) {
   1316            swdata->display->w = w;
   1317            swdata->display->h = h;
   1318            swdata->display->pixels = pixels;
   1319            swdata->display->pitch = pitch;
   1320        } else {
   1321            /* This must have succeeded in SDL_SW_SetupYUVDisplay() earlier */
   1322            SDL_PixelFormatEnumToMasks(target_format, &bpp, &Rmask, &Gmask,
   1323                                       &Bmask, &Amask);
   1324            swdata->display =
   1325                SDL_CreateRGBSurfaceFrom(pixels, w, h, bpp, pitch, Rmask,
   1326                                         Gmask, Bmask, Amask);
   1327            if (!swdata->display) {
   1328                return (-1);
   1329            }
   1330        }
   1331        if (!swdata->stretch) {
   1332            /* This must have succeeded in SDL_SW_SetupYUVDisplay() earlier */
   1333            SDL_PixelFormatEnumToMasks(target_format, &bpp, &Rmask, &Gmask,
   1334                                       &Bmask, &Amask);
   1335            swdata->stretch =
   1336                SDL_CreateRGBSurface(0, swdata->w, swdata->h, bpp, Rmask,
   1337                                     Gmask, Bmask, Amask);
   1338            if (!swdata->stretch) {
   1339                return (-1);
   1340            }
   1341        }
   1342        pixels = swdata->stretch->pixels;
   1343        pitch = swdata->stretch->pitch;
   1344    }
   1345    switch (swdata->format) {
   1346    case SDL_PIXELFORMAT_YV12:
   1347        lum = swdata->planes[0];
   1348        Cr = swdata->planes[1];
   1349        Cb = swdata->planes[2];
   1350        break;
   1351    case SDL_PIXELFORMAT_IYUV:
   1352        lum = swdata->planes[0];
   1353        Cr = swdata->planes[2];
   1354        Cb = swdata->planes[1];
   1355        break;
   1356    case SDL_PIXELFORMAT_YUY2:
   1357        lum = swdata->planes[0];
   1358        Cr = lum + 3;
   1359        Cb = lum + 1;
   1360        break;
   1361    case SDL_PIXELFORMAT_UYVY:
   1362        lum = swdata->planes[0] + 1;
   1363        Cr = lum + 1;
   1364        Cb = lum - 1;
   1365        break;
   1366    case SDL_PIXELFORMAT_YVYU:
   1367        lum = swdata->planes[0];
   1368        Cr = lum + 1;
   1369        Cb = lum + 3;
   1370        break;
   1371    default:
   1372        return SDL_SetError("Unsupported YUV format in copy");
   1373    }
   1374    mod = (pitch / targetbpp);
   1375
   1376    if (scale_2x) {
   1377        mod -= (swdata->w * 2);
   1378        swdata->Display2X(swdata->colortab, swdata->rgb_2_pix,
   1379                          lum, Cr, Cb, pixels, swdata->h, swdata->w, mod);
   1380    } else {
   1381        mod -= swdata->w;
   1382        swdata->Display1X(swdata->colortab, swdata->rgb_2_pix,
   1383                          lum, Cr, Cb, pixels, swdata->h, swdata->w, mod);
   1384    }
   1385    if (stretch) {
   1386        SDL_Rect rect = *srcrect;
   1387        SDL_SoftStretch(swdata->stretch, &rect, swdata->display, NULL);
   1388    }
   1389    return 0;
   1390}
   1391
   1392void
   1393SDL_SW_DestroyYUVTexture(SDL_SW_YUVTexture * swdata)
   1394{
   1395    if (swdata) {
   1396        SDL_free(swdata->pixels);
   1397        SDL_free(swdata->colortab);
   1398        SDL_free(swdata->rgb_2_pix);
   1399        SDL_FreeSurface(swdata->stretch);
   1400        SDL_FreeSurface(swdata->display);
   1401        SDL_free(swdata);
   1402    }
   1403}
   1404
   1405/* vi: set ts=4 sw=4 expandtab: */