SDL_blit_A.c (43167B)
1/* 2 Simple DirectMedia Layer 3 Copyright (C) 1997-2014 Sam Lantinga <slouken@libsdl.org> 4 5 This software is provided 'as-is', without any express or implied 6 warranty. In no event will the authors be held liable for any damages 7 arising from the use of this software. 8 9 Permission is granted to anyone to use this software for any purpose, 10 including commercial applications, and to alter it and redistribute it 11 freely, subject to the following restrictions: 12 13 1. The origin of this software must not be misrepresented; you must not 14 claim that you wrote the original software. If you use this software 15 in a product, an acknowledgment in the product documentation would be 16 appreciated but is not required. 17 2. Altered source versions must be plainly marked as such, and must not be 18 misrepresented as being the original software. 19 3. This notice may not be removed or altered from any source distribution. 20*/ 21#include "../SDL_internal.h" 22 23#include "SDL_video.h" 24#include "SDL_blit.h" 25 26/* Functions to perform alpha blended blitting */ 27 28/* N->1 blending with per-surface alpha */ 29static void 30BlitNto1SurfaceAlpha(SDL_BlitInfo * info) 31{ 32 int width = info->dst_w; 33 int height = info->dst_h; 34 Uint8 *src = info->src; 35 int srcskip = info->src_skip; 36 Uint8 *dst = info->dst; 37 int dstskip = info->dst_skip; 38 Uint8 *palmap = info->table; 39 SDL_PixelFormat *srcfmt = info->src_fmt; 40 SDL_PixelFormat *dstfmt = info->dst_fmt; 41 int srcbpp = srcfmt->BytesPerPixel; 42 Uint32 Pixel; 43 unsigned sR, sG, sB; 44 unsigned dR, dG, dB; 45 const unsigned A = info->a; 46 47 while (height--) { 48 /* *INDENT-OFF* */ 49 DUFFS_LOOP4( 50 { 51 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB); 52 dR = dstfmt->palette->colors[*dst].r; 53 dG = dstfmt->palette->colors[*dst].g; 54 dB = dstfmt->palette->colors[*dst].b; 55 ALPHA_BLEND_RGB(sR, sG, sB, A, dR, dG, dB); 56 dR &= 0xff; 57 dG &= 0xff; 58 dB &= 0xff; 59 /* Pack RGB into 8bit pixel */ 60 if ( palmap == NULL ) { 61 *dst =((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0)); 62 } else { 63 *dst = palmap[((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0))]; 64 } 65 dst++; 66 src += srcbpp; 67 }, 68 width); 69 /* *INDENT-ON* */ 70 src += srcskip; 71 dst += dstskip; 72 } 73} 74 75/* N->1 blending with pixel alpha */ 76static void 77BlitNto1PixelAlpha(SDL_BlitInfo * info) 78{ 79 int width = info->dst_w; 80 int height = info->dst_h; 81 Uint8 *src = info->src; 82 int srcskip = info->src_skip; 83 Uint8 *dst = info->dst; 84 int dstskip = info->dst_skip; 85 Uint8 *palmap = info->table; 86 SDL_PixelFormat *srcfmt = info->src_fmt; 87 SDL_PixelFormat *dstfmt = info->dst_fmt; 88 int srcbpp = srcfmt->BytesPerPixel; 89 Uint32 Pixel; 90 unsigned sR, sG, sB, sA; 91 unsigned dR, dG, dB; 92 93 while (height--) { 94 /* *INDENT-OFF* */ 95 DUFFS_LOOP4( 96 { 97 DISEMBLE_RGBA(src,srcbpp,srcfmt,Pixel,sR,sG,sB,sA); 98 dR = dstfmt->palette->colors[*dst].r; 99 dG = dstfmt->palette->colors[*dst].g; 100 dB = dstfmt->palette->colors[*dst].b; 101 ALPHA_BLEND_RGB(sR, sG, sB, sA, dR, dG, dB); 102 dR &= 0xff; 103 dG &= 0xff; 104 dB &= 0xff; 105 /* Pack RGB into 8bit pixel */ 106 if ( palmap == NULL ) { 107 *dst =((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0)); 108 } else { 109 *dst = palmap[((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0))]; 110 } 111 dst++; 112 src += srcbpp; 113 }, 114 width); 115 /* *INDENT-ON* */ 116 src += srcskip; 117 dst += dstskip; 118 } 119} 120 121/* colorkeyed N->1 blending with per-surface alpha */ 122static void 123BlitNto1SurfaceAlphaKey(SDL_BlitInfo * info) 124{ 125 int width = info->dst_w; 126 int height = info->dst_h; 127 Uint8 *src = info->src; 128 int srcskip = info->src_skip; 129 Uint8 *dst = info->dst; 130 int dstskip = info->dst_skip; 131 Uint8 *palmap = info->table; 132 SDL_PixelFormat *srcfmt = info->src_fmt; 133 SDL_PixelFormat *dstfmt = info->dst_fmt; 134 int srcbpp = srcfmt->BytesPerPixel; 135 Uint32 ckey = info->colorkey; 136 Uint32 Pixel; 137 unsigned sR, sG, sB; 138 unsigned dR, dG, dB; 139 const unsigned A = info->a; 140 141 while (height--) { 142 /* *INDENT-OFF* */ 143 DUFFS_LOOP( 144 { 145 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB); 146 if ( Pixel != ckey ) { 147 dR = dstfmt->palette->colors[*dst].r; 148 dG = dstfmt->palette->colors[*dst].g; 149 dB = dstfmt->palette->colors[*dst].b; 150 ALPHA_BLEND_RGB(sR, sG, sB, A, dR, dG, dB); 151 dR &= 0xff; 152 dG &= 0xff; 153 dB &= 0xff; 154 /* Pack RGB into 8bit pixel */ 155 if ( palmap == NULL ) { 156 *dst =((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0)); 157 } else { 158 *dst = palmap[((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0))]; 159 } 160 } 161 dst++; 162 src += srcbpp; 163 }, 164 width); 165 /* *INDENT-ON* */ 166 src += srcskip; 167 dst += dstskip; 168 } 169} 170 171#ifdef __MMX__ 172 173/* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ 174static void 175BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo * info) 176{ 177 int width = info->dst_w; 178 int height = info->dst_h; 179 Uint32 *srcp = (Uint32 *) info->src; 180 int srcskip = info->src_skip >> 2; 181 Uint32 *dstp = (Uint32 *) info->dst; 182 int dstskip = info->dst_skip >> 2; 183 Uint32 dalpha = info->dst_fmt->Amask; 184 185 __m64 src1, src2, dst1, dst2, lmask, hmask, dsta; 186 187 hmask = _mm_set_pi32(0x00fefefe, 0x00fefefe); /* alpha128 mask -> hmask */ 188 lmask = _mm_set_pi32(0x00010101, 0x00010101); /* !alpha128 mask -> lmask */ 189 dsta = _mm_set_pi32(dalpha, dalpha); /* dst alpha mask -> dsta */ 190 191 while (height--) { 192 int n = width; 193 if (n & 1) { 194 Uint32 s = *srcp++; 195 Uint32 d = *dstp; 196 *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) 197 + (s & d & 0x00010101)) | dalpha; 198 n--; 199 } 200 201 for (n >>= 1; n > 0; --n) { 202 dst1 = *(__m64 *) dstp; /* 2 x dst -> dst1(ARGBARGB) */ 203 dst2 = dst1; /* 2 x dst -> dst2(ARGBARGB) */ 204 205 src1 = *(__m64 *) srcp; /* 2 x src -> src1(ARGBARGB) */ 206 src2 = src1; /* 2 x src -> src2(ARGBARGB) */ 207 208 dst2 = _mm_and_si64(dst2, hmask); /* dst & mask -> dst2 */ 209 src2 = _mm_and_si64(src2, hmask); /* src & mask -> src2 */ 210 src2 = _mm_add_pi32(src2, dst2); /* dst2 + src2 -> src2 */ 211 src2 = _mm_srli_pi32(src2, 1); /* src2 >> 1 -> src2 */ 212 213 dst1 = _mm_and_si64(dst1, src1); /* src & dst -> dst1 */ 214 dst1 = _mm_and_si64(dst1, lmask); /* dst1 & !mask -> dst1 */ 215 dst1 = _mm_add_pi32(dst1, src2); /* src2 + dst1 -> dst1 */ 216 dst1 = _mm_or_si64(dst1, dsta); /* dsta(full alpha) | dst1 -> dst1 */ 217 218 *(__m64 *) dstp = dst1; /* dst1 -> 2 x dst pixels */ 219 dstp += 2; 220 srcp += 2; 221 } 222 223 srcp += srcskip; 224 dstp += dstskip; 225 } 226 _mm_empty(); 227} 228 229/* fast RGB888->(A)RGB888 blending with surface alpha */ 230static void 231BlitRGBtoRGBSurfaceAlphaMMX(SDL_BlitInfo * info) 232{ 233 SDL_PixelFormat *df = info->dst_fmt; 234 Uint32 chanmask; 235 unsigned alpha = info->a; 236 237 if (alpha == 128 && (df->Rmask | df->Gmask | df->Bmask) == 0x00FFFFFF) { 238 /* only call a128 version when R,G,B occupy lower bits */ 239 BlitRGBtoRGBSurfaceAlpha128MMX(info); 240 } else { 241 int width = info->dst_w; 242 int height = info->dst_h; 243 Uint32 *srcp = (Uint32 *) info->src; 244 int srcskip = info->src_skip >> 2; 245 Uint32 *dstp = (Uint32 *) info->dst; 246 int dstskip = info->dst_skip >> 2; 247 Uint32 dalpha = df->Amask; 248 Uint32 amult; 249 250 __m64 src1, src2, dst1, dst2, mm_alpha, mm_zero, dsta; 251 252 mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */ 253 /* form the alpha mult */ 254 amult = alpha | (alpha << 8); 255 amult = amult | (amult << 16); 256 chanmask = 257 (0xff << df->Rshift) | (0xff << df-> 258 Gshift) | (0xff << df->Bshift); 259 mm_alpha = _mm_set_pi32(0, amult & chanmask); /* 0000AAAA -> mm_alpha, minus 1 chan */ 260 mm_alpha = _mm_unpacklo_pi8(mm_alpha, mm_zero); /* 0A0A0A0A -> mm_alpha, minus 1 chan */ 261 /* at this point mm_alpha can be 000A0A0A or 0A0A0A00 or another combo */ 262 dsta = _mm_set_pi32(dalpha, dalpha); /* dst alpha mask -> dsta */ 263 264 while (height--) { 265 int n = width; 266 if (n & 1) { 267 /* One Pixel Blend */ 268 src2 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src2 (0000ARGB) */ 269 src2 = _mm_unpacklo_pi8(src2, mm_zero); /* 0A0R0G0B -> src2 */ 270 271 dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB) */ 272 dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */ 273 274 src2 = _mm_sub_pi16(src2, dst1); /* src2 - dst2 -> src2 */ 275 src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ 276 src2 = _mm_srli_pi16(src2, 8); /* src2 >> 8 -> src2 */ 277 dst1 = _mm_add_pi8(src2, dst1); /* src2 + dst1 -> dst1 */ 278 279 dst1 = _mm_packs_pu16(dst1, mm_zero); /* 0000ARGB -> dst1 */ 280 dst1 = _mm_or_si64(dst1, dsta); /* dsta | dst1 -> dst1 */ 281 *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */ 282 283 ++srcp; 284 ++dstp; 285 286 n--; 287 } 288 289 for (n >>= 1; n > 0; --n) { 290 /* Two Pixels Blend */ 291 src1 = *(__m64 *) srcp; /* 2 x src -> src1(ARGBARGB) */ 292 src2 = src1; /* 2 x src -> src2(ARGBARGB) */ 293 src1 = _mm_unpacklo_pi8(src1, mm_zero); /* low - 0A0R0G0B -> src1 */ 294 src2 = _mm_unpackhi_pi8(src2, mm_zero); /* high - 0A0R0G0B -> src2 */ 295 296 dst1 = *(__m64 *) dstp; /* 2 x dst -> dst1(ARGBARGB) */ 297 dst2 = dst1; /* 2 x dst -> dst2(ARGBARGB) */ 298 dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* low - 0A0R0G0B -> dst1 */ 299 dst2 = _mm_unpackhi_pi8(dst2, mm_zero); /* high - 0A0R0G0B -> dst2 */ 300 301 src1 = _mm_sub_pi16(src1, dst1); /* src1 - dst1 -> src1 */ 302 src1 = _mm_mullo_pi16(src1, mm_alpha); /* src1 * alpha -> src1 */ 303 src1 = _mm_srli_pi16(src1, 8); /* src1 >> 8 -> src1 */ 304 dst1 = _mm_add_pi8(src1, dst1); /* src1 + dst1(dst1) -> dst1 */ 305 306 src2 = _mm_sub_pi16(src2, dst2); /* src2 - dst2 -> src2 */ 307 src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ 308 src2 = _mm_srli_pi16(src2, 8); /* src2 >> 8 -> src2 */ 309 dst2 = _mm_add_pi8(src2, dst2); /* src2 + dst2(dst2) -> dst2 */ 310 311 dst1 = _mm_packs_pu16(dst1, dst2); /* 0A0R0G0B(res1), 0A0R0G0B(res2) -> dst1(ARGBARGB) */ 312 dst1 = _mm_or_si64(dst1, dsta); /* dsta | dst1 -> dst1 */ 313 314 *(__m64 *) dstp = dst1; /* dst1 -> 2 x pixel */ 315 316 srcp += 2; 317 dstp += 2; 318 } 319 srcp += srcskip; 320 dstp += dstskip; 321 } 322 _mm_empty(); 323 } 324} 325 326/* fast ARGB888->(A)RGB888 blending with pixel alpha */ 327static void 328BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo * info) 329{ 330 int width = info->dst_w; 331 int height = info->dst_h; 332 Uint32 *srcp = (Uint32 *) info->src; 333 int srcskip = info->src_skip >> 2; 334 Uint32 *dstp = (Uint32 *) info->dst; 335 int dstskip = info->dst_skip >> 2; 336 SDL_PixelFormat *sf = info->src_fmt; 337 Uint32 amask = sf->Amask; 338 Uint32 ashift = sf->Ashift; 339 Uint64 multmask, multmask2; 340 341 __m64 src1, dst1, mm_alpha, mm_zero, mm_alpha2; 342 343 mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */ 344 multmask = 0x00FF; 345 multmask <<= (ashift * 2); 346 multmask2 = 0x00FF00FF00FF00FFULL; 347 348 while (height--) { 349 /* *INDENT-OFF* */ 350 DUFFS_LOOP4({ 351 Uint32 alpha = *srcp & amask; 352 if (alpha == 0) { 353 /* do nothing */ 354 } else if (alpha == amask) { 355 *dstp = *srcp; 356 } else { 357 src1 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src1 (0000ARGB) */ 358 src1 = _mm_unpacklo_pi8(src1, mm_zero); /* 0A0R0G0B -> src1 */ 359 360 dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB) */ 361 dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */ 362 363 mm_alpha = _mm_cvtsi32_si64(alpha); /* alpha -> mm_alpha (0000000A) */ 364 mm_alpha = _mm_srli_si64(mm_alpha, ashift); /* mm_alpha >> ashift -> mm_alpha(0000000A) */ 365 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */ 366 mm_alpha2 = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha2 */ 367 mm_alpha = _mm_or_si64(mm_alpha2, *(__m64 *) & multmask); /* 0F0A0A0A -> mm_alpha */ 368 mm_alpha2 = _mm_xor_si64(mm_alpha2, *(__m64 *) & multmask2); /* 255 - mm_alpha -> mm_alpha */ 369 370 /* blend */ 371 src1 = _mm_mullo_pi16(src1, mm_alpha); 372 src1 = _mm_srli_pi16(src1, 8); 373 dst1 = _mm_mullo_pi16(dst1, mm_alpha2); 374 dst1 = _mm_srli_pi16(dst1, 8); 375 dst1 = _mm_add_pi16(src1, dst1); 376 dst1 = _mm_packs_pu16(dst1, mm_zero); 377 378 *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */ 379 } 380 ++srcp; 381 ++dstp; 382 }, width); 383 /* *INDENT-ON* */ 384 srcp += srcskip; 385 dstp += dstskip; 386 } 387 _mm_empty(); 388} 389 390#endif /* __MMX__ */ 391 392/* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ 393static void 394BlitRGBtoRGBSurfaceAlpha128(SDL_BlitInfo * info) 395{ 396 int width = info->dst_w; 397 int height = info->dst_h; 398 Uint32 *srcp = (Uint32 *) info->src; 399 int srcskip = info->src_skip >> 2; 400 Uint32 *dstp = (Uint32 *) info->dst; 401 int dstskip = info->dst_skip >> 2; 402 403 while (height--) { 404 /* *INDENT-OFF* */ 405 DUFFS_LOOP4({ 406 Uint32 s = *srcp++; 407 Uint32 d = *dstp; 408 *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) 409 + (s & d & 0x00010101)) | 0xff000000; 410 }, width); 411 /* *INDENT-ON* */ 412 srcp += srcskip; 413 dstp += dstskip; 414 } 415} 416 417/* fast RGB888->(A)RGB888 blending with surface alpha */ 418static void 419BlitRGBtoRGBSurfaceAlpha(SDL_BlitInfo * info) 420{ 421 unsigned alpha = info->a; 422 if (alpha == 128) { 423 BlitRGBtoRGBSurfaceAlpha128(info); 424 } else { 425 int width = info->dst_w; 426 int height = info->dst_h; 427 Uint32 *srcp = (Uint32 *) info->src; 428 int srcskip = info->src_skip >> 2; 429 Uint32 *dstp = (Uint32 *) info->dst; 430 int dstskip = info->dst_skip >> 2; 431 Uint32 s; 432 Uint32 d; 433 Uint32 s1; 434 Uint32 d1; 435 436 while (height--) { 437 /* *INDENT-OFF* */ 438 DUFFS_LOOP4({ 439 s = *srcp; 440 d = *dstp; 441 s1 = s & 0xff00ff; 442 d1 = d & 0xff00ff; 443 d1 = (d1 + ((s1 - d1) * alpha >> 8)) 444 & 0xff00ff; 445 s &= 0xff00; 446 d &= 0xff00; 447 d = (d + ((s - d) * alpha >> 8)) & 0xff00; 448 *dstp = d1 | d | 0xff000000; 449 ++srcp; 450 ++dstp; 451 }, width); 452 /* *INDENT-ON* */ 453 srcp += srcskip; 454 dstp += dstskip; 455 } 456 } 457} 458 459/* fast ARGB888->(A)RGB888 blending with pixel alpha */ 460static void 461BlitRGBtoRGBPixelAlpha(SDL_BlitInfo * info) 462{ 463 int width = info->dst_w; 464 int height = info->dst_h; 465 Uint32 *srcp = (Uint32 *) info->src; 466 int srcskip = info->src_skip >> 2; 467 Uint32 *dstp = (Uint32 *) info->dst; 468 int dstskip = info->dst_skip >> 2; 469 470 while (height--) { 471 /* *INDENT-OFF* */ 472 DUFFS_LOOP4({ 473 Uint32 dalpha; 474 Uint32 d; 475 Uint32 s1; 476 Uint32 d1; 477 Uint32 s = *srcp; 478 Uint32 alpha = s >> 24; 479 /* FIXME: Here we special-case opaque alpha since the 480 compositioning used (>>8 instead of /255) doesn't handle 481 it correctly. Also special-case alpha=0 for speed? 482 Benchmark this! */ 483 if (alpha) { 484 if (alpha == SDL_ALPHA_OPAQUE) { 485 *dstp = *srcp; 486 } else { 487 /* 488 * take out the middle component (green), and process 489 * the other two in parallel. One multiply less. 490 */ 491 d = *dstp; 492 dalpha = d >> 24; 493 s1 = s & 0xff00ff; 494 d1 = d & 0xff00ff; 495 d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff; 496 s &= 0xff00; 497 d &= 0xff00; 498 d = (d + ((s - d) * alpha >> 8)) & 0xff00; 499 dalpha = alpha + (dalpha * (alpha ^ 0xFF) >> 8); 500 *dstp = d1 | d | (dalpha << 24); 501 } 502 } 503 ++srcp; 504 ++dstp; 505 }, width); 506 /* *INDENT-ON* */ 507 srcp += srcskip; 508 dstp += dstskip; 509 } 510} 511 512#ifdef __3dNOW__ 513/* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */ 514static void 515BlitRGBtoRGBPixelAlphaMMX3DNOW(SDL_BlitInfo * info) 516{ 517 int width = info->dst_w; 518 int height = info->dst_h; 519 Uint32 *srcp = (Uint32 *) info->src; 520 int srcskip = info->src_skip >> 2; 521 Uint32 *dstp = (Uint32 *) info->dst; 522 int dstskip = info->dst_skip >> 2; 523 SDL_PixelFormat *sf = info->src_fmt; 524 Uint32 amask = sf->Amask; 525 Uint32 ashift = sf->Ashift; 526 Uint64 multmask, multmask2; 527 528 __m64 src1, dst1, mm_alpha, mm_zero, mm_alpha2; 529 530 mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */ 531 multmask = 0x00FF; 532 multmask <<= (ashift * 2); 533 multmask2 = 0x00FF00FF00FF00FFULL; 534 535 while (height--) { 536 /* *INDENT-OFF* */ 537 DUFFS_LOOP4({ 538 Uint32 alpha; 539 540 _m_prefetch(srcp + 16); 541 _m_prefetch(dstp + 16); 542 543 alpha = *srcp & amask; 544 if (alpha == 0) { 545 /* do nothing */ 546 } else if (alpha == amask) { 547 *dstp = *srcp; 548 } else { 549 src1 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src1 (0000ARGB) */ 550 src1 = _mm_unpacklo_pi8(src1, mm_zero); /* 0A0R0G0B -> src1 */ 551 552 dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB) */ 553 dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */ 554 555 mm_alpha = _mm_cvtsi32_si64(alpha); /* alpha -> mm_alpha (0000000A) */ 556 mm_alpha = _mm_srli_si64(mm_alpha, ashift); /* mm_alpha >> ashift -> mm_alpha(0000000A) */ 557 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */ 558 mm_alpha2 = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha2 */ 559 mm_alpha = _mm_or_si64(mm_alpha2, *(__m64 *) & multmask); /* 0F0A0A0A -> mm_alpha */ 560 mm_alpha2 = _mm_xor_si64(mm_alpha2, *(__m64 *) & multmask2); /* 255 - mm_alpha -> mm_alpha */ 561 562 563 /* blend */ 564 src1 = _mm_mullo_pi16(src1, mm_alpha); 565 src1 = _mm_srli_pi16(src1, 8); 566 dst1 = _mm_mullo_pi16(dst1, mm_alpha2); 567 dst1 = _mm_srli_pi16(dst1, 8); 568 dst1 = _mm_add_pi16(src1, dst1); 569 dst1 = _mm_packs_pu16(dst1, mm_zero); 570 571 *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */ 572 } 573 ++srcp; 574 ++dstp; 575 }, width); 576 /* *INDENT-ON* */ 577 srcp += srcskip; 578 dstp += dstskip; 579 } 580 _mm_empty(); 581} 582 583#endif /* __MMX__ */ 584 585/* 16bpp special case for per-surface alpha=50%: blend 2 pixels in parallel */ 586 587/* blend a single 16 bit pixel at 50% */ 588#define BLEND16_50(d, s, mask) \ 589 ((((s & mask) + (d & mask)) >> 1) + (s & d & (~mask & 0xffff))) 590 591/* blend two 16 bit pixels at 50% */ 592#define BLEND2x16_50(d, s, mask) \ 593 (((s & (mask | mask << 16)) >> 1) + ((d & (mask | mask << 16)) >> 1) \ 594 + (s & d & (~(mask | mask << 16)))) 595 596static void 597Blit16to16SurfaceAlpha128(SDL_BlitInfo * info, Uint16 mask) 598{ 599 int width = info->dst_w; 600 int height = info->dst_h; 601 Uint16 *srcp = (Uint16 *) info->src; 602 int srcskip = info->src_skip >> 1; 603 Uint16 *dstp = (Uint16 *) info->dst; 604 int dstskip = info->dst_skip >> 1; 605 606 while (height--) { 607 if (((uintptr_t) srcp ^ (uintptr_t) dstp) & 2) { 608 /* 609 * Source and destination not aligned, pipeline it. 610 * This is mostly a win for big blits but no loss for 611 * small ones 612 */ 613 Uint32 prev_sw; 614 int w = width; 615 616 /* handle odd destination */ 617 if ((uintptr_t) dstp & 2) { 618 Uint16 d = *dstp, s = *srcp; 619 *dstp = BLEND16_50(d, s, mask); 620 dstp++; 621 srcp++; 622 w--; 623 } 624 srcp++; /* srcp is now 32-bit aligned */ 625 626 /* bootstrap pipeline with first halfword */ 627 prev_sw = ((Uint32 *) srcp)[-1]; 628 629 while (w > 1) { 630 Uint32 sw, dw, s; 631 sw = *(Uint32 *) srcp; 632 dw = *(Uint32 *) dstp; 633#if SDL_BYTEORDER == SDL_BIG_ENDIAN 634 s = (prev_sw << 16) + (sw >> 16); 635#else 636 s = (prev_sw >> 16) + (sw << 16); 637#endif 638 prev_sw = sw; 639 *(Uint32 *) dstp = BLEND2x16_50(dw, s, mask); 640 dstp += 2; 641 srcp += 2; 642 w -= 2; 643 } 644 645 /* final pixel if any */ 646 if (w) { 647 Uint16 d = *dstp, s; 648#if SDL_BYTEORDER == SDL_BIG_ENDIAN 649 s = (Uint16) prev_sw; 650#else 651 s = (Uint16) (prev_sw >> 16); 652#endif 653 *dstp = BLEND16_50(d, s, mask); 654 srcp++; 655 dstp++; 656 } 657 srcp += srcskip - 1; 658 dstp += dstskip; 659 } else { 660 /* source and destination are aligned */ 661 int w = width; 662 663 /* first odd pixel? */ 664 if ((uintptr_t) srcp & 2) { 665 Uint16 d = *dstp, s = *srcp; 666 *dstp = BLEND16_50(d, s, mask); 667 srcp++; 668 dstp++; 669 w--; 670 } 671 /* srcp and dstp are now 32-bit aligned */ 672 673 while (w > 1) { 674 Uint32 sw = *(Uint32 *) srcp; 675 Uint32 dw = *(Uint32 *) dstp; 676 *(Uint32 *) dstp = BLEND2x16_50(dw, sw, mask); 677 srcp += 2; 678 dstp += 2; 679 w -= 2; 680 } 681 682 /* last odd pixel? */ 683 if (w) { 684 Uint16 d = *dstp, s = *srcp; 685 *dstp = BLEND16_50(d, s, mask); 686 srcp++; 687 dstp++; 688 } 689 srcp += srcskip; 690 dstp += dstskip; 691 } 692 } 693} 694 695#ifdef __MMX__ 696 697/* fast RGB565->RGB565 blending with surface alpha */ 698static void 699Blit565to565SurfaceAlphaMMX(SDL_BlitInfo * info) 700{ 701 unsigned alpha = info->a; 702 if (alpha == 128) { 703 Blit16to16SurfaceAlpha128(info, 0xf7de); 704 } else { 705 int width = info->dst_w; 706 int height = info->dst_h; 707 Uint16 *srcp = (Uint16 *) info->src; 708 int srcskip = info->src_skip >> 1; 709 Uint16 *dstp = (Uint16 *) info->dst; 710 int dstskip = info->dst_skip >> 1; 711 Uint32 s, d; 712 713 __m64 src1, dst1, src2, dst2, gmask, bmask, mm_res, mm_alpha; 714 715 alpha &= ~(1 + 2 + 4); /* cut alpha to get the exact same behaviour */ 716 mm_alpha = _mm_set_pi32(0, alpha); /* 0000000A -> mm_alpha */ 717 alpha >>= 3; /* downscale alpha to 5 bits */ 718 719 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */ 720 mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */ 721 /* position alpha to allow for mullo and mulhi on diff channels 722 to reduce the number of operations */ 723 mm_alpha = _mm_slli_si64(mm_alpha, 3); 724 725 /* Setup the 565 color channel masks */ 726 gmask = _mm_set_pi32(0x07E007E0, 0x07E007E0); /* MASKGREEN -> gmask */ 727 bmask = _mm_set_pi32(0x001F001F, 0x001F001F); /* MASKBLUE -> bmask */ 728 729 while (height--) { 730 /* *INDENT-OFF* */ 731 DUFFS_LOOP_124( 732 { 733 s = *srcp++; 734 d = *dstp; 735 /* 736 * shift out the middle component (green) to 737 * the high 16 bits, and process all three RGB 738 * components at the same time. 739 */ 740 s = (s | s << 16) & 0x07e0f81f; 741 d = (d | d << 16) & 0x07e0f81f; 742 d += (s - d) * alpha >> 5; 743 d &= 0x07e0f81f; 744 *dstp++ = (Uint16)(d | d >> 16); 745 },{ 746 s = *srcp++; 747 d = *dstp; 748 /* 749 * shift out the middle component (green) to 750 * the high 16 bits, and process all three RGB 751 * components at the same time. 752 */ 753 s = (s | s << 16) & 0x07e0f81f; 754 d = (d | d << 16) & 0x07e0f81f; 755 d += (s - d) * alpha >> 5; 756 d &= 0x07e0f81f; 757 *dstp++ = (Uint16)(d | d >> 16); 758 s = *srcp++; 759 d = *dstp; 760 /* 761 * shift out the middle component (green) to 762 * the high 16 bits, and process all three RGB 763 * components at the same time. 764 */ 765 s = (s | s << 16) & 0x07e0f81f; 766 d = (d | d << 16) & 0x07e0f81f; 767 d += (s - d) * alpha >> 5; 768 d &= 0x07e0f81f; 769 *dstp++ = (Uint16)(d | d >> 16); 770 },{ 771 src1 = *(__m64*)srcp; /* 4 src pixels -> src1 */ 772 dst1 = *(__m64*)dstp; /* 4 dst pixels -> dst1 */ 773 774 /* red */ 775 src2 = src1; 776 src2 = _mm_srli_pi16(src2, 11); /* src2 >> 11 -> src2 [000r 000r 000r 000r] */ 777 778 dst2 = dst1; 779 dst2 = _mm_srli_pi16(dst2, 11); /* dst2 >> 11 -> dst2 [000r 000r 000r 000r] */ 780 781 /* blend */ 782 src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */ 783 src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ 784 src2 = _mm_srli_pi16(src2, 11); /* src2 >> 11 -> src2 */ 785 dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */ 786 dst2 = _mm_slli_pi16(dst2, 11); /* dst2 << 11 -> dst2 */ 787 788 mm_res = dst2; /* RED -> mm_res */ 789 790 /* green -- process the bits in place */ 791 src2 = src1; 792 src2 = _mm_and_si64(src2, gmask); /* src & MASKGREEN -> src2 */ 793 794 dst2 = dst1; 795 dst2 = _mm_and_si64(dst2, gmask); /* dst & MASKGREEN -> dst2 */ 796 797 /* blend */ 798 src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */ 799 src2 = _mm_mulhi_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ 800 src2 = _mm_slli_pi16(src2, 5); /* src2 << 5 -> src2 */ 801 dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */ 802 803 mm_res = _mm_or_si64(mm_res, dst2); /* RED | GREEN -> mm_res */ 804 805 /* blue */ 806 src2 = src1; 807 src2 = _mm_and_si64(src2, bmask); /* src & MASKBLUE -> src2[000b 000b 000b 000b] */ 808 809 dst2 = dst1; 810 dst2 = _mm_and_si64(dst2, bmask); /* dst & MASKBLUE -> dst2[000b 000b 000b 000b] */ 811 812 /* blend */ 813 src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */ 814 src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ 815 src2 = _mm_srli_pi16(src2, 11); /* src2 >> 11 -> src2 */ 816 dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */ 817 dst2 = _mm_and_si64(dst2, bmask); /* dst2 & MASKBLUE -> dst2 */ 818 819 mm_res = _mm_or_si64(mm_res, dst2); /* RED | GREEN | BLUE -> mm_res */ 820 821 *(__m64*)dstp = mm_res; /* mm_res -> 4 dst pixels */ 822 823 srcp += 4; 824 dstp += 4; 825 }, width); 826 /* *INDENT-ON* */ 827 srcp += srcskip; 828 dstp += dstskip; 829 } 830 _mm_empty(); 831 } 832} 833 834/* fast RGB555->RGB555 blending with surface alpha */ 835static void 836Blit555to555SurfaceAlphaMMX(SDL_BlitInfo * info) 837{ 838 unsigned alpha = info->a; 839 if (alpha == 128) { 840 Blit16to16SurfaceAlpha128(info, 0xfbde); 841 } else { 842 int width = info->dst_w; 843 int height = info->dst_h; 844 Uint16 *srcp = (Uint16 *) info->src; 845 int srcskip = info->src_skip >> 1; 846 Uint16 *dstp = (Uint16 *) info->dst; 847 int dstskip = info->dst_skip >> 1; 848 Uint32 s, d; 849 850 __m64 src1, dst1, src2, dst2, rmask, gmask, bmask, mm_res, mm_alpha; 851 852 alpha &= ~(1 + 2 + 4); /* cut alpha to get the exact same behaviour */ 853 mm_alpha = _mm_set_pi32(0, alpha); /* 0000000A -> mm_alpha */ 854 alpha >>= 3; /* downscale alpha to 5 bits */ 855 856 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */ 857 mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */ 858 /* position alpha to allow for mullo and mulhi on diff channels 859 to reduce the number of operations */ 860 mm_alpha = _mm_slli_si64(mm_alpha, 3); 861 862 /* Setup the 555 color channel masks */ 863 rmask = _mm_set_pi32(0x7C007C00, 0x7C007C00); /* MASKRED -> rmask */ 864 gmask = _mm_set_pi32(0x03E003E0, 0x03E003E0); /* MASKGREEN -> gmask */ 865 bmask = _mm_set_pi32(0x001F001F, 0x001F001F); /* MASKBLUE -> bmask */ 866 867 while (height--) { 868 /* *INDENT-OFF* */ 869 DUFFS_LOOP_124( 870 { 871 s = *srcp++; 872 d = *dstp; 873 /* 874 * shift out the middle component (green) to 875 * the high 16 bits, and process all three RGB 876 * components at the same time. 877 */ 878 s = (s | s << 16) & 0x03e07c1f; 879 d = (d | d << 16) & 0x03e07c1f; 880 d += (s - d) * alpha >> 5; 881 d &= 0x03e07c1f; 882 *dstp++ = (Uint16)(d | d >> 16); 883 },{ 884 s = *srcp++; 885 d = *dstp; 886 /* 887 * shift out the middle component (green) to 888 * the high 16 bits, and process all three RGB 889 * components at the same time. 890 */ 891 s = (s | s << 16) & 0x03e07c1f; 892 d = (d | d << 16) & 0x03e07c1f; 893 d += (s - d) * alpha >> 5; 894 d &= 0x03e07c1f; 895 *dstp++ = (Uint16)(d | d >> 16); 896 s = *srcp++; 897 d = *dstp; 898 /* 899 * shift out the middle component (green) to 900 * the high 16 bits, and process all three RGB 901 * components at the same time. 902 */ 903 s = (s | s << 16) & 0x03e07c1f; 904 d = (d | d << 16) & 0x03e07c1f; 905 d += (s - d) * alpha >> 5; 906 d &= 0x03e07c1f; 907 *dstp++ = (Uint16)(d | d >> 16); 908 },{ 909 src1 = *(__m64*)srcp; /* 4 src pixels -> src1 */ 910 dst1 = *(__m64*)dstp; /* 4 dst pixels -> dst1 */ 911 912 /* red -- process the bits in place */ 913 src2 = src1; 914 src2 = _mm_and_si64(src2, rmask); /* src & MASKRED -> src2 */ 915 916 dst2 = dst1; 917 dst2 = _mm_and_si64(dst2, rmask); /* dst & MASKRED -> dst2 */ 918 919 /* blend */ 920 src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */ 921 src2 = _mm_mulhi_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ 922 src2 = _mm_slli_pi16(src2, 5); /* src2 << 5 -> src2 */ 923 dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */ 924 dst2 = _mm_and_si64(dst2, rmask); /* dst2 & MASKRED -> dst2 */ 925 926 mm_res = dst2; /* RED -> mm_res */ 927 928 /* green -- process the bits in place */ 929 src2 = src1; 930 src2 = _mm_and_si64(src2, gmask); /* src & MASKGREEN -> src2 */ 931 932 dst2 = dst1; 933 dst2 = _mm_and_si64(dst2, gmask); /* dst & MASKGREEN -> dst2 */ 934 935 /* blend */ 936 src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */ 937 src2 = _mm_mulhi_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ 938 src2 = _mm_slli_pi16(src2, 5); /* src2 << 5 -> src2 */ 939 dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */ 940 941 mm_res = _mm_or_si64(mm_res, dst2); /* RED | GREEN -> mm_res */ 942 943 /* blue */ 944 src2 = src1; /* src -> src2 */ 945 src2 = _mm_and_si64(src2, bmask); /* src & MASKBLUE -> src2[000b 000b 000b 000b] */ 946 947 dst2 = dst1; /* dst -> dst2 */ 948 dst2 = _mm_and_si64(dst2, bmask); /* dst & MASKBLUE -> dst2[000b 000b 000b 000b] */ 949 950 /* blend */ 951 src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */ 952 src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ 953 src2 = _mm_srli_pi16(src2, 11); /* src2 >> 11 -> src2 */ 954 dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */ 955 dst2 = _mm_and_si64(dst2, bmask); /* dst2 & MASKBLUE -> dst2 */ 956 957 mm_res = _mm_or_si64(mm_res, dst2); /* RED | GREEN | BLUE -> mm_res */ 958 959 *(__m64*)dstp = mm_res; /* mm_res -> 4 dst pixels */ 960 961 srcp += 4; 962 dstp += 4; 963 }, width); 964 /* *INDENT-ON* */ 965 srcp += srcskip; 966 dstp += dstskip; 967 } 968 _mm_empty(); 969 } 970} 971 972#endif /* __MMX__ */ 973 974/* fast RGB565->RGB565 blending with surface alpha */ 975static void 976Blit565to565SurfaceAlpha(SDL_BlitInfo * info) 977{ 978 unsigned alpha = info->a; 979 if (alpha == 128) { 980 Blit16to16SurfaceAlpha128(info, 0xf7de); 981 } else { 982 int width = info->dst_w; 983 int height = info->dst_h; 984 Uint16 *srcp = (Uint16 *) info->src; 985 int srcskip = info->src_skip >> 1; 986 Uint16 *dstp = (Uint16 *) info->dst; 987 int dstskip = info->dst_skip >> 1; 988 alpha >>= 3; /* downscale alpha to 5 bits */ 989 990 while (height--) { 991 /* *INDENT-OFF* */ 992 DUFFS_LOOP4({ 993 Uint32 s = *srcp++; 994 Uint32 d = *dstp; 995 /* 996 * shift out the middle component (green) to 997 * the high 16 bits, and process all three RGB 998 * components at the same time. 999 */ 1000 s = (s | s << 16) & 0x07e0f81f; 1001 d = (d | d << 16) & 0x07e0f81f; 1002 d += (s - d) * alpha >> 5; 1003 d &= 0x07e0f81f; 1004 *dstp++ = (Uint16)(d | d >> 16); 1005 }, width); 1006 /* *INDENT-ON* */ 1007 srcp += srcskip; 1008 dstp += dstskip; 1009 } 1010 } 1011} 1012 1013/* fast RGB555->RGB555 blending with surface alpha */ 1014static void 1015Blit555to555SurfaceAlpha(SDL_BlitInfo * info) 1016{ 1017 unsigned alpha = info->a; /* downscale alpha to 5 bits */ 1018 if (alpha == 128) { 1019 Blit16to16SurfaceAlpha128(info, 0xfbde); 1020 } else { 1021 int width = info->dst_w; 1022 int height = info->dst_h; 1023 Uint16 *srcp = (Uint16 *) info->src; 1024 int srcskip = info->src_skip >> 1; 1025 Uint16 *dstp = (Uint16 *) info->dst; 1026 int dstskip = info->dst_skip >> 1; 1027 alpha >>= 3; /* downscale alpha to 5 bits */ 1028 1029 while (height--) { 1030 /* *INDENT-OFF* */ 1031 DUFFS_LOOP4({ 1032 Uint32 s = *srcp++; 1033 Uint32 d = *dstp; 1034 /* 1035 * shift out the middle component (green) to 1036 * the high 16 bits, and process all three RGB 1037 * components at the same time. 1038 */ 1039 s = (s | s << 16) & 0x03e07c1f; 1040 d = (d | d << 16) & 0x03e07c1f; 1041 d += (s - d) * alpha >> 5; 1042 d &= 0x03e07c1f; 1043 *dstp++ = (Uint16)(d | d >> 16); 1044 }, width); 1045 /* *INDENT-ON* */ 1046 srcp += srcskip; 1047 dstp += dstskip; 1048 } 1049 } 1050} 1051 1052/* fast ARGB8888->RGB565 blending with pixel alpha */ 1053static void 1054BlitARGBto565PixelAlpha(SDL_BlitInfo * info) 1055{ 1056 int width = info->dst_w; 1057 int height = info->dst_h; 1058 Uint32 *srcp = (Uint32 *) info->src; 1059 int srcskip = info->src_skip >> 2; 1060 Uint16 *dstp = (Uint16 *) info->dst; 1061 int dstskip = info->dst_skip >> 1; 1062 1063 while (height--) { 1064 /* *INDENT-OFF* */ 1065 DUFFS_LOOP4({ 1066 Uint32 s = *srcp; 1067 unsigned alpha = s >> 27; /* downscale alpha to 5 bits */ 1068 /* FIXME: Here we special-case opaque alpha since the 1069 compositioning used (>>8 instead of /255) doesn't handle 1070 it correctly. Also special-case alpha=0 for speed? 1071 Benchmark this! */ 1072 if(alpha) { 1073 if(alpha == (SDL_ALPHA_OPAQUE >> 3)) { 1074 *dstp = (Uint16)((s >> 8 & 0xf800) + (s >> 5 & 0x7e0) + (s >> 3 & 0x1f)); 1075 } else { 1076 Uint32 d = *dstp; 1077 /* 1078 * convert source and destination to G0RAB65565 1079 * and blend all components at the same time 1080 */ 1081 s = ((s & 0xfc00) << 11) + (s >> 8 & 0xf800) 1082 + (s >> 3 & 0x1f); 1083 d = (d | d << 16) & 0x07e0f81f; 1084 d += (s - d) * alpha >> 5; 1085 d &= 0x07e0f81f; 1086 *dstp = (Uint16)(d | d >> 16); 1087 } 1088 } 1089 srcp++; 1090 dstp++; 1091 }, width); 1092 /* *INDENT-ON* */ 1093 srcp += srcskip; 1094 dstp += dstskip; 1095 } 1096} 1097 1098/* fast ARGB8888->RGB555 blending with pixel alpha */ 1099static void 1100BlitARGBto555PixelAlpha(SDL_BlitInfo * info) 1101{ 1102 int width = info->dst_w; 1103 int height = info->dst_h; 1104 Uint32 *srcp = (Uint32 *) info->src; 1105 int srcskip = info->src_skip >> 2; 1106 Uint16 *dstp = (Uint16 *) info->dst; 1107 int dstskip = info->dst_skip >> 1; 1108 1109 while (height--) { 1110 /* *INDENT-OFF* */ 1111 DUFFS_LOOP4({ 1112 unsigned alpha; 1113 Uint32 s = *srcp; 1114 alpha = s >> 27; /* downscale alpha to 5 bits */ 1115 /* FIXME: Here we special-case opaque alpha since the 1116 compositioning used (>>8 instead of /255) doesn't handle 1117 it correctly. Also special-case alpha=0 for speed? 1118 Benchmark this! */ 1119 if(alpha) { 1120 if(alpha == (SDL_ALPHA_OPAQUE >> 3)) { 1121 *dstp = (Uint16)((s >> 9 & 0x7c00) + (s >> 6 & 0x3e0) + (s >> 3 & 0x1f)); 1122 } else { 1123 Uint32 d = *dstp; 1124 /* 1125 * convert source and destination to G0RAB65565 1126 * and blend all components at the same time 1127 */ 1128 s = ((s & 0xf800) << 10) + (s >> 9 & 0x7c00) 1129 + (s >> 3 & 0x1f); 1130 d = (d | d << 16) & 0x03e07c1f; 1131 d += (s - d) * alpha >> 5; 1132 d &= 0x03e07c1f; 1133 *dstp = (Uint16)(d | d >> 16); 1134 } 1135 } 1136 srcp++; 1137 dstp++; 1138 }, width); 1139 /* *INDENT-ON* */ 1140 srcp += srcskip; 1141 dstp += dstskip; 1142 } 1143} 1144 1145/* General (slow) N->N blending with per-surface alpha */ 1146static void 1147BlitNtoNSurfaceAlpha(SDL_BlitInfo * info) 1148{ 1149 int width = info->dst_w; 1150 int height = info->dst_h; 1151 Uint8 *src = info->src; 1152 int srcskip = info->src_skip; 1153 Uint8 *dst = info->dst; 1154 int dstskip = info->dst_skip; 1155 SDL_PixelFormat *srcfmt = info->src_fmt; 1156 SDL_PixelFormat *dstfmt = info->dst_fmt; 1157 int srcbpp = srcfmt->BytesPerPixel; 1158 int dstbpp = dstfmt->BytesPerPixel; 1159 Uint32 Pixel; 1160 unsigned sR, sG, sB; 1161 unsigned dR, dG, dB, dA; 1162 const unsigned sA = info->a; 1163 1164 if (sA) { 1165 while (height--) { 1166 /* *INDENT-OFF* */ 1167 DUFFS_LOOP4( 1168 { 1169 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB); 1170 DISEMBLE_RGBA(dst, dstbpp, dstfmt, Pixel, dR, dG, dB, dA); 1171 ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA); 1172 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA); 1173 src += srcbpp; 1174 dst += dstbpp; 1175 }, 1176 width); 1177 /* *INDENT-ON* */ 1178 src += srcskip; 1179 dst += dstskip; 1180 } 1181 } 1182} 1183 1184/* General (slow) colorkeyed N->N blending with per-surface alpha */ 1185static void 1186BlitNtoNSurfaceAlphaKey(SDL_BlitInfo * info) 1187{ 1188 int width = info->dst_w; 1189 int height = info->dst_h; 1190 Uint8 *src = info->src; 1191 int srcskip = info->src_skip; 1192 Uint8 *dst = info->dst; 1193 int dstskip = info->dst_skip; 1194 SDL_PixelFormat *srcfmt = info->src_fmt; 1195 SDL_PixelFormat *dstfmt = info->dst_fmt; 1196 Uint32 ckey = info->colorkey; 1197 int srcbpp = srcfmt->BytesPerPixel; 1198 int dstbpp = dstfmt->BytesPerPixel; 1199 Uint32 Pixel; 1200 unsigned sR, sG, sB; 1201 unsigned dR, dG, dB, dA; 1202 const unsigned sA = info->a; 1203 1204 while (height--) { 1205 /* *INDENT-OFF* */ 1206 DUFFS_LOOP4( 1207 { 1208 RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel); 1209 if(sA && Pixel != ckey) { 1210 RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); 1211 DISEMBLE_RGBA(dst, dstbpp, dstfmt, Pixel, dR, dG, dB, dA); 1212 ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA); 1213 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA); 1214 } 1215 src += srcbpp; 1216 dst += dstbpp; 1217 }, 1218 width); 1219 /* *INDENT-ON* */ 1220 src += srcskip; 1221 dst += dstskip; 1222 } 1223} 1224 1225/* General (slow) N->N blending with pixel alpha */ 1226static void 1227BlitNtoNPixelAlpha(SDL_BlitInfo * info) 1228{ 1229 int width = info->dst_w; 1230 int height = info->dst_h; 1231 Uint8 *src = info->src; 1232 int srcskip = info->src_skip; 1233 Uint8 *dst = info->dst; 1234 int dstskip = info->dst_skip; 1235 SDL_PixelFormat *srcfmt = info->src_fmt; 1236 SDL_PixelFormat *dstfmt = info->dst_fmt; 1237 int srcbpp; 1238 int dstbpp; 1239 Uint32 Pixel; 1240 unsigned sR, sG, sB, sA; 1241 unsigned dR, dG, dB, dA; 1242 1243 /* Set up some basic variables */ 1244 srcbpp = srcfmt->BytesPerPixel; 1245 dstbpp = dstfmt->BytesPerPixel; 1246 1247 while (height--) { 1248 /* *INDENT-OFF* */ 1249 DUFFS_LOOP4( 1250 { 1251 DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA); 1252 if(sA) { 1253 DISEMBLE_RGBA(dst, dstbpp, dstfmt, Pixel, dR, dG, dB, dA); 1254 ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA); 1255 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA); 1256 } 1257 src += srcbpp; 1258 dst += dstbpp; 1259 }, 1260 width); 1261 /* *INDENT-ON* */ 1262 src += srcskip; 1263 dst += dstskip; 1264 } 1265} 1266 1267 1268SDL_BlitFunc 1269SDL_CalculateBlitA(SDL_Surface * surface) 1270{ 1271 SDL_PixelFormat *sf = surface->format; 1272 SDL_PixelFormat *df = surface->map->dst->format; 1273 1274 switch (surface->map->info.flags & ~SDL_COPY_RLE_MASK) { 1275 case SDL_COPY_BLEND: 1276 /* Per-pixel alpha blits */ 1277 switch (df->BytesPerPixel) { 1278 case 1: 1279 return BlitNto1PixelAlpha; 1280 1281 case 2: 1282 if (sf->BytesPerPixel == 4 && sf->Amask == 0xff000000 1283 && sf->Gmask == 0xff00 1284 && ((sf->Rmask == 0xff && df->Rmask == 0x1f) 1285 || (sf->Bmask == 0xff && df->Bmask == 0x1f))) { 1286 if (df->Gmask == 0x7e0) 1287 return BlitARGBto565PixelAlpha; 1288 else if (df->Gmask == 0x3e0) 1289 return BlitARGBto555PixelAlpha; 1290 } 1291 return BlitNtoNPixelAlpha; 1292 1293 case 4: 1294 if (sf->Rmask == df->Rmask 1295 && sf->Gmask == df->Gmask 1296 && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) { 1297#if defined(__MMX__) || defined(__3dNOW__) 1298 if (sf->Rshift % 8 == 0 1299 && sf->Gshift % 8 == 0 1300 && sf->Bshift % 8 == 0 1301 && sf->Ashift % 8 == 0 && sf->Aloss == 0) { 1302#ifdef __3dNOW__ 1303 if (SDL_Has3DNow()) 1304 return BlitRGBtoRGBPixelAlphaMMX3DNOW; 1305#endif 1306#ifdef __MMX__ 1307 if (SDL_HasMMX()) 1308 return BlitRGBtoRGBPixelAlphaMMX; 1309#endif 1310 } 1311#endif /* __MMX__ || __3dNOW__ */ 1312 if (sf->Amask == 0xff000000) { 1313 return BlitRGBtoRGBPixelAlpha; 1314 } 1315 } 1316 return BlitNtoNPixelAlpha; 1317 1318 case 3: 1319 default: 1320 return BlitNtoNPixelAlpha; 1321 } 1322 break; 1323 1324 case SDL_COPY_MODULATE_ALPHA | SDL_COPY_BLEND: 1325 if (sf->Amask == 0) { 1326 /* Per-surface alpha blits */ 1327 switch (df->BytesPerPixel) { 1328 case 1: 1329 return BlitNto1SurfaceAlpha; 1330 1331 case 2: 1332 if (surface->map->identity) { 1333 if (df->Gmask == 0x7e0) { 1334#ifdef __MMX__ 1335 if (SDL_HasMMX()) 1336 return Blit565to565SurfaceAlphaMMX; 1337 else 1338#endif 1339 return Blit565to565SurfaceAlpha; 1340 } else if (df->Gmask == 0x3e0) { 1341#ifdef __MMX__ 1342 if (SDL_HasMMX()) 1343 return Blit555to555SurfaceAlphaMMX; 1344 else 1345#endif 1346 return Blit555to555SurfaceAlpha; 1347 } 1348 } 1349 return BlitNtoNSurfaceAlpha; 1350 1351 case 4: 1352 if (sf->Rmask == df->Rmask 1353 && sf->Gmask == df->Gmask 1354 && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) { 1355#ifdef __MMX__ 1356 if (sf->Rshift % 8 == 0 1357 && sf->Gshift % 8 == 0 1358 && sf->Bshift % 8 == 0 && SDL_HasMMX()) 1359 return BlitRGBtoRGBSurfaceAlphaMMX; 1360#endif 1361 if ((sf->Rmask | sf->Gmask | sf->Bmask) == 0xffffff) { 1362 return BlitRGBtoRGBSurfaceAlpha; 1363 } 1364 } 1365 return BlitNtoNSurfaceAlpha; 1366 1367 case 3: 1368 default: 1369 return BlitNtoNSurfaceAlpha; 1370 } 1371 } 1372 break; 1373 1374 case SDL_COPY_COLORKEY | SDL_COPY_MODULATE_ALPHA | SDL_COPY_BLEND: 1375 if (sf->Amask == 0) { 1376 if (df->BytesPerPixel == 1) { 1377 return BlitNto1SurfaceAlphaKey; 1378 } else { 1379 return BlitNtoNSurfaceAlphaKey; 1380 } 1381 } 1382 break; 1383 } 1384 1385 return NULL; 1386} 1387 1388/* vi: set ts=4 sw=4 expandtab: */