/* lmmi_helper.c (14386B) */
1/* 2 * Loongson Multimedia Instruction emulation helpers for QEMU. 3 * 4 * Copyright (c) 2011 Richard Henderson <rth@twiddle.net> 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20#include "qemu/osdep.h" 21#include "cpu.h" 22#include "exec/helper-proto.h" 23 24/* 25 * If the byte ordering doesn't matter, i.e. all columns are treated 26 * identically, then this union can be used directly. If byte ordering 27 * does matter, we generally ignore dumping to memory. 28 */ 29typedef union { 30 uint8_t ub[8]; 31 int8_t sb[8]; 32 uint16_t uh[4]; 33 int16_t sh[4]; 34 uint32_t uw[2]; 35 int32_t sw[2]; 36 uint64_t d; 37} LMIValue; 38 39/* Some byte ordering issues can be mitigated by XORing in the following. */ 40#ifdef HOST_WORDS_BIGENDIAN 41# define BYTE_ORDER_XOR(N) N 42#else 43# define BYTE_ORDER_XOR(N) 0 44#endif 45 46#define SATSB(x) (x < -0x80 ? -0x80 : x > 0x7f ? 0x7f : x) 47#define SATUB(x) (x > 0xff ? 0xff : x) 48 49#define SATSH(x) (x < -0x8000 ? -0x8000 : x > 0x7fff ? 0x7fff : x) 50#define SATUH(x) (x > 0xffff ? 0xffff : x) 51 52#define SATSW(x) \ 53 (x < -0x80000000ll ? -0x80000000ll : x > 0x7fffffff ? 0x7fffffff : x) 54#define SATUW(x) (x > 0xffffffffull ? 
0xffffffffull : x) 55 56uint64_t helper_paddsb(uint64_t fs, uint64_t ft) 57{ 58 LMIValue vs, vt; 59 unsigned int i; 60 61 vs.d = fs; 62 vt.d = ft; 63 for (i = 0; i < 8; ++i) { 64 int r = vs.sb[i] + vt.sb[i]; 65 vs.sb[i] = SATSB(r); 66 } 67 return vs.d; 68} 69 70uint64_t helper_paddusb(uint64_t fs, uint64_t ft) 71{ 72 LMIValue vs, vt; 73 unsigned int i; 74 75 vs.d = fs; 76 vt.d = ft; 77 for (i = 0; i < 8; ++i) { 78 int r = vs.ub[i] + vt.ub[i]; 79 vs.ub[i] = SATUB(r); 80 } 81 return vs.d; 82} 83 84uint64_t helper_paddsh(uint64_t fs, uint64_t ft) 85{ 86 LMIValue vs, vt; 87 unsigned int i; 88 89 vs.d = fs; 90 vt.d = ft; 91 for (i = 0; i < 4; ++i) { 92 int r = vs.sh[i] + vt.sh[i]; 93 vs.sh[i] = SATSH(r); 94 } 95 return vs.d; 96} 97 98uint64_t helper_paddush(uint64_t fs, uint64_t ft) 99{ 100 LMIValue vs, vt; 101 unsigned int i; 102 103 vs.d = fs; 104 vt.d = ft; 105 for (i = 0; i < 4; ++i) { 106 int r = vs.uh[i] + vt.uh[i]; 107 vs.uh[i] = SATUH(r); 108 } 109 return vs.d; 110} 111 112uint64_t helper_paddb(uint64_t fs, uint64_t ft) 113{ 114 LMIValue vs, vt; 115 unsigned int i; 116 117 vs.d = fs; 118 vt.d = ft; 119 for (i = 0; i < 8; ++i) { 120 vs.ub[i] += vt.ub[i]; 121 } 122 return vs.d; 123} 124 125uint64_t helper_paddh(uint64_t fs, uint64_t ft) 126{ 127 LMIValue vs, vt; 128 unsigned int i; 129 130 vs.d = fs; 131 vt.d = ft; 132 for (i = 0; i < 4; ++i) { 133 vs.uh[i] += vt.uh[i]; 134 } 135 return vs.d; 136} 137 138uint64_t helper_paddw(uint64_t fs, uint64_t ft) 139{ 140 LMIValue vs, vt; 141 unsigned int i; 142 143 vs.d = fs; 144 vt.d = ft; 145 for (i = 0; i < 2; ++i) { 146 vs.uw[i] += vt.uw[i]; 147 } 148 return vs.d; 149} 150 151uint64_t helper_psubsb(uint64_t fs, uint64_t ft) 152{ 153 LMIValue vs, vt; 154 unsigned int i; 155 156 vs.d = fs; 157 vt.d = ft; 158 for (i = 0; i < 8; ++i) { 159 int r = vs.sb[i] - vt.sb[i]; 160 vs.sb[i] = SATSB(r); 161 } 162 return vs.d; 163} 164 165uint64_t helper_psubusb(uint64_t fs, uint64_t ft) 166{ 167 LMIValue vs, vt; 168 unsigned int i; 169 
170 vs.d = fs; 171 vt.d = ft; 172 for (i = 0; i < 8; ++i) { 173 int r = vs.ub[i] - vt.ub[i]; 174 vs.ub[i] = SATUB(r); 175 } 176 return vs.d; 177} 178 179uint64_t helper_psubsh(uint64_t fs, uint64_t ft) 180{ 181 LMIValue vs, vt; 182 unsigned int i; 183 184 vs.d = fs; 185 vt.d = ft; 186 for (i = 0; i < 4; ++i) { 187 int r = vs.sh[i] - vt.sh[i]; 188 vs.sh[i] = SATSH(r); 189 } 190 return vs.d; 191} 192 193uint64_t helper_psubush(uint64_t fs, uint64_t ft) 194{ 195 LMIValue vs, vt; 196 unsigned int i; 197 198 vs.d = fs; 199 vt.d = ft; 200 for (i = 0; i < 4; ++i) { 201 int r = vs.uh[i] - vt.uh[i]; 202 vs.uh[i] = SATUH(r); 203 } 204 return vs.d; 205} 206 207uint64_t helper_psubb(uint64_t fs, uint64_t ft) 208{ 209 LMIValue vs, vt; 210 unsigned int i; 211 212 vs.d = fs; 213 vt.d = ft; 214 for (i = 0; i < 8; ++i) { 215 vs.ub[i] -= vt.ub[i]; 216 } 217 return vs.d; 218} 219 220uint64_t helper_psubh(uint64_t fs, uint64_t ft) 221{ 222 LMIValue vs, vt; 223 unsigned int i; 224 225 vs.d = fs; 226 vt.d = ft; 227 for (i = 0; i < 4; ++i) { 228 vs.uh[i] -= vt.uh[i]; 229 } 230 return vs.d; 231} 232 233uint64_t helper_psubw(uint64_t fs, uint64_t ft) 234{ 235 LMIValue vs, vt; 236 unsigned int i; 237 238 vs.d = fs; 239 vt.d = ft; 240 for (i = 0; i < 2; ++i) { 241 vs.uw[i] -= vt.uw[i]; 242 } 243 return vs.d; 244} 245 246uint64_t helper_pshufh(uint64_t fs, uint64_t ft) 247{ 248 unsigned host = BYTE_ORDER_XOR(3); 249 LMIValue vd, vs; 250 unsigned i; 251 252 vs.d = fs; 253 vd.d = 0; 254 for (i = 0; i < 4; i++, ft >>= 2) { 255 vd.uh[i ^ host] = vs.uh[(ft & 3) ^ host]; 256 } 257 return vd.d; 258} 259 260uint64_t helper_packsswh(uint64_t fs, uint64_t ft) 261{ 262 uint64_t fd = 0; 263 int64_t tmp; 264 265 tmp = (int32_t)(fs >> 0); 266 tmp = SATSH(tmp); 267 fd |= (tmp & 0xffff) << 0; 268 269 tmp = (int32_t)(fs >> 32); 270 tmp = SATSH(tmp); 271 fd |= (tmp & 0xffff) << 16; 272 273 tmp = (int32_t)(ft >> 0); 274 tmp = SATSH(tmp); 275 fd |= (tmp & 0xffff) << 32; 276 277 tmp = (int32_t)(ft >> 32); 278 tmp 
= SATSH(tmp); 279 fd |= (tmp & 0xffff) << 48; 280 281 return fd; 282} 283 284uint64_t helper_packsshb(uint64_t fs, uint64_t ft) 285{ 286 uint64_t fd = 0; 287 unsigned int i; 288 289 for (i = 0; i < 4; ++i) { 290 int16_t tmp = fs >> (i * 16); 291 tmp = SATSB(tmp); 292 fd |= (uint64_t)(tmp & 0xff) << (i * 8); 293 } 294 for (i = 0; i < 4; ++i) { 295 int16_t tmp = ft >> (i * 16); 296 tmp = SATSB(tmp); 297 fd |= (uint64_t)(tmp & 0xff) << (i * 8 + 32); 298 } 299 300 return fd; 301} 302 303uint64_t helper_packushb(uint64_t fs, uint64_t ft) 304{ 305 uint64_t fd = 0; 306 unsigned int i; 307 308 for (i = 0; i < 4; ++i) { 309 int16_t tmp = fs >> (i * 16); 310 tmp = SATUB(tmp); 311 fd |= (uint64_t)(tmp & 0xff) << (i * 8); 312 } 313 for (i = 0; i < 4; ++i) { 314 int16_t tmp = ft >> (i * 16); 315 tmp = SATUB(tmp); 316 fd |= (uint64_t)(tmp & 0xff) << (i * 8 + 32); 317 } 318 319 return fd; 320} 321 322uint64_t helper_punpcklwd(uint64_t fs, uint64_t ft) 323{ 324 return (fs & 0xffffffff) | (ft << 32); 325} 326 327uint64_t helper_punpckhwd(uint64_t fs, uint64_t ft) 328{ 329 return (fs >> 32) | (ft & ~0xffffffffull); 330} 331 332uint64_t helper_punpcklhw(uint64_t fs, uint64_t ft) 333{ 334 unsigned host = BYTE_ORDER_XOR(3); 335 LMIValue vd, vs, vt; 336 337 vs.d = fs; 338 vt.d = ft; 339 vd.uh[0 ^ host] = vs.uh[0 ^ host]; 340 vd.uh[1 ^ host] = vt.uh[0 ^ host]; 341 vd.uh[2 ^ host] = vs.uh[1 ^ host]; 342 vd.uh[3 ^ host] = vt.uh[1 ^ host]; 343 344 return vd.d; 345} 346 347uint64_t helper_punpckhhw(uint64_t fs, uint64_t ft) 348{ 349 unsigned host = BYTE_ORDER_XOR(3); 350 LMIValue vd, vs, vt; 351 352 vs.d = fs; 353 vt.d = ft; 354 vd.uh[0 ^ host] = vs.uh[2 ^ host]; 355 vd.uh[1 ^ host] = vt.uh[2 ^ host]; 356 vd.uh[2 ^ host] = vs.uh[3 ^ host]; 357 vd.uh[3 ^ host] = vt.uh[3 ^ host]; 358 359 return vd.d; 360} 361 362uint64_t helper_punpcklbh(uint64_t fs, uint64_t ft) 363{ 364 unsigned host = BYTE_ORDER_XOR(7); 365 LMIValue vd, vs, vt; 366 367 vs.d = fs; 368 vt.d = ft; 369 vd.ub[0 ^ host] = vs.ub[0 
^ host]; 370 vd.ub[1 ^ host] = vt.ub[0 ^ host]; 371 vd.ub[2 ^ host] = vs.ub[1 ^ host]; 372 vd.ub[3 ^ host] = vt.ub[1 ^ host]; 373 vd.ub[4 ^ host] = vs.ub[2 ^ host]; 374 vd.ub[5 ^ host] = vt.ub[2 ^ host]; 375 vd.ub[6 ^ host] = vs.ub[3 ^ host]; 376 vd.ub[7 ^ host] = vt.ub[3 ^ host]; 377 378 return vd.d; 379} 380 381uint64_t helper_punpckhbh(uint64_t fs, uint64_t ft) 382{ 383 unsigned host = BYTE_ORDER_XOR(7); 384 LMIValue vd, vs, vt; 385 386 vs.d = fs; 387 vt.d = ft; 388 vd.ub[0 ^ host] = vs.ub[4 ^ host]; 389 vd.ub[1 ^ host] = vt.ub[4 ^ host]; 390 vd.ub[2 ^ host] = vs.ub[5 ^ host]; 391 vd.ub[3 ^ host] = vt.ub[5 ^ host]; 392 vd.ub[4 ^ host] = vs.ub[6 ^ host]; 393 vd.ub[5 ^ host] = vt.ub[6 ^ host]; 394 vd.ub[6 ^ host] = vs.ub[7 ^ host]; 395 vd.ub[7 ^ host] = vt.ub[7 ^ host]; 396 397 return vd.d; 398} 399 400uint64_t helper_pavgh(uint64_t fs, uint64_t ft) 401{ 402 LMIValue vs, vt; 403 unsigned i; 404 405 vs.d = fs; 406 vt.d = ft; 407 for (i = 0; i < 4; i++) { 408 vs.uh[i] = (vs.uh[i] + vt.uh[i] + 1) >> 1; 409 } 410 return vs.d; 411} 412 413uint64_t helper_pavgb(uint64_t fs, uint64_t ft) 414{ 415 LMIValue vs, vt; 416 unsigned i; 417 418 vs.d = fs; 419 vt.d = ft; 420 for (i = 0; i < 8; i++) { 421 vs.ub[i] = (vs.ub[i] + vt.ub[i] + 1) >> 1; 422 } 423 return vs.d; 424} 425 426uint64_t helper_pmaxsh(uint64_t fs, uint64_t ft) 427{ 428 LMIValue vs, vt; 429 unsigned i; 430 431 vs.d = fs; 432 vt.d = ft; 433 for (i = 0; i < 4; i++) { 434 vs.sh[i] = (vs.sh[i] >= vt.sh[i] ? vs.sh[i] : vt.sh[i]); 435 } 436 return vs.d; 437} 438 439uint64_t helper_pminsh(uint64_t fs, uint64_t ft) 440{ 441 LMIValue vs, vt; 442 unsigned i; 443 444 vs.d = fs; 445 vt.d = ft; 446 for (i = 0; i < 4; i++) { 447 vs.sh[i] = (vs.sh[i] <= vt.sh[i] ? vs.sh[i] : vt.sh[i]); 448 } 449 return vs.d; 450} 451 452uint64_t helper_pmaxub(uint64_t fs, uint64_t ft) 453{ 454 LMIValue vs, vt; 455 unsigned i; 456 457 vs.d = fs; 458 vt.d = ft; 459 for (i = 0; i < 4; i++) { 460 vs.ub[i] = (vs.ub[i] >= vt.ub[i] ? 
vs.ub[i] : vt.ub[i]); 461 } 462 return vs.d; 463} 464 465uint64_t helper_pminub(uint64_t fs, uint64_t ft) 466{ 467 LMIValue vs, vt; 468 unsigned i; 469 470 vs.d = fs; 471 vt.d = ft; 472 for (i = 0; i < 4; i++) { 473 vs.ub[i] = (vs.ub[i] <= vt.ub[i] ? vs.ub[i] : vt.ub[i]); 474 } 475 return vs.d; 476} 477 478uint64_t helper_pcmpeqw(uint64_t fs, uint64_t ft) 479{ 480 LMIValue vs, vt; 481 unsigned i; 482 483 vs.d = fs; 484 vt.d = ft; 485 for (i = 0; i < 2; i++) { 486 vs.uw[i] = -(vs.uw[i] == vt.uw[i]); 487 } 488 return vs.d; 489} 490 491uint64_t helper_pcmpgtw(uint64_t fs, uint64_t ft) 492{ 493 LMIValue vs, vt; 494 unsigned i; 495 496 vs.d = fs; 497 vt.d = ft; 498 for (i = 0; i < 2; i++) { 499 vs.uw[i] = -(vs.uw[i] > vt.uw[i]); 500 } 501 return vs.d; 502} 503 504uint64_t helper_pcmpeqh(uint64_t fs, uint64_t ft) 505{ 506 LMIValue vs, vt; 507 unsigned i; 508 509 vs.d = fs; 510 vt.d = ft; 511 for (i = 0; i < 4; i++) { 512 vs.uh[i] = -(vs.uh[i] == vt.uh[i]); 513 } 514 return vs.d; 515} 516 517uint64_t helper_pcmpgth(uint64_t fs, uint64_t ft) 518{ 519 LMIValue vs, vt; 520 unsigned i; 521 522 vs.d = fs; 523 vt.d = ft; 524 for (i = 0; i < 4; i++) { 525 vs.uh[i] = -(vs.uh[i] > vt.uh[i]); 526 } 527 return vs.d; 528} 529 530uint64_t helper_pcmpeqb(uint64_t fs, uint64_t ft) 531{ 532 LMIValue vs, vt; 533 unsigned i; 534 535 vs.d = fs; 536 vt.d = ft; 537 for (i = 0; i < 8; i++) { 538 vs.ub[i] = -(vs.ub[i] == vt.ub[i]); 539 } 540 return vs.d; 541} 542 543uint64_t helper_pcmpgtb(uint64_t fs, uint64_t ft) 544{ 545 LMIValue vs, vt; 546 unsigned i; 547 548 vs.d = fs; 549 vt.d = ft; 550 for (i = 0; i < 8; i++) { 551 vs.ub[i] = -(vs.ub[i] > vt.ub[i]); 552 } 553 return vs.d; 554} 555 556uint64_t helper_psllw(uint64_t fs, uint64_t ft) 557{ 558 LMIValue vs; 559 unsigned i; 560 561 ft &= 0x7f; 562 if (ft > 31) { 563 return 0; 564 } 565 vs.d = fs; 566 for (i = 0; i < 2; ++i) { 567 vs.uw[i] <<= ft; 568 } 569 return vs.d; 570} 571 572uint64_t helper_psrlw(uint64_t fs, uint64_t ft) 573{ 574 
LMIValue vs; 575 unsigned i; 576 577 ft &= 0x7f; 578 if (ft > 31) { 579 return 0; 580 } 581 vs.d = fs; 582 for (i = 0; i < 2; ++i) { 583 vs.uw[i] >>= ft; 584 } 585 return vs.d; 586} 587 588uint64_t helper_psraw(uint64_t fs, uint64_t ft) 589{ 590 LMIValue vs; 591 unsigned i; 592 593 ft &= 0x7f; 594 if (ft > 31) { 595 ft = 31; 596 } 597 vs.d = fs; 598 for (i = 0; i < 2; ++i) { 599 vs.sw[i] >>= ft; 600 } 601 return vs.d; 602} 603 604uint64_t helper_psllh(uint64_t fs, uint64_t ft) 605{ 606 LMIValue vs; 607 unsigned i; 608 609 ft &= 0x7f; 610 if (ft > 15) { 611 return 0; 612 } 613 vs.d = fs; 614 for (i = 0; i < 4; ++i) { 615 vs.uh[i] <<= ft; 616 } 617 return vs.d; 618} 619 620uint64_t helper_psrlh(uint64_t fs, uint64_t ft) 621{ 622 LMIValue vs; 623 unsigned i; 624 625 ft &= 0x7f; 626 if (ft > 15) { 627 return 0; 628 } 629 vs.d = fs; 630 for (i = 0; i < 4; ++i) { 631 vs.uh[i] >>= ft; 632 } 633 return vs.d; 634} 635 636uint64_t helper_psrah(uint64_t fs, uint64_t ft) 637{ 638 LMIValue vs; 639 unsigned i; 640 641 ft &= 0x7f; 642 if (ft > 15) { 643 ft = 15; 644 } 645 vs.d = fs; 646 for (i = 0; i < 4; ++i) { 647 vs.sh[i] >>= ft; 648 } 649 return vs.d; 650} 651 652uint64_t helper_pmullh(uint64_t fs, uint64_t ft) 653{ 654 LMIValue vs, vt; 655 unsigned i; 656 657 vs.d = fs; 658 vt.d = ft; 659 for (i = 0; i < 4; ++i) { 660 vs.sh[i] *= vt.sh[i]; 661 } 662 return vs.d; 663} 664 665uint64_t helper_pmulhh(uint64_t fs, uint64_t ft) 666{ 667 LMIValue vs, vt; 668 unsigned i; 669 670 vs.d = fs; 671 vt.d = ft; 672 for (i = 0; i < 4; ++i) { 673 int32_t r = vs.sh[i] * vt.sh[i]; 674 vs.sh[i] = r >> 16; 675 } 676 return vs.d; 677} 678 679uint64_t helper_pmulhuh(uint64_t fs, uint64_t ft) 680{ 681 LMIValue vs, vt; 682 unsigned i; 683 684 vs.d = fs; 685 vt.d = ft; 686 for (i = 0; i < 4; ++i) { 687 uint32_t r = vs.uh[i] * vt.uh[i]; 688 vs.uh[i] = r >> 16; 689 } 690 return vs.d; 691} 692 693uint64_t helper_pmaddhw(uint64_t fs, uint64_t ft) 694{ 695 unsigned host = BYTE_ORDER_XOR(3); 696 LMIValue 
vs, vt; 697 uint32_t p0, p1; 698 699 vs.d = fs; 700 vt.d = ft; 701 p0 = vs.sh[0 ^ host] * vt.sh[0 ^ host]; 702 p0 += vs.sh[1 ^ host] * vt.sh[1 ^ host]; 703 p1 = vs.sh[2 ^ host] * vt.sh[2 ^ host]; 704 p1 += vs.sh[3 ^ host] * vt.sh[3 ^ host]; 705 706 return ((uint64_t)p1 << 32) | p0; 707} 708 709uint64_t helper_pasubub(uint64_t fs, uint64_t ft) 710{ 711 LMIValue vs, vt; 712 unsigned i; 713 714 vs.d = fs; 715 vt.d = ft; 716 for (i = 0; i < 8; ++i) { 717 int r = vs.ub[i] - vt.ub[i]; 718 vs.ub[i] = (r < 0 ? -r : r); 719 } 720 return vs.d; 721} 722 723uint64_t helper_biadd(uint64_t fs) 724{ 725 unsigned i, fd; 726 727 for (i = fd = 0; i < 8; ++i) { 728 fd += (fs >> (i * 8)) & 0xff; 729 } 730 return fd & 0xffff; 731} 732 733uint64_t helper_pmovmskb(uint64_t fs) 734{ 735 unsigned fd = 0; 736 737 fd |= ((fs >> 7) & 1) << 0; 738 fd |= ((fs >> 15) & 1) << 1; 739 fd |= ((fs >> 23) & 1) << 2; 740 fd |= ((fs >> 31) & 1) << 3; 741 fd |= ((fs >> 39) & 1) << 4; 742 fd |= ((fs >> 47) & 1) << 5; 743 fd |= ((fs >> 55) & 1) << 6; 744 fd |= ((fs >> 63) & 1) << 7; 745 746 return fd & 0xff; 747}