tcg-runtime-gvec.c (40491B)
1/* 2 * Generic vectorized operation runtime 3 * 4 * Copyright (c) 2018 Linaro 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20#include "qemu/osdep.h" 21#include "qemu/host-utils.h" 22#include "cpu.h" 23#include "exec/helper-proto.h" 24#include "tcg/tcg-gvec-desc.h" 25 26 27static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc) 28{ 29 intptr_t maxsz = simd_maxsz(desc); 30 intptr_t i; 31 32 if (unlikely(maxsz > oprsz)) { 33 for (i = oprsz; i < maxsz; i += sizeof(uint64_t)) { 34 *(uint64_t *)(d + i) = 0; 35 } 36 } 37} 38 39void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc) 40{ 41 intptr_t oprsz = simd_oprsz(desc); 42 intptr_t i; 43 44 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 45 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + *(uint8_t *)(b + i); 46 } 47 clear_high(d, oprsz, desc); 48} 49 50void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc) 51{ 52 intptr_t oprsz = simd_oprsz(desc); 53 intptr_t i; 54 55 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 56 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + *(uint16_t *)(b + i); 57 } 58 clear_high(d, oprsz, desc); 59} 60 61void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc) 62{ 63 intptr_t oprsz = simd_oprsz(desc); 64 intptr_t i; 65 66 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 67 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + *(uint32_t *)(b + i); 68 } 69 clear_high(d, oprsz, desc); 70} 71 72void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc) 73{ 74 intptr_t oprsz = simd_oprsz(desc); 75 intptr_t i; 76 77 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 78 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + *(uint64_t *)(b + i); 79 } 80 clear_high(d, oprsz, desc); 81} 82 83void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc) 84{ 85 intptr_t oprsz = simd_oprsz(desc); 86 intptr_t i; 87 88 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 89 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + (uint8_t)b; 90 } 91 clear_high(d, oprsz, desc); 92} 93 94void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc) 95{ 96 intptr_t oprsz = simd_oprsz(desc); 97 intptr_t i; 98 99 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 100 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + (uint16_t)b; 101 } 102 clear_high(d, oprsz, desc); 103} 104 105void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc) 106{ 107 intptr_t oprsz = simd_oprsz(desc); 108 intptr_t i; 109 110 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 111 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + (uint32_t)b; 112 } 113 clear_high(d, oprsz, desc); 114} 115 116void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc) 117{ 118 intptr_t oprsz = simd_oprsz(desc); 119 intptr_t i; 120 121 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 122 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + b; 123 } 124 clear_high(d, oprsz, desc); 125} 126 127void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc) 128{ 129 intptr_t oprsz = simd_oprsz(desc); 130 intptr_t i; 131 132 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 133 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - *(uint8_t *)(b + i); 134 } 135 clear_high(d, oprsz, desc); 136} 137 138void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc) 139{ 140 intptr_t oprsz = simd_oprsz(desc); 141 intptr_t i; 142 143 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 144 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - *(uint16_t *)(b + i); 145 } 146 clear_high(d, oprsz, desc); 147} 148 149void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc) 150{ 151 intptr_t oprsz = simd_oprsz(desc); 152 intptr_t i; 153 154 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 155 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - *(uint32_t *)(b + i); 156 } 157 clear_high(d, oprsz, desc); 158} 159 160void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc) 161{ 162 intptr_t oprsz = simd_oprsz(desc); 163 intptr_t i; 164 165 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 166 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - *(uint64_t *)(b + i); 167 } 168 clear_high(d, oprsz, desc); 169} 170 171void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc) 172{ 173 intptr_t oprsz = simd_oprsz(desc); 174 intptr_t i; 175 176 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 177 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - (uint8_t)b; 178 } 179 clear_high(d, oprsz, desc); 180} 181 182void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc) 183{ 184 intptr_t oprsz = simd_oprsz(desc); 185 intptr_t i; 186 187 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 188 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - (uint16_t)b; 189 } 190 clear_high(d, oprsz, desc); 191} 192 193void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc) 194{ 195 intptr_t oprsz = simd_oprsz(desc); 196 intptr_t i; 197 198 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 199 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - (uint32_t)b; 200 } 201 clear_high(d, oprsz, desc); 202} 203 204void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc) 205{ 206 intptr_t oprsz = simd_oprsz(desc); 207 intptr_t i; 208 209 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 210 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - b; 211 } 212 clear_high(d, oprsz, desc); 213} 214 215void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc) 216{ 217 intptr_t oprsz = simd_oprsz(desc); 218 intptr_t i; 219 220 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 221 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * *(uint8_t *)(b + i); 222 } 223 clear_high(d, oprsz, desc); 224} 225 226void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc) 227{ 228 intptr_t oprsz = simd_oprsz(desc); 229 intptr_t i; 230 231 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 232 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * *(uint16_t *)(b + i); 233 } 234 clear_high(d, oprsz, desc); 235} 236 237void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc) 238{ 239 intptr_t oprsz = simd_oprsz(desc); 240 intptr_t i; 241 242 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 243 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * *(uint32_t *)(b + i); 244 } 245 clear_high(d, oprsz, desc); 246} 247 248void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc) 249{ 250 intptr_t oprsz = simd_oprsz(desc); 251 intptr_t i; 252 253 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 254 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * *(uint64_t *)(b + i); 255 } 256 clear_high(d, oprsz, desc); 257} 258 259void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc) 260{ 261 intptr_t oprsz = simd_oprsz(desc); 262 intptr_t i; 263 264 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 265 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * (uint8_t)b; 266 } 267 clear_high(d, oprsz, desc); 268} 269 270void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc) 271{ 272 intptr_t oprsz = simd_oprsz(desc); 273 intptr_t i; 274 275 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 276 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * (uint16_t)b; 277 } 278 clear_high(d, oprsz, desc); 279} 280 281void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc) 282{ 283 intptr_t oprsz = simd_oprsz(desc); 284 intptr_t i; 285 286 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 287 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * (uint32_t)b; 288 } 289 clear_high(d, oprsz, desc); 290} 291 292void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc) 293{ 294 intptr_t oprsz = simd_oprsz(desc); 295 intptr_t i; 296 297 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 298 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * b; 299 } 300 clear_high(d, oprsz, desc); 301} 302 303void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc) 304{ 305 intptr_t oprsz = simd_oprsz(desc); 306 intptr_t i; 307 308 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 309 *(uint8_t *)(d + i) = -*(uint8_t *)(a + i); 310 } 311 clear_high(d, oprsz, desc); 312} 313 314void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc) 315{ 316 intptr_t oprsz = simd_oprsz(desc); 317 intptr_t i; 318 319 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 320 *(uint16_t *)(d + i) = -*(uint16_t *)(a + i); 321 } 322 clear_high(d, oprsz, desc); 323} 324 325void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc) 326{ 327 intptr_t oprsz = simd_oprsz(desc); 328 intptr_t i; 329 330 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 331 *(uint32_t *)(d + i) = -*(uint32_t *)(a + i); 332 } 333 clear_high(d, oprsz, desc); 334} 335 336void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc) 337{ 338 intptr_t oprsz = simd_oprsz(desc); 339 intptr_t i; 340 341 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 342 *(uint64_t *)(d + i) = -*(uint64_t *)(a + i); 343 } 344 clear_high(d, oprsz, desc); 345} 346 347void HELPER(gvec_abs8)(void *d, void *a, uint32_t desc) 348{ 349 intptr_t oprsz = simd_oprsz(desc); 350 intptr_t i; 351 352 for (i = 0; i < oprsz; i += sizeof(int8_t)) { 353 int8_t aa = *(int8_t *)(a + i); 354 *(int8_t *)(d + i) = aa < 0 ? -aa : aa; 355 } 356 clear_high(d, oprsz, desc); 357} 358 359void HELPER(gvec_abs16)(void *d, void *a, uint32_t desc) 360{ 361 intptr_t oprsz = simd_oprsz(desc); 362 intptr_t i; 363 364 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 365 int16_t aa = *(int16_t *)(a + i); 366 *(int16_t *)(d + i) = aa < 0 ? -aa : aa; 367 } 368 clear_high(d, oprsz, desc); 369} 370 371void HELPER(gvec_abs32)(void *d, void *a, uint32_t desc) 372{ 373 intptr_t oprsz = simd_oprsz(desc); 374 intptr_t i; 375 376 for (i = 0; i < oprsz; i += sizeof(int32_t)) { 377 int32_t aa = *(int32_t *)(a + i); 378 *(int32_t *)(d + i) = aa < 0 ? -aa : aa; 379 } 380 clear_high(d, oprsz, desc); 381} 382 383void HELPER(gvec_abs64)(void *d, void *a, uint32_t desc) 384{ 385 intptr_t oprsz = simd_oprsz(desc); 386 intptr_t i; 387 388 for (i = 0; i < oprsz; i += sizeof(int64_t)) { 389 int64_t aa = *(int64_t *)(a + i); 390 *(int64_t *)(d + i) = aa < 0 ? -aa : aa; 391 } 392 clear_high(d, oprsz, desc); 393} 394 395void HELPER(gvec_mov)(void *d, void *a, uint32_t desc) 396{ 397 intptr_t oprsz = simd_oprsz(desc); 398 399 memcpy(d, a, oprsz); 400 clear_high(d, oprsz, desc); 401} 402 403void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c) 404{ 405 intptr_t oprsz = simd_oprsz(desc); 406 intptr_t i; 407 408 if (c == 0) { 409 oprsz = 0; 410 } else { 411 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 412 *(uint64_t *)(d + i) = c; 413 } 414 } 415 clear_high(d, oprsz, desc); 416} 417 418void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c) 419{ 420 intptr_t oprsz = simd_oprsz(desc); 421 intptr_t i; 422 423 if (c == 0) { 424 oprsz = 0; 425 } else { 426 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 427 *(uint32_t *)(d + i) = c; 428 } 429 } 430 clear_high(d, oprsz, desc); 431} 432 433void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c) 434{ 435 HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff)); 436} 437 438void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c) 439{ 440 HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff)); 441} 442 443void HELPER(gvec_not)(void *d, void *a, uint32_t desc) 444{ 445 intptr_t oprsz = simd_oprsz(desc); 446 intptr_t i; 447 448 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 449 *(uint64_t *)(d + i) = ~*(uint64_t *)(a + i); 450 } 451 clear_high(d, oprsz, desc); 452} 453 454void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc) 455{ 456 intptr_t oprsz = simd_oprsz(desc); 457 intptr_t i; 458 459 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 460 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & *(uint64_t *)(b + i); 461 } 462 clear_high(d, oprsz, desc); 463} 464 465void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc) 466{ 467 intptr_t oprsz = simd_oprsz(desc); 468 intptr_t i; 469 470 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 471 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | *(uint64_t *)(b + i); 472 } 473 clear_high(d, oprsz, desc); 474} 475 476void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc) 477{ 478 intptr_t oprsz = simd_oprsz(desc); 479 intptr_t i; 480 481 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 482 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ *(uint64_t *)(b + i); 483 } 484 clear_high(d, oprsz, desc); 485} 486 487void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc) 488{ 489 intptr_t oprsz = simd_oprsz(desc); 490 intptr_t i; 491 492 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 493 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) &~ *(uint64_t *)(b + i); 494 } 495 clear_high(d, oprsz, desc); 496} 497 498void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc) 499{ 500 intptr_t oprsz = simd_oprsz(desc); 501 intptr_t i; 502 503 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 504 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) |~ *(uint64_t *)(b + i); 505 } 506 clear_high(d, oprsz, desc); 507} 508 509void HELPER(gvec_nand)(void *d, void *a, void *b, uint32_t desc) 510{ 511 intptr_t oprsz = simd_oprsz(desc); 512 intptr_t i; 513 514 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 515 *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) & *(uint64_t *)(b + i)); 516 } 517 clear_high(d, oprsz, desc); 518} 519 520void HELPER(gvec_nor)(void *d, void *a, void *b, uint32_t desc) 521{ 522 intptr_t oprsz = simd_oprsz(desc); 523 intptr_t i; 524 525 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 526 *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) | *(uint64_t *)(b + i)); 527 } 528 clear_high(d, oprsz, desc); 529} 530 531void HELPER(gvec_eqv)(void *d, void *a, void *b, uint32_t desc) 532{ 533 intptr_t oprsz = simd_oprsz(desc); 534 intptr_t i; 535 536 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 537 *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) ^ *(uint64_t *)(b + i)); 538 } 539 clear_high(d, oprsz, desc); 540} 541 542void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc) 543{ 544 intptr_t oprsz = simd_oprsz(desc); 545 intptr_t i; 546 547 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 548 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & b; 549 } 550 clear_high(d, oprsz, desc); 551} 552 553void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc) 554{ 555 intptr_t oprsz = simd_oprsz(desc); 556 intptr_t i; 557 558 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 559 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ b; 560 } 561 clear_high(d, oprsz, desc); 562} 563 564void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc) 565{ 566 intptr_t oprsz = simd_oprsz(desc); 567 intptr_t i; 568 569 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 570 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | b; 571 } 572 clear_high(d, oprsz, desc); 573} 574 575void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc) 576{ 577 intptr_t oprsz = simd_oprsz(desc); 578 int shift = simd_data(desc); 579 intptr_t i; 580 581 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 582 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << shift; 583 } 584 clear_high(d, oprsz, desc); 585} 586 587void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc) 588{ 589 intptr_t oprsz = simd_oprsz(desc); 590 int shift = simd_data(desc); 591 intptr_t i; 592 593 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 594 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << shift; 595 } 596 clear_high(d, oprsz, desc); 597} 598 599void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc) 600{ 601 intptr_t oprsz = simd_oprsz(desc); 602 int shift = simd_data(desc); 603 intptr_t i; 604 605 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 606 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << shift; 607 } 608 clear_high(d, oprsz, desc); 609} 610 611void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc) 612{ 613 intptr_t oprsz = simd_oprsz(desc); 614 int shift = simd_data(desc); 615 intptr_t i; 616 617 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 618 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << shift; 619 } 620 clear_high(d, oprsz, desc); 621} 622 623void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc) 624{ 625 intptr_t oprsz = simd_oprsz(desc); 626 int shift = simd_data(desc); 627 intptr_t i; 628 629 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 630 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> shift; 631 } 632 clear_high(d, oprsz, desc); 633} 634 635void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc) 636{ 637 intptr_t oprsz = simd_oprsz(desc); 638 int shift = simd_data(desc); 639 intptr_t i; 640 641 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 642 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> shift; 643 } 644 clear_high(d, oprsz, desc); 645} 646 647void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc) 648{ 649 intptr_t oprsz = simd_oprsz(desc); 650 int shift = simd_data(desc); 651 intptr_t i; 652 653 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 654 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> shift; 655 } 656 clear_high(d, oprsz, desc); 657} 658 659void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc) 660{ 661 intptr_t oprsz = simd_oprsz(desc); 662 int shift = simd_data(desc); 663 intptr_t i; 664 665 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 666 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> shift; 667 } 668 clear_high(d, oprsz, desc); 669} 670 671void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc) 672{ 673 intptr_t oprsz = simd_oprsz(desc); 674 int shift = simd_data(desc); 675 intptr_t i; 676 677 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 678 *(int8_t *)(d + i) = *(int8_t *)(a + i) >> shift; 679 } 680 clear_high(d, oprsz, desc); 681} 682 683void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc) 684{ 685 intptr_t oprsz = simd_oprsz(desc); 686 int shift = simd_data(desc); 687 intptr_t i; 688 689 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 690 *(int16_t *)(d + i) = *(int16_t *)(a + i) >> shift; 691 } 692 clear_high(d, oprsz, desc); 693} 694 695void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc) 696{ 697 intptr_t oprsz = simd_oprsz(desc); 698 int shift = simd_data(desc); 699 intptr_t i; 700 701 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 702 *(int32_t *)(d + i) = *(int32_t *)(a + i) >> shift; 703 } 704 clear_high(d, oprsz, desc); 705} 706 707void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc) 708{ 709 intptr_t oprsz = simd_oprsz(desc); 710 int shift = simd_data(desc); 711 intptr_t i; 712 713 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 714 *(int64_t *)(d + i) = *(int64_t *)(a + i) >> shift; 715 } 716 clear_high(d, oprsz, desc); 717} 718 719void HELPER(gvec_rotl8i)(void *d, void *a, uint32_t desc) 720{ 721 intptr_t oprsz = simd_oprsz(desc); 722 int shift = simd_data(desc); 723 intptr_t i; 724 725 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 726 *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), shift); 727 } 728 clear_high(d, oprsz, desc); 729} 730 731void HELPER(gvec_rotl16i)(void *d, void *a, uint32_t desc) 732{ 733 intptr_t oprsz = simd_oprsz(desc); 734 int shift = simd_data(desc); 735 intptr_t i; 736 737 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 738 *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), shift); 739 } 740 clear_high(d, oprsz, desc); 741} 742 743void HELPER(gvec_rotl32i)(void *d, void *a, uint32_t desc) 744{ 745 intptr_t oprsz = simd_oprsz(desc); 746 int shift = simd_data(desc); 747 intptr_t i; 748 749 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 750 *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), shift); 751 } 752 clear_high(d, oprsz, desc); 753} 754 755void HELPER(gvec_rotl64i)(void *d, void *a, uint32_t desc) 756{ 757 intptr_t oprsz = simd_oprsz(desc); 758 int shift = simd_data(desc); 759 intptr_t i; 760 761 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 762 *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), shift); 763 } 764 clear_high(d, oprsz, desc); 765} 766 767void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc) 768{ 769 intptr_t oprsz = simd_oprsz(desc); 770 intptr_t i; 771 772 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 773 uint8_t sh = *(uint8_t *)(b + i) & 7; 774 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << sh; 775 } 776 clear_high(d, oprsz, desc); 777} 778 779void HELPER(gvec_shl16v)(void *d, void *a, void *b, uint32_t desc) 780{ 781 intptr_t oprsz = simd_oprsz(desc); 782 intptr_t i; 783 784 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 785 uint8_t sh = *(uint16_t *)(b + i) & 15; 786 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << sh; 787 } 788 clear_high(d, oprsz, desc); 789} 790 791void HELPER(gvec_shl32v)(void *d, void *a, void *b, uint32_t desc) 792{ 793 intptr_t oprsz = simd_oprsz(desc); 794 intptr_t i; 795 796 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 797 uint8_t sh = *(uint32_t *)(b + i) & 31; 798 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << sh; 799 } 800 clear_high(d, oprsz, desc); 801} 802 803void HELPER(gvec_shl64v)(void *d, void *a, void *b, uint32_t desc) 804{ 805 intptr_t oprsz = simd_oprsz(desc); 806 intptr_t i; 807 808 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 809 uint8_t sh = *(uint64_t *)(b + i) & 63; 810 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << sh; 811 } 812 clear_high(d, oprsz, desc); 813} 814 815void HELPER(gvec_shr8v)(void *d, void *a, void *b, uint32_t desc) 816{ 817 intptr_t oprsz = simd_oprsz(desc); 818 intptr_t i; 819 820 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 821 uint8_t sh = *(uint8_t *)(b + i) & 7; 822 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> sh; 823 } 824 clear_high(d, oprsz, desc); 825} 826 827void HELPER(gvec_shr16v)(void *d, void *a, void *b, uint32_t desc) 828{ 829 intptr_t oprsz = simd_oprsz(desc); 830 intptr_t i; 831 832 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 833 uint8_t sh = *(uint16_t *)(b + i) & 15; 834 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> sh; 835 } 836 clear_high(d, oprsz, desc); 837} 838 839void HELPER(gvec_shr32v)(void *d, void *a, void *b, uint32_t desc) 840{ 841 intptr_t oprsz = simd_oprsz(desc); 842 intptr_t i; 843 844 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 845 uint8_t sh = *(uint32_t *)(b + i) & 31; 846 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> sh; 847 } 848 clear_high(d, oprsz, desc); 849} 850 851void HELPER(gvec_shr64v)(void *d, void *a, void *b, uint32_t desc) 852{ 853 intptr_t oprsz = simd_oprsz(desc); 854 intptr_t i; 855 856 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 857 uint8_t sh = *(uint64_t *)(b + i) & 63; 858 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> sh; 859 } 860 clear_high(d, oprsz, desc); 861} 862 863void HELPER(gvec_sar8v)(void *d, void *a, void *b, uint32_t desc) 864{ 865 intptr_t oprsz = simd_oprsz(desc); 866 intptr_t i; 867 868 for (i = 0; i < oprsz; i += sizeof(int8_t)) { 869 uint8_t sh = *(uint8_t *)(b + i) & 7; 870 *(int8_t *)(d + i) = *(int8_t *)(a + i) >> sh; 871 } 872 clear_high(d, oprsz, desc); 873} 874 875void HELPER(gvec_sar16v)(void *d, void *a, void *b, uint32_t desc) 876{ 877 intptr_t oprsz = simd_oprsz(desc); 878 intptr_t i; 879 880 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 881 uint8_t sh = *(uint16_t *)(b + i) & 15; 882 *(int16_t *)(d + i) = *(int16_t *)(a + i) >> sh; 883 } 884 clear_high(d, oprsz, desc); 885} 886 887void HELPER(gvec_sar32v)(void *d, void *a, void *b, uint32_t desc) 888{ 889 intptr_t oprsz = simd_oprsz(desc); 890 intptr_t i; 891 892 for (i = 0; i < oprsz; i += sizeof(int32_t)) { 893 uint8_t sh = *(uint32_t *)(b + i) & 31; 894 *(int32_t *)(d + i) = *(int32_t *)(a + i) >> sh; 895 } 896 clear_high(d, oprsz, desc); 897} 898 899void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc) 900{ 901 intptr_t oprsz = simd_oprsz(desc); 902 intptr_t i; 903 904 for (i = 0; i < oprsz; i += sizeof(int64_t)) { 905 uint8_t sh = *(uint64_t *)(b + i) & 63; 906 *(int64_t *)(d + i) = *(int64_t *)(a + i) >> sh; 907 } 908 clear_high(d, oprsz, desc); 909} 910 911void HELPER(gvec_rotl8v)(void *d, void *a, void *b, uint32_t desc) 912{ 913 intptr_t oprsz = simd_oprsz(desc); 914 intptr_t i; 915 916 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 917 uint8_t sh = *(uint8_t *)(b + i) & 7; 918 *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), sh); 919 } 920 clear_high(d, oprsz, desc); 921} 922 923void HELPER(gvec_rotl16v)(void *d, void *a, void *b, uint32_t desc) 924{ 925 intptr_t oprsz = simd_oprsz(desc); 926 intptr_t i; 927 928 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 929 uint8_t sh = *(uint16_t *)(b + i) & 15; 930 *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), sh); 931 } 932 clear_high(d, oprsz, desc); 933} 934 935void HELPER(gvec_rotl32v)(void *d, void *a, void *b, uint32_t desc) 936{ 937 intptr_t oprsz = simd_oprsz(desc); 938 intptr_t i; 939 940 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 941 uint8_t sh = *(uint32_t *)(b + i) & 31; 942 *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), sh); 943 } 944 clear_high(d, oprsz, desc); 945} 946 947void HELPER(gvec_rotl64v)(void *d, void *a, void *b, uint32_t desc) 948{ 949 intptr_t oprsz = simd_oprsz(desc); 950 intptr_t i; 951 952 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 953 uint8_t sh = *(uint64_t *)(b + i) & 63; 954 *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), sh); 955 } 956 clear_high(d, oprsz, desc); 957} 958 959void HELPER(gvec_rotr8v)(void *d, void *a, void *b, uint32_t desc) 960{ 961 intptr_t oprsz = simd_oprsz(desc); 962 intptr_t i; 963 964 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 965 uint8_t sh = *(uint8_t *)(b + i) & 7; 966 *(uint8_t *)(d + i) = ror8(*(uint8_t *)(a + i), sh); 967 } 968 clear_high(d, oprsz, desc); 969} 970 971void HELPER(gvec_rotr16v)(void *d, void *a, void *b, uint32_t desc) 972{ 973 intptr_t oprsz = simd_oprsz(desc); 974 intptr_t i; 975 976 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 977 uint8_t sh = *(uint16_t *)(b + i) & 15; 978 *(uint16_t *)(d + i) = ror16(*(uint16_t *)(a + i), sh); 979 } 980 clear_high(d, oprsz, desc); 981} 982 983void HELPER(gvec_rotr32v)(void *d, void *a, void *b, uint32_t desc) 984{ 985 intptr_t oprsz = simd_oprsz(desc); 986 intptr_t i; 987 988 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 989 uint8_t sh = *(uint32_t *)(b + i) & 31; 990 *(uint32_t *)(d + i) = ror32(*(uint32_t *)(a + i), sh); 991 } 992 clear_high(d, oprsz, desc); 993} 994 995void HELPER(gvec_rotr64v)(void *d, void *a, void *b, uint32_t desc) 996{ 997 intptr_t oprsz = simd_oprsz(desc); 998 intptr_t i; 999 1000 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1001 uint8_t sh = *(uint64_t *)(b + i) & 63; 1002 *(uint64_t *)(d + i) = ror64(*(uint64_t *)(a + i), sh); 1003 } 1004 clear_high(d, oprsz, desc); 1005} 1006 1007#define DO_CMP1(NAME, TYPE, OP) \ 1008void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \ 1009{ \ 1010 intptr_t oprsz = simd_oprsz(desc); \ 1011 intptr_t i; \ 1012 for (i = 0; i < oprsz; i += sizeof(TYPE)) { \ 1013 *(TYPE *)(d + i) = -(*(TYPE *)(a + i) OP *(TYPE *)(b + i)); \ 1014 } \ 1015 clear_high(d, oprsz, desc); \ 1016} 1017 1018#define DO_CMP2(SZ) \ 1019 DO_CMP1(gvec_eq##SZ, uint##SZ##_t, ==) \ 1020 DO_CMP1(gvec_ne##SZ, uint##SZ##_t, !=) \ 1021 DO_CMP1(gvec_lt##SZ, int##SZ##_t, <) \ 1022 DO_CMP1(gvec_le##SZ, int##SZ##_t, <=) \ 1023 DO_CMP1(gvec_ltu##SZ, uint##SZ##_t, <) \ 1024 DO_CMP1(gvec_leu##SZ, uint##SZ##_t, <=) 1025 1026DO_CMP2(8) 1027DO_CMP2(16) 1028DO_CMP2(32) 1029DO_CMP2(64) 1030 1031#undef DO_CMP1 1032#undef DO_CMP2 1033 1034void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc) 1035{ 1036 intptr_t oprsz = simd_oprsz(desc); 1037 intptr_t i; 1038 1039 for (i = 0; i < oprsz; i += sizeof(int8_t)) { 1040 int r = *(int8_t *)(a + i) + *(int8_t *)(b + i); 1041 if (r > INT8_MAX) { 1042 r = INT8_MAX; 1043 } else if (r < INT8_MIN) { 1044 r = INT8_MIN; 1045 } 1046 *(int8_t *)(d + i) = r; 1047 } 1048 clear_high(d, oprsz, desc); 1049} 1050 1051void HELPER(gvec_ssadd16)(void *d, void *a, void *b, uint32_t desc) 1052{ 1053 intptr_t oprsz = simd_oprsz(desc); 1054 intptr_t i; 1055 1056 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 1057 int r = *(int16_t *)(a + i) + *(int16_t *)(b + i); 1058 if (r > INT16_MAX) { 1059 r = INT16_MAX; 1060 } else if (r < INT16_MIN) { 1061 r = INT16_MIN; 1062 } 1063 *(int16_t *)(d + i) = r; 1064 } 1065 clear_high(d, oprsz, desc); 1066} 1067 1068void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc) 1069{ 1070 intptr_t oprsz = simd_oprsz(desc); 1071 intptr_t i; 1072 1073 for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1074 int32_t ai = *(int32_t *)(a + i); 1075 int32_t bi = *(int32_t *)(b + i); 1076 int32_t di; 1077 if (sadd32_overflow(ai, bi, &di)) { 1078 di = (di < 0 ? INT32_MAX : INT32_MIN); 1079 } 1080 *(int32_t *)(d + i) = di; 1081 } 1082 clear_high(d, oprsz, desc); 1083} 1084 1085void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc) 1086{ 1087 intptr_t oprsz = simd_oprsz(desc); 1088 intptr_t i; 1089 1090 for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1091 int64_t ai = *(int64_t *)(a + i); 1092 int64_t bi = *(int64_t *)(b + i); 1093 int64_t di; 1094 if (sadd64_overflow(ai, bi, &di)) { 1095 di = (di < 0 ? INT64_MAX : INT64_MIN); 1096 } 1097 *(int64_t *)(d + i) = di; 1098 } 1099 clear_high(d, oprsz, desc); 1100} 1101 1102void HELPER(gvec_sssub8)(void *d, void *a, void *b, uint32_t desc) 1103{ 1104 intptr_t oprsz = simd_oprsz(desc); 1105 intptr_t i; 1106 1107 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1108 int r = *(int8_t *)(a + i) - *(int8_t *)(b + i); 1109 if (r > INT8_MAX) { 1110 r = INT8_MAX; 1111 } else if (r < INT8_MIN) { 1112 r = INT8_MIN; 1113 } 1114 *(uint8_t *)(d + i) = r; 1115 } 1116 clear_high(d, oprsz, desc); 1117} 1118 1119void HELPER(gvec_sssub16)(void *d, void *a, void *b, uint32_t desc) 1120{ 1121 intptr_t oprsz = simd_oprsz(desc); 1122 intptr_t i; 1123 1124 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 1125 int r = *(int16_t *)(a + i) - *(int16_t *)(b + i); 1126 if (r > INT16_MAX) { 1127 r = INT16_MAX; 1128 } else if (r < INT16_MIN) { 1129 r = INT16_MIN; 1130 } 1131 *(int16_t *)(d + i) = r; 1132 } 1133 clear_high(d, oprsz, desc); 1134} 1135 1136void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc) 1137{ 1138 intptr_t oprsz = simd_oprsz(desc); 1139 intptr_t i; 1140 1141 for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1142 int32_t ai = *(int32_t *)(a + i); 1143 int32_t bi = *(int32_t *)(b + i); 1144 int32_t di; 1145 if (ssub32_overflow(ai, bi, &di)) { 1146 di = (di < 0 ? INT32_MAX : INT32_MIN); 1147 } 1148 *(int32_t *)(d + i) = di; 1149 } 1150 clear_high(d, oprsz, desc); 1151} 1152 1153void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc) 1154{ 1155 intptr_t oprsz = simd_oprsz(desc); 1156 intptr_t i; 1157 1158 for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1159 int64_t ai = *(int64_t *)(a + i); 1160 int64_t bi = *(int64_t *)(b + i); 1161 int64_t di; 1162 if (ssub64_overflow(ai, bi, &di)) { 1163 di = (di < 0 ? INT64_MAX : INT64_MIN); 1164 } 1165 *(int64_t *)(d + i) = di; 1166 } 1167 clear_high(d, oprsz, desc); 1168} 1169 1170void HELPER(gvec_usadd8)(void *d, void *a, void *b, uint32_t desc) 1171{ 1172 intptr_t oprsz = simd_oprsz(desc); 1173 intptr_t i; 1174 1175 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1176 unsigned r = *(uint8_t *)(a + i) + *(uint8_t *)(b + i); 1177 if (r > UINT8_MAX) { 1178 r = UINT8_MAX; 1179 } 1180 *(uint8_t *)(d + i) = r; 1181 } 1182 clear_high(d, oprsz, desc); 1183} 1184 1185void HELPER(gvec_usadd16)(void *d, void *a, void *b, uint32_t desc) 1186{ 1187 intptr_t oprsz = simd_oprsz(desc); 1188 intptr_t i; 1189 1190 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1191 unsigned r = *(uint16_t *)(a + i) + *(uint16_t *)(b + i); 1192 if (r > UINT16_MAX) { 1193 r = UINT16_MAX; 1194 } 1195 *(uint16_t *)(d + i) = r; 1196 } 1197 clear_high(d, oprsz, desc); 1198} 1199 1200void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc) 1201{ 1202 intptr_t oprsz = simd_oprsz(desc); 1203 intptr_t i; 1204 1205 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1206 uint32_t ai = *(uint32_t *)(a + i); 1207 uint32_t bi = *(uint32_t *)(b + i); 1208 uint32_t di; 1209 if (uadd32_overflow(ai, bi, &di)) { 1210 di = UINT32_MAX; 1211 } 1212 *(uint32_t *)(d + i) = di; 1213 } 1214 clear_high(d, oprsz, desc); 1215} 1216 1217void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc) 1218{ 1219 intptr_t oprsz = simd_oprsz(desc); 1220 intptr_t i; 1221 1222 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1223 uint64_t ai = *(uint64_t *)(a + i); 1224 uint64_t bi = *(uint64_t *)(b + i); 1225 uint64_t di; 1226 if (uadd64_overflow(ai, bi, &di)) { 1227 di = UINT64_MAX; 1228 } 1229 *(uint64_t *)(d + i) = di; 1230 } 1231 clear_high(d, oprsz, desc); 1232} 1233 1234void HELPER(gvec_ussub8)(void *d, void *a, void *b, uint32_t desc) 1235{ 1236 intptr_t oprsz = simd_oprsz(desc); 1237 intptr_t i; 1238 1239 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1240 int r = *(uint8_t *)(a + i) - *(uint8_t *)(b + i); 1241 if (r < 0) { 1242 r = 0; 1243 } 1244 *(uint8_t *)(d + i) = r; 1245 } 1246 clear_high(d, oprsz, desc); 1247} 1248 1249void HELPER(gvec_ussub16)(void *d, void *a, void *b, uint32_t desc) 1250{ 1251 intptr_t oprsz = simd_oprsz(desc); 1252 intptr_t i; 1253 1254 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1255 int r = *(uint16_t *)(a + i) - *(uint16_t *)(b + i); 1256 if (r < 0) { 1257 r = 0; 1258 } 1259 *(uint16_t *)(d + i) = r; 1260 } 1261 clear_high(d, oprsz, desc); 1262} 1263 1264void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc) 1265{ 1266 intptr_t oprsz = simd_oprsz(desc); 1267 intptr_t i; 1268 1269 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1270 uint32_t ai = *(uint32_t *)(a + i); 1271 uint32_t bi = *(uint32_t *)(b + i); 1272 uint32_t di; 1273 if (usub32_overflow(ai, bi, &di)) { 1274 di = 0; 1275 } 1276 *(uint32_t *)(d + i) = di; 1277 } 1278 clear_high(d, oprsz, desc); 1279} 1280 1281void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc) 1282{ 1283 intptr_t oprsz = simd_oprsz(desc); 1284 intptr_t i; 1285 1286 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1287 uint64_t ai = *(uint64_t *)(a + i); 1288 uint64_t bi = *(uint64_t *)(b + i); 1289 uint64_t di; 1290 if (usub64_overflow(ai, bi, &di)) { 1291 di = 0; 1292 } 1293 *(uint64_t *)(d + i) = di; 1294 } 1295 clear_high(d, oprsz, desc); 1296} 1297 1298void HELPER(gvec_smin8)(void *d, void *a, void *b, uint32_t desc) 1299{ 1300 intptr_t oprsz = simd_oprsz(desc); 1301 intptr_t i; 1302 1303 for (i = 0; i < oprsz; i += sizeof(int8_t)) { 1304 int8_t aa = *(int8_t *)(a + i); 1305 int8_t bb = *(int8_t *)(b + i); 1306 int8_t dd = aa < bb ? aa : bb; 1307 *(int8_t *)(d + i) = dd; 1308 } 1309 clear_high(d, oprsz, desc); 1310} 1311 1312void HELPER(gvec_smin16)(void *d, void *a, void *b, uint32_t desc) 1313{ 1314 intptr_t oprsz = simd_oprsz(desc); 1315 intptr_t i; 1316 1317 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 1318 int16_t aa = *(int16_t *)(a + i); 1319 int16_t bb = *(int16_t *)(b + i); 1320 int16_t dd = aa < bb ? aa : bb; 1321 *(int16_t *)(d + i) = dd; 1322 } 1323 clear_high(d, oprsz, desc); 1324} 1325 1326void HELPER(gvec_smin32)(void *d, void *a, void *b, uint32_t desc) 1327{ 1328 intptr_t oprsz = simd_oprsz(desc); 1329 intptr_t i; 1330 1331 for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1332 int32_t aa = *(int32_t *)(a + i); 1333 int32_t bb = *(int32_t *)(b + i); 1334 int32_t dd = aa < bb ? aa : bb; 1335 *(int32_t *)(d + i) = dd; 1336 } 1337 clear_high(d, oprsz, desc); 1338} 1339 1340void HELPER(gvec_smin64)(void *d, void *a, void *b, uint32_t desc) 1341{ 1342 intptr_t oprsz = simd_oprsz(desc); 1343 intptr_t i; 1344 1345 for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1346 int64_t aa = *(int64_t *)(a + i); 1347 int64_t bb = *(int64_t *)(b + i); 1348 int64_t dd = aa < bb ? aa : bb; 1349 *(int64_t *)(d + i) = dd; 1350 } 1351 clear_high(d, oprsz, desc); 1352} 1353 1354void HELPER(gvec_smax8)(void *d, void *a, void *b, uint32_t desc) 1355{ 1356 intptr_t oprsz = simd_oprsz(desc); 1357 intptr_t i; 1358 1359 for (i = 0; i < oprsz; i += sizeof(int8_t)) { 1360 int8_t aa = *(int8_t *)(a + i); 1361 int8_t bb = *(int8_t *)(b + i); 1362 int8_t dd = aa > bb ? aa : bb; 1363 *(int8_t *)(d + i) = dd; 1364 } 1365 clear_high(d, oprsz, desc); 1366} 1367 1368void HELPER(gvec_smax16)(void *d, void *a, void *b, uint32_t desc) 1369{ 1370 intptr_t oprsz = simd_oprsz(desc); 1371 intptr_t i; 1372 1373 for (i = 0; i < oprsz; i += sizeof(int16_t)) { 1374 int16_t aa = *(int16_t *)(a + i); 1375 int16_t bb = *(int16_t *)(b + i); 1376 int16_t dd = aa > bb ? aa : bb; 1377 *(int16_t *)(d + i) = dd; 1378 } 1379 clear_high(d, oprsz, desc); 1380} 1381 1382void HELPER(gvec_smax32)(void *d, void *a, void *b, uint32_t desc) 1383{ 1384 intptr_t oprsz = simd_oprsz(desc); 1385 intptr_t i; 1386 1387 for (i = 0; i < oprsz; i += sizeof(int32_t)) { 1388 int32_t aa = *(int32_t *)(a + i); 1389 int32_t bb = *(int32_t *)(b + i); 1390 int32_t dd = aa > bb ? aa : bb; 1391 *(int32_t *)(d + i) = dd; 1392 } 1393 clear_high(d, oprsz, desc); 1394} 1395 1396void HELPER(gvec_smax64)(void *d, void *a, void *b, uint32_t desc) 1397{ 1398 intptr_t oprsz = simd_oprsz(desc); 1399 intptr_t i; 1400 1401 for (i = 0; i < oprsz; i += sizeof(int64_t)) { 1402 int64_t aa = *(int64_t *)(a + i); 1403 int64_t bb = *(int64_t *)(b + i); 1404 int64_t dd = aa > bb ? aa : bb; 1405 *(int64_t *)(d + i) = dd; 1406 } 1407 clear_high(d, oprsz, desc); 1408} 1409 1410void HELPER(gvec_umin8)(void *d, void *a, void *b, uint32_t desc) 1411{ 1412 intptr_t oprsz = simd_oprsz(desc); 1413 intptr_t i; 1414 1415 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1416 uint8_t aa = *(uint8_t *)(a + i); 1417 uint8_t bb = *(uint8_t *)(b + i); 1418 uint8_t dd = aa < bb ? aa : bb; 1419 *(uint8_t *)(d + i) = dd; 1420 } 1421 clear_high(d, oprsz, desc); 1422} 1423 1424void HELPER(gvec_umin16)(void *d, void *a, void *b, uint32_t desc) 1425{ 1426 intptr_t oprsz = simd_oprsz(desc); 1427 intptr_t i; 1428 1429 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1430 uint16_t aa = *(uint16_t *)(a + i); 1431 uint16_t bb = *(uint16_t *)(b + i); 1432 uint16_t dd = aa < bb ? aa : bb; 1433 *(uint16_t *)(d + i) = dd; 1434 } 1435 clear_high(d, oprsz, desc); 1436} 1437 1438void HELPER(gvec_umin32)(void *d, void *a, void *b, uint32_t desc) 1439{ 1440 intptr_t oprsz = simd_oprsz(desc); 1441 intptr_t i; 1442 1443 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1444 uint32_t aa = *(uint32_t *)(a + i); 1445 uint32_t bb = *(uint32_t *)(b + i); 1446 uint32_t dd = aa < bb ? aa : bb; 1447 *(uint32_t *)(d + i) = dd; 1448 } 1449 clear_high(d, oprsz, desc); 1450} 1451 1452void HELPER(gvec_umin64)(void *d, void *a, void *b, uint32_t desc) 1453{ 1454 intptr_t oprsz = simd_oprsz(desc); 1455 intptr_t i; 1456 1457 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1458 uint64_t aa = *(uint64_t *)(a + i); 1459 uint64_t bb = *(uint64_t *)(b + i); 1460 uint64_t dd = aa < bb ? aa : bb; 1461 *(uint64_t *)(d + i) = dd; 1462 } 1463 clear_high(d, oprsz, desc); 1464} 1465 1466void HELPER(gvec_umax8)(void *d, void *a, void *b, uint32_t desc) 1467{ 1468 intptr_t oprsz = simd_oprsz(desc); 1469 intptr_t i; 1470 1471 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1472 uint8_t aa = *(uint8_t *)(a + i); 1473 uint8_t bb = *(uint8_t *)(b + i); 1474 uint8_t dd = aa > bb ? aa : bb; 1475 *(uint8_t *)(d + i) = dd; 1476 } 1477 clear_high(d, oprsz, desc); 1478} 1479 1480void HELPER(gvec_umax16)(void *d, void *a, void *b, uint32_t desc) 1481{ 1482 intptr_t oprsz = simd_oprsz(desc); 1483 intptr_t i; 1484 1485 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1486 uint16_t aa = *(uint16_t *)(a + i); 1487 uint16_t bb = *(uint16_t *)(b + i); 1488 uint16_t dd = aa > bb ? aa : bb; 1489 *(uint16_t *)(d + i) = dd; 1490 } 1491 clear_high(d, oprsz, desc); 1492} 1493 1494void HELPER(gvec_umax32)(void *d, void *a, void *b, uint32_t desc) 1495{ 1496 intptr_t oprsz = simd_oprsz(desc); 1497 intptr_t i; 1498 1499 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1500 uint32_t aa = *(uint32_t *)(a + i); 1501 uint32_t bb = *(uint32_t *)(b + i); 1502 uint32_t dd = aa > bb ? aa : bb; 1503 *(uint32_t *)(d + i) = dd; 1504 } 1505 clear_high(d, oprsz, desc); 1506} 1507 1508void HELPER(gvec_umax64)(void *d, void *a, void *b, uint32_t desc) 1509{ 1510 intptr_t oprsz = simd_oprsz(desc); 1511 intptr_t i; 1512 1513 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1514 uint64_t aa = *(uint64_t *)(a + i); 1515 uint64_t bb = *(uint64_t *)(b + i); 1516 uint64_t dd = aa > bb ? aa : bb; 1517 *(uint64_t *)(d + i) = dd; 1518 } 1519 clear_high(d, oprsz, desc); 1520} 1521 1522void HELPER(gvec_bitsel)(void *d, void *a, void *b, void *c, uint32_t desc) 1523{ 1524 intptr_t oprsz = simd_oprsz(desc); 1525 intptr_t i; 1526 1527 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1528 uint64_t aa = *(uint64_t *)(a + i); 1529 uint64_t bb = *(uint64_t *)(b + i); 1530 uint64_t cc = *(uint64_t *)(c + i); 1531 *(uint64_t *)(d + i) = (bb & aa) | (cc & ~aa); 1532 } 1533 clear_high(d, oprsz, desc); 1534}