vec_int_helper.c (30099B)
1/* 2 * QEMU TCG support -- s390x vector integer instruction support 3 * 4 * Copyright (C) 2019 Red Hat Inc 5 * 6 * Authors: 7 * David Hildenbrand <david@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or later. 10 * See the COPYING file in the top-level directory. 11 */ 12#include "qemu/osdep.h" 13#include "qemu-common.h" 14#include "cpu.h" 15#include "vec.h" 16#include "exec/helper-proto.h" 17#include "tcg/tcg-gvec-desc.h" 18 19static bool s390_vec_is_zero(const S390Vector *v) 20{ 21 return !v->doubleword[0] && !v->doubleword[1]; 22} 23 24static void s390_vec_xor(S390Vector *res, const S390Vector *a, 25 const S390Vector *b) 26{ 27 res->doubleword[0] = a->doubleword[0] ^ b->doubleword[0]; 28 res->doubleword[1] = a->doubleword[1] ^ b->doubleword[1]; 29} 30 31static void s390_vec_and(S390Vector *res, const S390Vector *a, 32 const S390Vector *b) 33{ 34 res->doubleword[0] = a->doubleword[0] & b->doubleword[0]; 35 res->doubleword[1] = a->doubleword[1] & b->doubleword[1]; 36} 37 38static bool s390_vec_equal(const S390Vector *a, const S390Vector *b) 39{ 40 return a->doubleword[0] == b->doubleword[0] && 41 a->doubleword[1] == b->doubleword[1]; 42} 43 44static void s390_vec_shl(S390Vector *d, const S390Vector *a, uint64_t count) 45{ 46 uint64_t tmp; 47 48 g_assert(count < 128); 49 if (count == 0) { 50 d->doubleword[0] = a->doubleword[0]; 51 d->doubleword[1] = a->doubleword[1]; 52 } else if (count == 64) { 53 d->doubleword[0] = a->doubleword[1]; 54 d->doubleword[1] = 0; 55 } else if (count < 64) { 56 tmp = extract64(a->doubleword[1], 64 - count, count); 57 d->doubleword[1] = a->doubleword[1] << count; 58 d->doubleword[0] = (a->doubleword[0] << count) | tmp; 59 } else { 60 d->doubleword[0] = a->doubleword[1] << (count - 64); 61 d->doubleword[1] = 0; 62 } 63} 64 65static void s390_vec_sar(S390Vector *d, const S390Vector *a, uint64_t count) 66{ 67 uint64_t tmp; 68 69 if (count == 0) { 70 d->doubleword[0] = a->doubleword[0]; 71 d->doubleword[1] = a->doubleword[1]; 72 } else if (count == 64) { 73 tmp = (int64_t)a->doubleword[0] >> 63; 74 d->doubleword[1] = a->doubleword[0]; 75 d->doubleword[0] = tmp; 76 } else if (count < 64) { 77 tmp = a->doubleword[1] >> count; 78 d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]); 79 d->doubleword[0] = (int64_t)a->doubleword[0] >> count; 80 } else { 81 tmp = (int64_t)a->doubleword[0] >> 63; 82 d->doubleword[1] = (int64_t)a->doubleword[0] >> (count - 64); 83 d->doubleword[0] = tmp; 84 } 85} 86 87static void s390_vec_shr(S390Vector *d, const S390Vector *a, uint64_t count) 88{ 89 uint64_t tmp; 90 91 g_assert(count < 128); 92 if (count == 0) { 93 d->doubleword[0] = a->doubleword[0]; 94 d->doubleword[1] = a->doubleword[1]; 95 } else if (count == 64) { 96 d->doubleword[1] = a->doubleword[0]; 97 d->doubleword[0] = 0; 98 } else if (count < 64) { 99 tmp = a->doubleword[1] >> count; 100 d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]); 101 d->doubleword[0] = a->doubleword[0] >> count; 102 } else { 103 d->doubleword[1] = a->doubleword[0] >> (count - 64); 104 d->doubleword[0] = 0; 105 } 106} 107#define DEF_VAVG(BITS) \ 108void HELPER(gvec_vavg##BITS)(void *v1, const void *v2, const void *v3, \ 109 uint32_t desc) \ 110{ \ 111 int i; \ 112 \ 113 for (i = 0; i < (128 / BITS); i++) { \ 114 const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \ 115 const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \ 116 \ 117 s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1); \ 118 } \ 119} 120DEF_VAVG(8) 121DEF_VAVG(16) 122 123#define DEF_VAVGL(BITS) \ 124void HELPER(gvec_vavgl##BITS)(void *v1, const void *v2, const void *v3, \ 125 uint32_t desc) \ 126{ \ 127 int i; \ 128 \ 129 for (i = 0; i < (128 / BITS); i++) { \ 130 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 131 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 132 \ 133 s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1); \ 134 } \ 135} 136DEF_VAVGL(8) 137DEF_VAVGL(16) 138 139#define DEF_VCLZ(BITS) \ 140void HELPER(gvec_vclz##BITS)(void *v1, const void *v2, uint32_t desc) \ 141{ \ 142 int i; \ 143 \ 144 for (i = 0; i < (128 / BITS); i++) { \ 145 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 146 \ 147 s390_vec_write_element##BITS(v1, i, clz32(a) - 32 + BITS); \ 148 } \ 149} 150DEF_VCLZ(8) 151DEF_VCLZ(16) 152 153#define DEF_VCTZ(BITS) \ 154void HELPER(gvec_vctz##BITS)(void *v1, const void *v2, uint32_t desc) \ 155{ \ 156 int i; \ 157 \ 158 for (i = 0; i < (128 / BITS); i++) { \ 159 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 160 \ 161 s390_vec_write_element##BITS(v1, i, a ? ctz32(a) : BITS); \ 162 } \ 163} 164DEF_VCTZ(8) 165DEF_VCTZ(16) 166 167/* like binary multiplication, but XOR instead of addition */ 168#define DEF_GALOIS_MULTIPLY(BITS, TBITS) \ 169static uint##TBITS##_t galois_multiply##BITS(uint##TBITS##_t a, \ 170 uint##TBITS##_t b) \ 171{ \ 172 uint##TBITS##_t res = 0; \ 173 \ 174 while (b) { \ 175 if (b & 0x1) { \ 176 res = res ^ a; \ 177 } \ 178 a = a << 1; \ 179 b = b >> 1; \ 180 } \ 181 return res; \ 182} 183DEF_GALOIS_MULTIPLY(8, 16) 184DEF_GALOIS_MULTIPLY(16, 32) 185DEF_GALOIS_MULTIPLY(32, 64) 186 187static S390Vector galois_multiply64(uint64_t a, uint64_t b) 188{ 189 S390Vector res = {}; 190 S390Vector va = { 191 .doubleword[1] = a, 192 }; 193 S390Vector vb = { 194 .doubleword[1] = b, 195 }; 196 197 while (!s390_vec_is_zero(&vb)) { 198 if (vb.doubleword[1] & 0x1) { 199 s390_vec_xor(&res, &res, &va); 200 } 201 s390_vec_shl(&va, &va, 1); 202 s390_vec_shr(&vb, &vb, 1); 203 } 204 return res; 205} 206 207#define DEF_VGFM(BITS, TBITS) \ 208void HELPER(gvec_vgfm##BITS)(void *v1, const void *v2, const void *v3, \ 209 uint32_t desc) \ 210{ \ 211 int i; \ 212 \ 213 for (i = 0; i < (128 / TBITS); i++) { \ 214 uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2); \ 215 uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2); \ 216 uint##TBITS##_t d = galois_multiply##BITS(a, b); \ 217 \ 218 a = s390_vec_read_element##BITS(v2, i * 2 + 1); \ 219 b = s390_vec_read_element##BITS(v3, i * 2 + 1); \ 220 d = d ^ galois_multiply32(a, b); \ 221 s390_vec_write_element##TBITS(v1, i, d); \ 222 } \ 223} 224DEF_VGFM(8, 16) 225DEF_VGFM(16, 32) 226DEF_VGFM(32, 64) 227 228void HELPER(gvec_vgfm64)(void *v1, const void *v2, const void *v3, 229 uint32_t desc) 230{ 231 S390Vector tmp1, tmp2; 232 uint64_t a, b; 233 234 a = s390_vec_read_element64(v2, 0); 235 b = s390_vec_read_element64(v3, 0); 236 tmp1 = galois_multiply64(a, b); 237 a = s390_vec_read_element64(v2, 1); 238 b = s390_vec_read_element64(v3, 1); 239 tmp2 = galois_multiply64(a, b); 240 s390_vec_xor(v1, &tmp1, &tmp2); 241} 242 243#define DEF_VGFMA(BITS, TBITS) \ 244void HELPER(gvec_vgfma##BITS)(void *v1, const void *v2, const void *v3, \ 245 const void *v4, uint32_t desc) \ 246{ \ 247 int i; \ 248 \ 249 for (i = 0; i < (128 / TBITS); i++) { \ 250 uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2); \ 251 uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2); \ 252 uint##TBITS##_t d = galois_multiply##BITS(a, b); \ 253 \ 254 a = s390_vec_read_element##BITS(v2, i * 2 + 1); \ 255 b = s390_vec_read_element##BITS(v3, i * 2 + 1); \ 256 d = d ^ galois_multiply32(a, b); \ 257 d = d ^ s390_vec_read_element##TBITS(v4, i); \ 258 s390_vec_write_element##TBITS(v1, i, d); \ 259 } \ 260} 261DEF_VGFMA(8, 16) 262DEF_VGFMA(16, 32) 263DEF_VGFMA(32, 64) 264 265void HELPER(gvec_vgfma64)(void *v1, const void *v2, const void *v3, 266 const void *v4, uint32_t desc) 267{ 268 S390Vector tmp1, tmp2; 269 uint64_t a, b; 270 271 a = s390_vec_read_element64(v2, 0); 272 b = s390_vec_read_element64(v3, 0); 273 tmp1 = galois_multiply64(a, b); 274 a = s390_vec_read_element64(v2, 1); 275 b = s390_vec_read_element64(v3, 1); 276 tmp2 = galois_multiply64(a, b); 277 s390_vec_xor(&tmp1, &tmp1, &tmp2); 278 s390_vec_xor(v1, &tmp1, v4); 279} 280 281#define DEF_VMAL(BITS) \ 282void HELPER(gvec_vmal##BITS)(void *v1, const void *v2, const void *v3, \ 283 const void *v4, uint32_t desc) \ 284{ \ 285 int i; \ 286 \ 287 for (i = 0; i < (128 / BITS); i++) { \ 288 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 289 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 290 const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i); \ 291 \ 292 s390_vec_write_element##BITS(v1, i, a * b + c); \ 293 } \ 294} 295DEF_VMAL(8) 296DEF_VMAL(16) 297 298#define DEF_VMAH(BITS) \ 299void HELPER(gvec_vmah##BITS)(void *v1, const void *v2, const void *v3, \ 300 const void *v4, uint32_t desc) \ 301{ \ 302 int i; \ 303 \ 304 for (i = 0; i < (128 / BITS); i++) { \ 305 const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \ 306 const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \ 307 const int32_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, i); \ 308 \ 309 s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS); \ 310 } \ 311} 312DEF_VMAH(8) 313DEF_VMAH(16) 314 315#define DEF_VMALH(BITS) \ 316void HELPER(gvec_vmalh##BITS)(void *v1, const void *v2, const void *v3, \ 317 const void *v4, uint32_t desc) \ 318{ \ 319 int i; \ 320 \ 321 for (i = 0; i < (128 / BITS); i++) { \ 322 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 323 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 324 const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i); \ 325 \ 326 s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS); \ 327 } \ 328} 329DEF_VMALH(8) 330DEF_VMALH(16) 331 332#define DEF_VMAE(BITS, TBITS) \ 333void HELPER(gvec_vmae##BITS)(void *v1, const void *v2, const void *v3, \ 334 const void *v4, uint32_t desc) \ 335{ \ 336 int i, j; \ 337 \ 338 for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 339 int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 340 int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 341 int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ 342 \ 343 s390_vec_write_element##TBITS(v1, i, a * b + c); \ 344 } \ 345} 346DEF_VMAE(8, 16) 347DEF_VMAE(16, 32) 348DEF_VMAE(32, 64) 349 350#define DEF_VMALE(BITS, TBITS) \ 351void HELPER(gvec_vmale##BITS)(void *v1, const void *v2, const void *v3, \ 352 const void *v4, uint32_t desc) \ 353{ \ 354 int i, j; \ 355 \ 356 for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 357 uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 358 uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 359 uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ 360 \ 361 s390_vec_write_element##TBITS(v1, i, a * b + c); \ 362 } \ 363} 364DEF_VMALE(8, 16) 365DEF_VMALE(16, 32) 366DEF_VMALE(32, 64) 367 368#define DEF_VMAO(BITS, TBITS) \ 369void HELPER(gvec_vmao##BITS)(void *v1, const void *v2, const void *v3, \ 370 const void *v4, uint32_t desc) \ 371{ \ 372 int i, j; \ 373 \ 374 for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 375 int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 376 int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 377 int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ 378 \ 379 s390_vec_write_element##TBITS(v1, i, a * b + c); \ 380 } \ 381} 382DEF_VMAO(8, 16) 383DEF_VMAO(16, 32) 384DEF_VMAO(32, 64) 385 386#define DEF_VMALO(BITS, TBITS) \ 387void HELPER(gvec_vmalo##BITS)(void *v1, const void *v2, const void *v3, \ 388 const void *v4, uint32_t desc) \ 389{ \ 390 int i, j; \ 391 \ 392 for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 393 uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 394 uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 395 uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ 396 \ 397 s390_vec_write_element##TBITS(v1, i, a * b + c); \ 398 } \ 399} 400DEF_VMALO(8, 16) 401DEF_VMALO(16, 32) 402DEF_VMALO(32, 64) 403 404#define DEF_VMH(BITS) \ 405void HELPER(gvec_vmh##BITS)(void *v1, const void *v2, const void *v3, \ 406 uint32_t desc) \ 407{ \ 408 int i; \ 409 \ 410 for (i = 0; i < (128 / BITS); i++) { \ 411 const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \ 412 const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \ 413 \ 414 s390_vec_write_element##BITS(v1, i, (a * b) >> BITS); \ 415 } \ 416} 417DEF_VMH(8) 418DEF_VMH(16) 419 420#define DEF_VMLH(BITS) \ 421void HELPER(gvec_vmlh##BITS)(void *v1, const void *v2, const void *v3, \ 422 uint32_t desc) \ 423{ \ 424 int i; \ 425 \ 426 for (i = 0; i < (128 / BITS); i++) { \ 427 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 428 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 429 \ 430 s390_vec_write_element##BITS(v1, i, (a * b) >> BITS); \ 431 } \ 432} 433DEF_VMLH(8) 434DEF_VMLH(16) 435 436#define DEF_VME(BITS, TBITS) \ 437void HELPER(gvec_vme##BITS)(void *v1, const void *v2, const void *v3, \ 438 uint32_t desc) \ 439{ \ 440 int i, j; \ 441 \ 442 for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 443 int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 444 int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 445 \ 446 s390_vec_write_element##TBITS(v1, i, a * b); \ 447 } \ 448} 449DEF_VME(8, 16) 450DEF_VME(16, 32) 451DEF_VME(32, 64) 452 453#define DEF_VMLE(BITS, TBITS) \ 454void HELPER(gvec_vmle##BITS)(void *v1, const void *v2, const void *v3, \ 455 uint32_t desc) \ 456{ \ 457 int i, j; \ 458 \ 459 for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 460 const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 461 const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 462 \ 463 s390_vec_write_element##TBITS(v1, i, a * b); \ 464 } \ 465} 466DEF_VMLE(8, 16) 467DEF_VMLE(16, 32) 468DEF_VMLE(32, 64) 469 470#define DEF_VMO(BITS, TBITS) \ 471void HELPER(gvec_vmo##BITS)(void *v1, const void *v2, const void *v3, \ 472 uint32_t desc) \ 473{ \ 474 int i, j; \ 475 \ 476 for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 477 int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 478 int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 479 \ 480 s390_vec_write_element##TBITS(v1, i, a * b); \ 481 } \ 482} 483DEF_VMO(8, 16) 484DEF_VMO(16, 32) 485DEF_VMO(32, 64) 486 487#define DEF_VMLO(BITS, TBITS) \ 488void HELPER(gvec_vmlo##BITS)(void *v1, const void *v2, const void *v3, \ 489 uint32_t desc) \ 490{ \ 491 int i, j; \ 492 \ 493 for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 494 const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 495 const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 496 \ 497 s390_vec_write_element##TBITS(v1, i, a * b); \ 498 } \ 499} 500DEF_VMLO(8, 16) 501DEF_VMLO(16, 32) 502DEF_VMLO(32, 64) 503 504#define DEF_VPOPCT(BITS) \ 505void HELPER(gvec_vpopct##BITS)(void *v1, const void *v2, uint32_t desc) \ 506{ \ 507 int i; \ 508 \ 509 for (i = 0; i < (128 / BITS); i++) { \ 510 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 511 \ 512 s390_vec_write_element##BITS(v1, i, ctpop32(a)); \ 513 } \ 514} 515DEF_VPOPCT(8) 516DEF_VPOPCT(16) 517 518#define DEF_VERIM(BITS) \ 519void HELPER(gvec_verim##BITS)(void *v1, const void *v2, const void *v3, \ 520 uint32_t desc) \ 521{ \ 522 const uint8_t count = simd_data(desc); \ 523 int i; \ 524 \ 525 for (i = 0; i < (128 / BITS); i++) { \ 526 const uint##BITS##_t a = s390_vec_read_element##BITS(v1, i); \ 527 const uint##BITS##_t b = s390_vec_read_element##BITS(v2, i); \ 528 const uint##BITS##_t mask = s390_vec_read_element##BITS(v3, i); \ 529 const uint##BITS##_t d = (a & ~mask) | (rol##BITS(b, count) & mask); \ 530 \ 531 s390_vec_write_element##BITS(v1, i, d); \ 532 } \ 533} 534DEF_VERIM(8) 535DEF_VERIM(16) 536 537void HELPER(gvec_vsl)(void *v1, const void *v2, uint64_t count, 538 uint32_t desc) 539{ 540 s390_vec_shl(v1, v2, count); 541} 542 543void HELPER(gvec_vsra)(void *v1, const void *v2, uint64_t count, 544 uint32_t desc) 545{ 546 s390_vec_sar(v1, v2, count); 547} 548 549void HELPER(gvec_vsrl)(void *v1, const void *v2, uint64_t count, 550 uint32_t desc) 551{ 552 s390_vec_shr(v1, v2, count); 553} 554 555#define DEF_VSCBI(BITS) \ 556void HELPER(gvec_vscbi##BITS)(void *v1, const void *v2, const void *v3, \ 557 uint32_t desc) \ 558{ \ 559 int i; \ 560 \ 561 for (i = 0; i < (128 / BITS); i++) { \ 562 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 563 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 564 \ 565 s390_vec_write_element##BITS(v1, i, a >= b); \ 566 } \ 567} 568DEF_VSCBI(8) 569DEF_VSCBI(16) 570 571void HELPER(gvec_vtm)(void *v1, const void *v2, CPUS390XState *env, 572 uint32_t desc) 573{ 574 S390Vector tmp; 575 576 s390_vec_and(&tmp, v1, v2); 577 if (s390_vec_is_zero(&tmp)) { 578 /* Selected bits all zeros; or all mask bits zero */ 579 env->cc_op = 0; 580 } else if (s390_vec_equal(&tmp, v2)) { 581 /* Selected bits all ones */ 582 env->cc_op = 3; 583 } else { 584 /* Selected bits a mix of zeros and ones */ 585 env->cc_op = 1; 586 } 587}