translate-sve.c
/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "tcg/tcg-gvec-desc.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"
#include "translate-a64.h"
#include "fpu/softfloat.h"


typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
                                         TCGv_ptr, TCGv_i64, TCGv_i32);

/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(DisasContext *s, int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}

static int tszimm_shr(DisasContext *s, int x)
{
    return (16 << tszimm_esz(s, x)) - x;
}

/* See e.g. LSL (immediate, predicated). */
static int tszimm_shl(DisasContext *s, int x)
{
    return x - (8 << tszimm_esz(s, x));
}
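
/*
 * A worked example of the tsz:imm3 encoding handled above: for a 32-bit
 * element, tsz == 0b0100, so the combined field x lies in [0x20, 0x3f]
 * and tszimm_esz returns 31 - clz32(4..7) == 2 == MO_32.  A right-shift
 * amount of 5 is encoded as x = 64 - 5 == 59 and recovered by tszimm_shr
 * as (16 << 2) - 59 == 5; a left-shift of 27 is encoded as x = 32 + 27
 * and recovered by tszimm_shl as x - (8 << 2) == 27.
 */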

/* The SH bit is in bit 8.  Extract the low 8 and shift. */
static inline int expand_imm_sh8s(DisasContext *s, int x)
{
    return (int8_t)x << (x & 0x100 ? 8 : 0);
}

static inline int expand_imm_sh8u(DisasContext *s, int x)
{
    return (uint8_t)x << (x & 0x100 ? 8 : 0);
}

/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
 * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
 */
static inline int msz_dtype(DisasContext *s, int msz)
{
    static const uint8_t dtype[4] = { 0, 5, 10, 15 };
    return dtype[msz];
}

/*
 * Include the generated decoder.
 */

#include "decode-sve.c.inc"

/*
 * Implement all of the translator functions referenced by the decoder.
 */

/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}

/* Return the byte size of the whole predicate register, VL / 64. */
static inline int pred_full_reg_size(DisasContext *s)
{
    return s->sve_len >> 3;
}

/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    if (size <= 8) {
        return 8;
    } else {
        return QEMU_ALIGN_UP(size, 16);
    }
}

static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}
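
/*
 * For example, at VL = 256 bits a Z register is 32 bytes, so a P register
 * is 32 / 8 = 4 bytes, and size_for_gvec(4) rounds the predicate up to
 * the 8-byte minimum that the tcg vector infrastructure can operate on.
 */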

/* Invoke an out-of-line helper on 2 Zregs. */
static void gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
                            int rd, int rn, int data)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vsz, vsz, data, fn);
}

/* Invoke an out-of-line helper on 3 Zregs. */
static void gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
                             int rd, int rn, int rm, int data)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vsz, vsz, data, fn);
}

/* Invoke an out-of-line helper on 4 Zregs. */
static void gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
                              int rd, int rn, int rm, int ra, int data)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra),
                       vsz, vsz, data, fn);
}

/* Invoke an out-of-line helper on 2 Zregs and a predicate. */
static void gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
                             int rd, int rn, int pg, int data)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, pg),
                       vsz, vsz, data, fn);
}

/* Invoke an out-of-line helper on 3 Zregs and a predicate. */
static void gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
                              int rd, int rn, int rm, int pg, int data)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, pg),
                       vsz, vsz, data, fn);
}

/* Invoke a vector expander on two Zregs. */
static void gen_gvec_fn_zz(DisasContext *s, GVecGen2Fn *gvec_fn,
                           int esz, int rd, int rn)
{
    unsigned vsz = vec_full_reg_size(s);
    gvec_fn(esz, vec_full_reg_offset(s, rd),
            vec_full_reg_offset(s, rn), vsz, vsz);
}

/* Invoke a vector expander on three Zregs. */
static void gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
                            int esz, int rd, int rn, int rm)
{
    unsigned vsz = vec_full_reg_size(s);
    gvec_fn(esz, vec_full_reg_offset(s, rd),
            vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), vsz, vsz);
}

/* Invoke a vector expander on four Zregs. */
static void gen_gvec_fn_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn,
                             int esz, int rd, int rn, int rm, int ra)
{
    unsigned vsz = vec_full_reg_size(s);
    gvec_fn(esz, vec_full_reg_offset(s, rd),
            vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm),
            vec_full_reg_offset(s, ra), vsz, vsz);
}

/* Invoke a vector move on two Zregs. */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    if (sve_access_check(s)) {
        gen_gvec_fn_zz(s, tcg_gen_gvec_mov, MO_8, rd, rn);
    }
    return true;
}

/* Initialize a Zreg with replications of a 64-bit immediate. */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
}

/* Invoke a vector expander on three Pregs. */
static void gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
                            int rd, int rn, int rm)
{
    unsigned psz = pred_gvec_reg_size(s);
    gvec_fn(MO_64, pred_full_reg_offset(s, rd),
            pred_full_reg_offset(s, rn),
            pred_full_reg_offset(s, rm), psz, psz);
}

/* Invoke a vector move on two Pregs. */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
                         pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Set the cpu flags as per a return from an SVE helper. */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* Subroutines computing the ARM PredTest pseudofunction. */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    t = tcg_const_i32(words);

    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
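
/*
 * The flag layout implied by do_pred_flags above: the predtest helpers
 * return a word with N in bit 31, "Z clear" in bit 1 and C in bit 0,
 * so copying it into NF, masking bit 1 into ZF (which encodes Z as
 * "ZF == 0") and bit 0 into CF reproduces the architectural NZCV
 * result of PredTest, with V always 0.
 */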

/* For each element size, the bits within a predicate word that are active. */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};

/*
 *** SVE Logical - Unpredicated Group
 */

static bool do_zzz_fn(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *gvec_fn)
{
    if (sve_access_check(s)) {
        gen_gvec_fn_zzz(s, gvec_fn, a->esz, a->rd, a->rn, a->rm);
    }
    return true;
}

static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_and);
}

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_or);
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_xor);
}

static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_andc);
}

static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_8, 0xff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);
    tcg_gen_shli_i64(t, t, 8 - sh);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
    tcg_temp_free_i64(t);
}

static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_16, 0xffff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);
    tcg_gen_shli_i64(t, t, 16 - sh);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
    tcg_temp_free_i64(t);
}
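
/*
 * The two expansions above rotate within each 8- or 16-bit lane of a
 * 64-bit word using two shifts and a mask: e.g. for 8-bit lanes with
 * sh = 3, mask = dup_const(MO_8, 0xff >> 3) == 0x1f1f...1f keeps the
 * bits that the right shift moved within their own lane, while ~mask
 * keeps the bits that the left shift by 8 - 3 = 5 wrapped around;
 * OR-ing the two gives the per-lane rotation of n ^ m.
 */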

static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
{
    tcg_gen_xor_i32(d, n, m);
    tcg_gen_rotri_i32(d, d, sh);
}

static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_rotri_i64(d, d, sh);
}

static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                        TCGv_vec m, int64_t sh)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_rotri_vec(vece, d, d, sh);
}

void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, int64_t shift,
                  uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
    static const GVecGen3i ops[4] = {
        { .fni8 = gen_xar8_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fni8 = gen_xar16_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_xar_i32,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_xar_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_gvec_xar_d,
          .opt_opc = vecop,
          .vece = MO_64 }
    };
    int esize = 8 << vece;

    /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
    tcg_debug_assert(shift >= 0);
    tcg_debug_assert(shift <= esize);
    shift &= esize - 1;

    if (shift == 0) {
        /* xar with no rotate devolves to xor. */
        tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
                        shift, &ops[vece]);
    }
}

static bool trans_XAR(DisasContext *s, arg_rrri_esz *a)
{
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd),
                     vec_full_reg_offset(s, a->rn),
                     vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz);
    }
    return true;
}

static bool do_sve2_zzzz_fn(DisasContext *s, arg_rrrr_esz *a, GVecGen4Fn *fn)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_fn_zzzz(s, fn, a->esz, a->rd, a->rn, a->rm, a->ra);
    }
    return true;
}

static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_xor_i64(d, d, k);
}

static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_xor_vec(vece, d, d, k);
}

static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor3_i64,
        .fniv = gen_eor3_vec,
        .fno = gen_helper_sve2_eor3,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_EOR3(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_eor3);
}

static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(d, m, k);
    tcg_gen_xor_i64(d, d, n);
}

static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_andc_vec(vece, d, m, k);
    tcg_gen_xor_vec(vece, d, d, n);
}

static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bcax_i64,
        .fniv = gen_bcax_vec,
        .fno = gen_helper_sve2_bcax,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_BCAX(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_bcax);
}
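
/*
 * EOR3 (d = n ^ m ^ k) and BCAX (d = n ^ (m & ~k)) are purely bitwise,
 * so the expansions above fix .vece at MO_64 regardless of the element
 * size in the encoding; the same holds for the BSL family below.
 */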

static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                    uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    /* BSL differs from the generic bitsel in argument ordering. */
    tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz);
}

static bool trans_BSL(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_bsl);
}

static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(n, k, n);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_or_i64(d, n, m);
}

static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        tcg_gen_not_vec(vece, n, n);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_andc_vec(vece, n, k, n);
        tcg_gen_andc_vec(vece, m, m, k);
        tcg_gen_or_vec(vece, d, n, m);
    }
}

static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl1n_i64,
        .fniv = gen_bsl1n_vec,
        .fno = gen_helper_sve2_bsl1n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_BSL1N(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_bsl1n);
}

static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    /*
     * Z[dn] = (n & k) | (~m & ~k)
     *       =         | ~(m | k)
     */
    tcg_gen_and_i64(n, n, k);
    if (TCG_TARGET_HAS_orc_i64) {
        tcg_gen_or_i64(m, m, k);
        tcg_gen_orc_i64(d, n, m);
    } else {
        tcg_gen_nor_i64(m, m, k);
        tcg_gen_or_i64(d, n, m);
    }
}

static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        tcg_gen_not_vec(vece, m, m);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_and_vec(vece, n, n, k);
        tcg_gen_or_vec(vece, m, m, k);
        tcg_gen_orc_vec(vece, d, n, m);
    }
}

static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl2n_i64,
        .fniv = gen_bsl2n_vec,
        .fno = gen_helper_sve2_bsl2n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_BSL2N(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_bsl2n);
}

static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_and_i64(n, n, k);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_nor_i64(d, n, m);
}

static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_bitsel_vec(vece, d, k, n, m);
    tcg_gen_not_vec(vece, d, d);
}

static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_nbsl_i64,
        .fniv = gen_nbsl_vec,
        .fno = gen_helper_sve2_nbsl,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_NBSL(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_nbsl);
}
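
/*
 * Summary of the BSL family as expanded above, in terms of the i64
 * expander operands (k is the selector):
 *   BSL:   d =  (n & k) | (m & ~k)
 *   BSL1N: d = (~n & k) | (m & ~k)
 *   BSL2N: d =  (n & k) | (~m & ~k)
 *   NBSL:  d = ~((n & k) | (m & ~k))
 */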

/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_add);
}

static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_sub);
}

static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_ssadd);
}

static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_sssub);
}

static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_usadd);
}

static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_ussub);
}

/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0);
    }
    return true;
}

/* Select active elements from Zn and inactive elements from Zm,
 * storing the result in Zd.
 */
static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
{
    static gen_helper_gvec_4 * const fns[4] = {
        gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
        gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
    };
    gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
}

#define DO_ZPZZ(NAME, name)                                               \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_4 * const fns[4] = {                           \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
    };                                                                    \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZZ(AND, and)
DO_ZPZZ(EOR, eor)
DO_ZPZZ(ORR, orr)
DO_ZPZZ(BIC, bic)

DO_ZPZZ(ADD, add)
DO_ZPZZ(SUB, sub)

DO_ZPZZ(SMAX, smax)
DO_ZPZZ(UMAX, umax)
DO_ZPZZ(SMIN, smin)
DO_ZPZZ(UMIN, umin)
DO_ZPZZ(SABD, sabd)
DO_ZPZZ(UABD, uabd)

DO_ZPZZ(MUL, mul)
DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

DO_ZPZZ(ASR, asr)
DO_ZPZZ(LSR, lsr)
DO_ZPZZ(LSL, lsl)

static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    if (sve_access_check(s)) {
        do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
    }
    return true;
}

#undef DO_ZPZZ
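
/*
 * For reference, DO_ZPZZ(ADD, add) above expands to a trans_ADD_zpzz
 * that selects one of gen_helper_sve_add_zpzz_{b,h,s,d} by a->esz and
 * passes it to do_zpzz_ool.  SDIV and UDIV are written out by hand
 * because only the s and d helpers exist (the NULL entries make
 * do_zpzz_ool reject the b and h encodings), and SEL because it
 * routes through the shared do_sel_z helper.
 */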

/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, 0);
    }
    return true;
}

#define DO_ZPZ(NAME, name)                                    \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)     \
{                                                             \
    static gen_helper_gvec_3 * const fns[4] = {               \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
    };                                                        \
    return do_zpz_ool(s, a, fns[a->esz]);                     \
}

DO_ZPZ(CLS, cls)
DO_ZPZ(CLZ, clz)
DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(CNOT, cnot)
DO_ZPZ(NOT_zpz, not_zpz)
DO_ZPZ(ABS, abs)
DO_ZPZ(NEG, neg)

static bool trans_FABS(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
}

static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
}

#undef DO_ZPZ

/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}

#define DO_VPZ(NAME, name)                                    \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)     \
{                                                             \
    static gen_helper_gvec_reduc * const fns[4] = {           \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
    };                                                        \
    return do_vpz_ool(s, a, fns[a->esz]);                     \
}

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}

#undef DO_VPZ

/*
 *** SVE Shift by Immediate - Predicated Group
 */

/*
 * Copy Zn into Zd, storing zeros into inactive elements.
 * If invert, store zeros into the active elements.
 */
static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
                        int esz, bool invert)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
    };

    if (sve_access_check(s)) {
        gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
    }
    return true;
}

static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
                        gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
    }
    return true;
}

static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For
       arithmetic right-shift, it's the same as by one less. */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return do_zpzi_ool(s, a, fns[a->esz]);
}

static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.  For arithmetic
       right shift for division, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_SQSHL_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h,
        gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d,
    };
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpzi_ool(s, a, fns[a->esz]);
}

static bool trans_UQSHL_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h,
        gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d,
    };
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpzi_ool(s, a, fns[a->esz]);
}

static bool trans_SRSHR(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h,
        gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d,
    };
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpzi_ool(s, a, fns[a->esz]);
}

static bool trans_URSHR(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h,
        gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d,
    };
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpzi_ool(s, a, fns[a->esz]);
}

static bool trans_SQSHLU(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h,
        gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d,
    };
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpzi_ool(s, a, fns[a->esz]);
}

/*
 *** SVE Bitwise Shift - Predicated Group
 */
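
/*
 * The _zpzw forms below shift by "wide" elements: the shift amount for
 * each element comes from the overlapping 64-bit element of Zm, which
 * is why only the b, h and s helpers exist and esz == 3 is rejected.
 */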

#define DO_ZPZW(NAME, name)                                               \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW

/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation. */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}

static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0);
    }
    return true;
}

#define DO_ZZW(NAME, name)                                              \
static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a)         \
{                                                                       \
    static gen_helper_gvec_3 * const fns[4] = {                         \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,   \
        gen_helper_sve_##name##_zzw_s, NULL                             \
    };                                                                  \
    return do_zzw_ool(s, a, fns[a->esz]);                               \
}

DO_ZZW(ASR, asr)
DO_ZZW(LSR, lsr)
DO_ZZW(LSL, lsl)

#undef DO_ZZW

/*
 *** SVE Integer Multiply-Add Group
 */

static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZZ(NAME, name)                                  \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)   \
{                                                             \
    static gen_helper_gvec_5 * const fns[4] = {               \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
    };                                                        \
    return do_zpzzz_ool(s, a, fns[a->esz]);                   \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)

#undef DO_ZPZZZ

/*
 *** SVE Index Generation Group
 */
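
/*
 * INDEX writes Zd[i] = start + i * incr for each element; the four
 * trans functions below differ only in whether start and incr come
 * from immediates or general registers, and all funnel into do_index.
 */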

static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_i32(desc);
}

static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm1);
        TCGv_i64 incr = tcg_const_i64(a->imm2);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
    }
    return true;
}

static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = tcg_const_i64(a->imm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}

/*
 *** SVE Stack Allocation Group
 */

static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    }
    return true;
}

static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
    }
    return true;
}

static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
    }
    return true;
}

/*
 *** SVE Compute Vector Address Group
 */

static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
    }
    return true;
}

static bool trans_ADR_p32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}

static bool trans_ADR_p64(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}

static bool trans_ADR_s32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}

static bool trans_ADR_u32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}

/*
 *** SVE Integer Misc - Unpredicated Group
 */

static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a)
{
    static gen_helper_gvec_2 * const fns[4] = {
        NULL,
        gen_helper_sve_fexpa_h,
        gen_helper_sve_fexpa_s,
        gen_helper_sve_fexpa_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0);
    }
    return true;
}

static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_ftssel_h,
        gen_helper_sve_ftssel_s,
        gen_helper_sve_ftssel_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
    }
    return true;
}

/*
 *** SVE Predicate Logical Operations Group
 */
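
/*
 * Predicates hold one bit per byte of vector, so pred_gvec_reg_size(s)
 * == 8 covers all vector lengths up to 512 bits: the whole predicate
 * then fits in a single 64-bit word and do_pppp_flags below can use
 * its inline fast path for the flag-setting forms.
 */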

static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (!a->s) {
        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        return true;
    }

    if (psz == 8) {
        /* Do the operation and the flags generation in temps. */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}

static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!a->s) {
        if (!sve_access_check(s)) {
            return true;
        }
        if (a->rn == a->rm) {
            if (a->pg == a->rn) {
                do_mov_p(s, a->rd, a->rn);
            } else {
                gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
            }
            return true;
        } else if (a->pg == a->rn || a->pg == a->rm) {
            gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
            return true;
        }
    }
    return do_pppp_flags(s, a, &op);
}

static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!a->s && a->pg == a->rn) {
        if (sve_access_check(s)) {
            gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
        }
        return true;
    }
    return do_pppp_flags(s, a, &op);
}

static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}

static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
{
    if (a->s) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            pred_full_reg_offset(s, a->rn),
                            pred_full_reg_offset(s, a->rm), psz, psz);
    }
    return true;
}

static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!a->s && a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    }
    return do_pppp_flags(s, a, &op);
}

static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}

static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}

static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}

/*
 *** SVE Predicate Misc Group
 */

static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}

/* See the ARM pseudocode DecodePredCount. */
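/*
 * For example, with VL = 256 bits and esz = MO_32 there are 8 elements:
 * pattern VL5 yields 5, VL8 yields 8, but VL16 yields 0 because the
 * vector cannot hold 16 word elements; POW2 yields pow2floor(8) == 8,
 * MUL3 yields 8 - 8 % 3 == 6, and ALL yields 8.
 */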
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    return elements >= bound ? bound : 0;
}

/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many. */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}

static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}

static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a)
{
    /* Note pat == 31 is #all, to set all elements. */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}

static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a)
{
    /* Note pat == 32 is #unimp, to set no elements. */
    return do_predset(s, 0, a->rd, 32, false);
}

static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a);
}

static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}

static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}

static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_const_i32(desc);

    gen_fn(t, t_pd, t_pg, t);
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}

static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}

/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;
    TCGv_i64 bound;
    TCGCond cond;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        cond = TCG_COND_LT;
    } else {
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        cond = TCG_COND_GT;
    }
    bound = tcg_const_i64(ibound);
    tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
    tcg_temp_free_i64(bound);
}
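
/*
 * E.g. for an unsigned increment: with reg = 0xfffffff0 and val = 0x20,
 * the widened 64-bit sum is 0x1_0000_0010, which compares greater than
 * the UINT32_MAX bound, so the movcond clamps the result to 0xffffffff.
 */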

/* Similarly with 64-bit values. */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            tcg_gen_sub_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, 0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
        } else {
            tcg_gen_add_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, -1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
        }
    } else {
        if (d) {
            /* Detect signed overflow for subtraction. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result. */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result. */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t2);
    }
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
}
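
/*
 * The signed cases above use the usual two's-complement overflow test:
 * for subtraction the sign bit of (reg ^ val) & (reg ^ (reg - val)) is
 * set exactly on overflow, and similarly for addition with the operand
 * difference inverted.  Because val is known positive, a subtraction
 * can only overflow toward INT64_MIN and an addition only toward
 * INT64_MAX, so a single bound suffices in each direction.
 */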
-1 : 1); 2090 TCGv_i64 reg = cpu_reg(s, a->rd); 2091 2092 tcg_gen_addi_i64(reg, reg, inc); 2093 } 2094 return true; 2095} 2096 2097static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a) 2098{ 2099 if (!sve_access_check(s)) { 2100 return true; 2101 } 2102 2103 unsigned fullsz = vec_full_reg_size(s); 2104 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2105 int inc = numelem * a->imm; 2106 TCGv_i64 reg = cpu_reg(s, a->rd); 2107 2108 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */ 2109 if (inc == 0) { 2110 if (a->u) { 2111 tcg_gen_ext32u_i64(reg, reg); 2112 } else { 2113 tcg_gen_ext32s_i64(reg, reg); 2114 } 2115 } else { 2116 TCGv_i64 t = tcg_const_i64(inc); 2117 do_sat_addsub_32(reg, t, a->u, a->d); 2118 tcg_temp_free_i64(t); 2119 } 2120 return true; 2121} 2122 2123static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a) 2124{ 2125 if (!sve_access_check(s)) { 2126 return true; 2127 } 2128 2129 unsigned fullsz = vec_full_reg_size(s); 2130 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2131 int inc = numelem * a->imm; 2132 TCGv_i64 reg = cpu_reg(s, a->rd); 2133 2134 if (inc != 0) { 2135 TCGv_i64 t = tcg_const_i64(inc); 2136 do_sat_addsub_64(reg, t, a->u, a->d); 2137 tcg_temp_free_i64(t); 2138 } 2139 return true; 2140} 2141 2142static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a) 2143{ 2144 if (a->esz == 0) { 2145 return false; 2146 } 2147 2148 unsigned fullsz = vec_full_reg_size(s); 2149 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2150 int inc = numelem * a->imm; 2151 2152 if (inc != 0) { 2153 if (sve_access_check(s)) { 2154 TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc); 2155 tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd), 2156 vec_full_reg_offset(s, a->rn), 2157 t, fullsz, fullsz); 2158 tcg_temp_free_i64(t); 2159 } 2160 } else { 2161 do_mov_z(s, a->rd, a->rn); 2162 } 2163 return true; 2164} 2165 2166static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a) 2167{ 2168 if (a->esz == 0) { 2169 return false; 2170 } 2171 2172 unsigned fullsz = vec_full_reg_size(s); 2173 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); 2174 int inc = numelem * a->imm; 2175 2176 if (inc != 0) { 2177 if (sve_access_check(s)) { 2178 TCGv_i64 t = tcg_const_i64(inc); 2179 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d); 2180 tcg_temp_free_i64(t); 2181 } 2182 } else { 2183 do_mov_z(s, a->rd, a->rn); 2184 } 2185 return true; 2186} 2187 2188/* 2189 *** SVE Bitwise Immediate Group 2190 */ 2191 2192static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn) 2193{ 2194 uint64_t imm; 2195 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1), 2196 extract32(a->dbm, 0, 6), 2197 extract32(a->dbm, 6, 6))) { 2198 return false; 2199 } 2200 if (sve_access_check(s)) { 2201 unsigned vsz = vec_full_reg_size(s); 2202 gvec_fn(MO_64, vec_full_reg_offset(s, a->rd), 2203 vec_full_reg_offset(s, a->rn), imm, vsz, vsz); 2204 } 2205 return true; 2206} 2207 2208static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a) 2209{ 2210 return do_zz_dbm(s, a, tcg_gen_gvec_andi); 2211} 2212 2213static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a) 2214{ 2215 return do_zz_dbm(s, a, tcg_gen_gvec_ori); 2216} 2217 2218static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a) 2219{ 2220 return do_zz_dbm(s, a, tcg_gen_gvec_xori); 2221} 2222 2223static bool trans_DUPM(DisasContext *s, arg_DUPM *a) 2224{ 2225 uint64_t imm; 2226 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1), 2227 
extract32(a->dbm, 0, 6), 2228 extract32(a->dbm, 6, 6))) { 2229 return false; 2230 } 2231 if (sve_access_check(s)) { 2232 do_dupi_z(s, a->rd, imm); 2233 } 2234 return true; 2235} 2236 2237/* 2238 *** SVE Integer Wide Immediate - Predicated Group 2239 */ 2240 2241/* Implement all merging copies. This is used for CPY (immediate), 2242 * FCPY, CPY (scalar), CPY (SIMD&FP scalar). 2243 */ 2244static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg, 2245 TCGv_i64 val) 2246{ 2247 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32); 2248 static gen_cpy * const fns[4] = { 2249 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h, 2250 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d, 2251 }; 2252 unsigned vsz = vec_full_reg_size(s); 2253 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0)); 2254 TCGv_ptr t_zd = tcg_temp_new_ptr(); 2255 TCGv_ptr t_zn = tcg_temp_new_ptr(); 2256 TCGv_ptr t_pg = tcg_temp_new_ptr(); 2257 2258 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd)); 2259 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn)); 2260 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg)); 2261 2262 fns[esz](t_zd, t_zn, t_pg, val, desc); 2263 2264 tcg_temp_free_ptr(t_zd); 2265 tcg_temp_free_ptr(t_zn); 2266 tcg_temp_free_ptr(t_pg); 2267 tcg_temp_free_i32(desc); 2268} 2269 2270static bool trans_FCPY(DisasContext *s, arg_FCPY *a) 2271{ 2272 if (a->esz == 0) { 2273 return false; 2274 } 2275 if (sve_access_check(s)) { 2276 /* Decode the VFP immediate. */ 2277 uint64_t imm = vfp_expand_imm(a->esz, a->imm); 2278 TCGv_i64 t_imm = tcg_const_i64(imm); 2279 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm); 2280 tcg_temp_free_i64(t_imm); 2281 } 2282 return true; 2283} 2284 2285static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a) 2286{ 2287 if (a->esz == 0 && extract32(s->insn, 13, 1)) { 2288 return false; 2289 } 2290 if (sve_access_check(s)) { 2291 TCGv_i64 t_imm = tcg_const_i64(a->imm); 2292 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm); 2293 tcg_temp_free_i64(t_imm); 2294 } 2295 return true; 2296} 2297 2298static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a) 2299{ 2300 static gen_helper_gvec_2i * const fns[4] = { 2301 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h, 2302 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d, 2303 }; 2304 2305 if (a->esz == 0 && extract32(s->insn, 13, 1)) { 2306 return false; 2307 } 2308 if (sve_access_check(s)) { 2309 unsigned vsz = vec_full_reg_size(s); 2310 TCGv_i64 t_imm = tcg_const_i64(a->imm); 2311 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd), 2312 pred_full_reg_offset(s, a->pg), 2313 t_imm, vsz, vsz, 0, fns[a->esz]); 2314 tcg_temp_free_i64(t_imm); 2315 } 2316 return true; 2317} 2318 2319/* 2320 *** SVE Permute Extract Group 2321 */ 2322 2323static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm) 2324{ 2325 if (!sve_access_check(s)) { 2326 return true; 2327 } 2328 2329 unsigned vsz = vec_full_reg_size(s); 2330 unsigned n_ofs = imm >= vsz ? 0 : imm; 2331 unsigned n_siz = vsz - n_ofs; 2332 unsigned d = vec_full_reg_offset(s, rd); 2333 unsigned n = vec_full_reg_offset(s, rn); 2334 unsigned m = vec_full_reg_offset(s, rm); 2335 2336 /* Use host vector move insns if we have appropriate sizes 2337 * and no unfortunate overlap. 
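 * For example, with vsz == 32 (a 256-bit vector) and imm == 16,
 * n_ofs == n_siz == 16 and both moves below are whole, properly
 * sized 16-byte copies: Zd[0-15] <- Zn[16-31], then
 * Zd[16-31] <- Zm[0-15].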
2338 */ 2339 if (m != d 2340 && n_ofs == size_for_gvec(n_ofs) 2341 && n_siz == size_for_gvec(n_siz) 2342 && (d != n || n_siz <= n_ofs)) { 2343 tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz); 2344 if (n_ofs != 0) { 2345 tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs); 2346 } 2347 } else { 2348 tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext); 2349 } 2350 return true; 2351} 2352 2353static bool trans_EXT(DisasContext *s, arg_EXT *a) 2354{ 2355 return do_EXT(s, a->rd, a->rn, a->rm, a->imm); 2356} 2357 2358static bool trans_EXT_sve2(DisasContext *s, arg_rri *a) 2359{ 2360 if (!dc_isar_feature(aa64_sve2, s)) { 2361 return false; 2362 } 2363 return do_EXT(s, a->rd, a->rn, (a->rn + 1) % 32, a->imm); 2364} 2365 2366/* 2367 *** SVE Permute - Unpredicated Group 2368 */ 2369 2370static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a) 2371{ 2372 if (sve_access_check(s)) { 2373 unsigned vsz = vec_full_reg_size(s); 2374 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd), 2375 vsz, vsz, cpu_reg_sp(s, a->rn)); 2376 } 2377 return true; 2378} 2379 2380static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a) 2381{ 2382 if ((a->imm & 0x1f) == 0) { 2383 return false; 2384 } 2385 if (sve_access_check(s)) { 2386 unsigned vsz = vec_full_reg_size(s); 2387 unsigned dofs = vec_full_reg_offset(s, a->rd); 2388 unsigned esz, index; 2389 2390 esz = ctz32(a->imm); 2391 index = a->imm >> (esz + 1); 2392 2393 if ((index << esz) < vsz) { 2394 unsigned nofs = vec_reg_offset(s, a->rn, index, esz); 2395 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz); 2396 } else { 2397 /* 2398 * While dup_mem handles 128-bit elements, dup_imm does not. 2399 * Thankfully element size doesn't matter for splatting zero. 2400 */ 2401 tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0); 2402 } 2403 } 2404 return true; 2405} 2406 2407static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val) 2408{ 2409 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32); 2410 static gen_insr * const fns[4] = { 2411 gen_helper_sve_insr_b, gen_helper_sve_insr_h, 2412 gen_helper_sve_insr_s, gen_helper_sve_insr_d, 2413 }; 2414 unsigned vsz = vec_full_reg_size(s); 2415 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0)); 2416 TCGv_ptr t_zd = tcg_temp_new_ptr(); 2417 TCGv_ptr t_zn = tcg_temp_new_ptr(); 2418 2419 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd)); 2420 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn)); 2421 2422 fns[a->esz](t_zd, t_zn, val, desc); 2423 2424 tcg_temp_free_ptr(t_zd); 2425 tcg_temp_free_ptr(t_zn); 2426 tcg_temp_free_i32(desc); 2427} 2428 2429static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a) 2430{ 2431 if (sve_access_check(s)) { 2432 TCGv_i64 t = tcg_temp_new_i64(); 2433 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64)); 2434 do_insr_i64(s, a, t); 2435 tcg_temp_free_i64(t); 2436 } 2437 return true; 2438} 2439 2440static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a) 2441{ 2442 if (sve_access_check(s)) { 2443 do_insr_i64(s, a, cpu_reg(s, a->rm)); 2444 } 2445 return true; 2446} 2447 2448static bool trans_REV_v(DisasContext *s, arg_rr_esz *a) 2449{ 2450 static gen_helper_gvec_2 * const fns[4] = { 2451 gen_helper_sve_rev_b, gen_helper_sve_rev_h, 2452 gen_helper_sve_rev_s, gen_helper_sve_rev_d 2453 }; 2454 2455 if (sve_access_check(s)) { 2456 gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0); 2457 } 2458 return true; 2459} 2460 2461static bool trans_TBL(DisasContext *s, arg_rrr_esz *a) 2462{ 2463 static gen_helper_gvec_3 * const fns[4] = { 2464 
gen_helper_sve_tbl_b, gen_helper_sve_tbl_h, 2465 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d 2466 }; 2467 2468 if (sve_access_check(s)) { 2469 gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0); 2470 } 2471 return true; 2472} 2473 2474static bool trans_TBL_sve2(DisasContext *s, arg_rrr_esz *a) 2475{ 2476 static gen_helper_gvec_4 * const fns[4] = { 2477 gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h, 2478 gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d 2479 }; 2480 2481 if (!dc_isar_feature(aa64_sve2, s)) { 2482 return false; 2483 } 2484 if (sve_access_check(s)) { 2485 gen_gvec_ool_zzzz(s, fns[a->esz], a->rd, a->rn, 2486 (a->rn + 1) % 32, a->rm, 0); 2487 } 2488 return true; 2489} 2490 2491static bool trans_TBX(DisasContext *s, arg_rrr_esz *a) 2492{ 2493 static gen_helper_gvec_3 * const fns[4] = { 2494 gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h, 2495 gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d 2496 }; 2497 2498 if (!dc_isar_feature(aa64_sve2, s)) { 2499 return false; 2500 } 2501 if (sve_access_check(s)) { 2502 gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0); 2503 } 2504 return true; 2505} 2506 2507static bool trans_UNPK(DisasContext *s, arg_UNPK *a) 2508{ 2509 static gen_helper_gvec_2 * const fns[4][2] = { 2510 { NULL, NULL }, 2511 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h }, 2512 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s }, 2513 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d }, 2514 }; 2515 2516 if (a->esz == 0) { 2517 return false; 2518 } 2519 if (sve_access_check(s)) { 2520 unsigned vsz = vec_full_reg_size(s); 2521 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd), 2522 vec_full_reg_offset(s, a->rn) 2523 + (a->h ? vsz / 2 : 0), 2524 vsz, vsz, 0, fns[a->esz][a->u]); 2525 } 2526 return true; 2527} 2528 2529/* 2530 *** SVE Permute - Predicates Group 2531 */ 2532 2533static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd, 2534 gen_helper_gvec_3 *fn) 2535{ 2536 if (!sve_access_check(s)) { 2537 return true; 2538 } 2539 2540 unsigned vsz = pred_full_reg_size(s); 2541 2542 TCGv_ptr t_d = tcg_temp_new_ptr(); 2543 TCGv_ptr t_n = tcg_temp_new_ptr(); 2544 TCGv_ptr t_m = tcg_temp_new_ptr(); 2545 TCGv_i32 t_desc; 2546 uint32_t desc = 0; 2547 2548 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz); 2549 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 2550 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd); 2551 2552 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd)); 2553 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn)); 2554 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm)); 2555 t_desc = tcg_const_i32(desc); 2556 2557 fn(t_d, t_n, t_m, t_desc); 2558 2559 tcg_temp_free_ptr(t_d); 2560 tcg_temp_free_ptr(t_n); 2561 tcg_temp_free_ptr(t_m); 2562 tcg_temp_free_i32(t_desc); 2563 return true; 2564} 2565 2566static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd, 2567 gen_helper_gvec_2 *fn) 2568{ 2569 if (!sve_access_check(s)) { 2570 return true; 2571 } 2572 2573 unsigned vsz = pred_full_reg_size(s); 2574 TCGv_ptr t_d = tcg_temp_new_ptr(); 2575 TCGv_ptr t_n = tcg_temp_new_ptr(); 2576 TCGv_i32 t_desc; 2577 uint32_t desc = 0; 2578 2579 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd)); 2580 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn)); 2581 2582 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz); 2583 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 2584 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd); 2585 t_desc = tcg_const_i32(desc); 2586 2587 fn(t_d, t_n, t_desc); 2588 2589 
tcg_temp_free_i32(t_desc); 2590 tcg_temp_free_ptr(t_d); 2591 tcg_temp_free_ptr(t_n); 2592 return true; 2593} 2594 2595static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a) 2596{ 2597 return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p); 2598} 2599 2600static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a) 2601{ 2602 return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p); 2603} 2604 2605static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a) 2606{ 2607 return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p); 2608} 2609 2610static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a) 2611{ 2612 return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p); 2613} 2614 2615static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a) 2616{ 2617 return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p); 2618} 2619 2620static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a) 2621{ 2622 return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p); 2623} 2624 2625static bool trans_REV_p(DisasContext *s, arg_rr_esz *a) 2626{ 2627 return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p); 2628} 2629 2630static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a) 2631{ 2632 return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p); 2633} 2634 2635static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a) 2636{ 2637 return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p); 2638} 2639 2640/* 2641 *** SVE Permute - Interleaving Group 2642 */ 2643 2644static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high) 2645{ 2646 static gen_helper_gvec_3 * const fns[4] = { 2647 gen_helper_sve_zip_b, gen_helper_sve_zip_h, 2648 gen_helper_sve_zip_s, gen_helper_sve_zip_d, 2649 }; 2650 2651 if (sve_access_check(s)) { 2652 unsigned vsz = vec_full_reg_size(s); 2653 unsigned high_ofs = high ? vsz / 2 : 0; 2654 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd), 2655 vec_full_reg_offset(s, a->rn) + high_ofs, 2656 vec_full_reg_offset(s, a->rm) + high_ofs, 2657 vsz, vsz, 0, fns[a->esz]); 2658 } 2659 return true; 2660} 2661 2662static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data, 2663 gen_helper_gvec_3 *fn) 2664{ 2665 if (sve_access_check(s)) { 2666 gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data); 2667 } 2668 return true; 2669} 2670 2671static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a) 2672{ 2673 return do_zip(s, a, false); 2674} 2675 2676static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a) 2677{ 2678 return do_zip(s, a, true); 2679} 2680 2681static bool do_zip_q(DisasContext *s, arg_rrr_esz *a, bool high) 2682{ 2683 if (!dc_isar_feature(aa64_sve_f64mm, s)) { 2684 return false; 2685 } 2686 if (sve_access_check(s)) { 2687 unsigned vsz = vec_full_reg_size(s); 2688 unsigned high_ofs = high ? 
QEMU_ALIGN_DOWN(vsz, 32) / 2 : 0; 2689 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd), 2690 vec_full_reg_offset(s, a->rn) + high_ofs, 2691 vec_full_reg_offset(s, a->rm) + high_ofs, 2692 vsz, vsz, 0, gen_helper_sve2_zip_q); 2693 } 2694 return true; 2695} 2696 2697static bool trans_ZIP1_q(DisasContext *s, arg_rrr_esz *a) 2698{ 2699 return do_zip_q(s, a, false); 2700} 2701 2702static bool trans_ZIP2_q(DisasContext *s, arg_rrr_esz *a) 2703{ 2704 return do_zip_q(s, a, true); 2705} 2706 2707static gen_helper_gvec_3 * const uzp_fns[4] = { 2708 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h, 2709 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d, 2710}; 2711 2712static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a) 2713{ 2714 return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]); 2715} 2716 2717static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a) 2718{ 2719 return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]); 2720} 2721 2722static bool trans_UZP1_q(DisasContext *s, arg_rrr_esz *a) 2723{ 2724 if (!dc_isar_feature(aa64_sve_f64mm, s)) { 2725 return false; 2726 } 2727 return do_zzz_data_ool(s, a, 0, gen_helper_sve2_uzp_q); 2728} 2729 2730static bool trans_UZP2_q(DisasContext *s, arg_rrr_esz *a) 2731{ 2732 if (!dc_isar_feature(aa64_sve_f64mm, s)) { 2733 return false; 2734 } 2735 return do_zzz_data_ool(s, a, 16, gen_helper_sve2_uzp_q); 2736} 2737 2738static gen_helper_gvec_3 * const trn_fns[4] = { 2739 gen_helper_sve_trn_b, gen_helper_sve_trn_h, 2740 gen_helper_sve_trn_s, gen_helper_sve_trn_d, 2741}; 2742 2743static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a) 2744{ 2745 return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]); 2746} 2747 2748static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a) 2749{ 2750 return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]); 2751} 2752 2753static bool trans_TRN1_q(DisasContext *s, arg_rrr_esz *a) 2754{ 2755 if (!dc_isar_feature(aa64_sve_f64mm, s)) { 2756 return false; 2757 } 2758 return do_zzz_data_ool(s, a, 0, gen_helper_sve2_trn_q); 2759} 2760 2761static bool trans_TRN2_q(DisasContext *s, arg_rrr_esz *a) 2762{ 2763 if (!dc_isar_feature(aa64_sve_f64mm, s)) { 2764 return false; 2765 } 2766 return do_zzz_data_ool(s, a, 16, gen_helper_sve2_trn_q); 2767} 2768 2769/* 2770 *** SVE Permute Vector - Predicated Group 2771 */ 2772 2773static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a) 2774{ 2775 static gen_helper_gvec_3 * const fns[4] = { 2776 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d 2777 }; 2778 return do_zpz_ool(s, a, fns[a->esz]); 2779} 2780 2781/* Call the helper that computes the ARM LastActiveElement pseudocode 2782 * function, scaled by the element size. This includes the not found 2783 * indication; e.g. not found for esz=3 is -8. 2784 */ 2785static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg) 2786{ 2787 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot 2788 * round up, as we do elsewhere, because we need the exact size. 2789 */ 2790 TCGv_ptr t_p = tcg_temp_new_ptr(); 2791 TCGv_i32 t_desc; 2792 unsigned desc = 0; 2793 2794 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s)); 2795 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz); 2796 2797 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg)); 2798 t_desc = tcg_const_i32(desc); 2799 2800 gen_helper_sve_last_active_element(ret, t_p, t_desc); 2801 2802 tcg_temp_free_i32(t_desc); 2803 tcg_temp_free_ptr(t_p); 2804} 2805 2806/* Increment LAST to the offset of the next element in the vector, 2807 * wrapping around to 0. 
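 * For example, with esz == 2 and vsz == 48 (a 384-bit vector),
 * last == 44 increments to 48, which the movcond below wraps back
 * to 0 since 48 is not a power of 2; for power-of-2 vectors the
 * AND with vsz - 1 performs the same wrap.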
2808 */ 2809static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz) 2810{ 2811 unsigned vsz = vec_full_reg_size(s); 2812 2813 tcg_gen_addi_i32(last, last, 1 << esz); 2814 if (is_power_of_2(vsz)) { 2815 tcg_gen_andi_i32(last, last, vsz - 1); 2816 } else { 2817 TCGv_i32 max = tcg_const_i32(vsz); 2818 TCGv_i32 zero = tcg_const_i32(0); 2819 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last); 2820 tcg_temp_free_i32(max); 2821 tcg_temp_free_i32(zero); 2822 } 2823} 2824 2825/* If LAST < 0, set LAST to the offset of the last element in the vector. */ 2826static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz) 2827{ 2828 unsigned vsz = vec_full_reg_size(s); 2829 2830 if (is_power_of_2(vsz)) { 2831 tcg_gen_andi_i32(last, last, vsz - 1); 2832 } else { 2833 TCGv_i32 max = tcg_const_i32(vsz - (1 << esz)); 2834 TCGv_i32 zero = tcg_const_i32(0); 2835 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last); 2836 tcg_temp_free_i32(max); 2837 tcg_temp_free_i32(zero); 2838 } 2839} 2840 2841/* Load an unsigned element of ESZ from BASE+OFS. */ 2842static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz) 2843{ 2844 TCGv_i64 r = tcg_temp_new_i64(); 2845 2846 switch (esz) { 2847 case 0: 2848 tcg_gen_ld8u_i64(r, base, ofs); 2849 break; 2850 case 1: 2851 tcg_gen_ld16u_i64(r, base, ofs); 2852 break; 2853 case 2: 2854 tcg_gen_ld32u_i64(r, base, ofs); 2855 break; 2856 case 3: 2857 tcg_gen_ld_i64(r, base, ofs); 2858 break; 2859 default: 2860 g_assert_not_reached(); 2861 } 2862 return r; 2863} 2864 2865/* Load an unsigned element of ESZ from RM[LAST]. */ 2866static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last, 2867 int rm, int esz) 2868{ 2869 TCGv_ptr p = tcg_temp_new_ptr(); 2870 TCGv_i64 r; 2871 2872 /* Convert offset into vector into offset into ENV. 2873 * The final adjustment for the vector register base 2874 * is added via constant offset to the load. 2875 */ 2876#ifdef HOST_WORDS_BIGENDIAN 2877 /* Adjust for element ordering. See vec_reg_offset. */ 2878 if (esz < 3) { 2879 tcg_gen_xori_i32(last, last, 8 - (1 << esz)); 2880 } 2881#endif 2882 tcg_gen_ext_i32_ptr(p, last); 2883 tcg_gen_add_ptr(p, p, cpu_env); 2884 2885 r = load_esz(p, vec_full_reg_offset(s, rm), esz); 2886 tcg_temp_free_ptr(p); 2887 2888 return r; 2889} 2890 2891/* Compute CLAST for a Zreg. */ 2892static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before) 2893{ 2894 TCGv_i32 last; 2895 TCGLabel *over; 2896 TCGv_i64 ele; 2897 unsigned vsz, esz = a->esz; 2898 2899 if (!sve_access_check(s)) { 2900 return true; 2901 } 2902 2903 last = tcg_temp_local_new_i32(); 2904 over = gen_new_label(); 2905 2906 find_last_active(s, last, esz, a->pg); 2907 2908 /* There is of course no movcond for a 2048-bit vector, 2909 * so we must branch over the actual store. 2910 */ 2911 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over); 2912 2913 if (!before) { 2914 incr_last_active(s, last, esz); 2915 } 2916 2917 ele = load_last_active(s, last, a->rm, esz); 2918 tcg_temp_free_i32(last); 2919 2920 vsz = vec_full_reg_size(s); 2921 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele); 2922 tcg_temp_free_i64(ele); 2923 2924 /* If this insn used MOVPRFX, we may need a second move. 
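 * When no element was active we branched straight to `over' and
 * skipped the dup, so the destination must receive the original
 * value of Zn; do that move explicitly when rd != rn.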
*/ 2925 if (a->rd != a->rn) { 2926 TCGLabel *done = gen_new_label(); 2927 tcg_gen_br(done); 2928 2929 gen_set_label(over); 2930 do_mov_z(s, a->rd, a->rn); 2931 2932 gen_set_label(done); 2933 } else { 2934 gen_set_label(over); 2935 } 2936 return true; 2937} 2938 2939static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a) 2940{ 2941 return do_clast_vector(s, a, false); 2942} 2943 2944static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a) 2945{ 2946 return do_clast_vector(s, a, true); 2947} 2948 2949/* Compute CLAST for a scalar. */ 2950static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm, 2951 bool before, TCGv_i64 reg_val) 2952{ 2953 TCGv_i32 last = tcg_temp_new_i32(); 2954 TCGv_i64 ele, cmp, zero; 2955 2956 find_last_active(s, last, esz, pg); 2957 2958 /* Extend the original value of last prior to incrementing. */ 2959 cmp = tcg_temp_new_i64(); 2960 tcg_gen_ext_i32_i64(cmp, last); 2961 2962 if (!before) { 2963 incr_last_active(s, last, esz); 2964 } 2965 2966 /* The conceit here is that while last < 0 indicates not found, after 2967 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address 2968 * from which we can load garbage. We then discard the garbage with 2969 * a conditional move. 2970 */ 2971 ele = load_last_active(s, last, rm, esz); 2972 tcg_temp_free_i32(last); 2973 2974 zero = tcg_const_i64(0); 2975 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val); 2976 2977 tcg_temp_free_i64(zero); 2978 tcg_temp_free_i64(cmp); 2979 tcg_temp_free_i64(ele); 2980} 2981 2982/* Compute CLAST for a Vreg. */ 2983static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before) 2984{ 2985 if (sve_access_check(s)) { 2986 int esz = a->esz; 2987 int ofs = vec_reg_offset(s, a->rd, 0, esz); 2988 TCGv_i64 reg = load_esz(cpu_env, ofs, esz); 2989 2990 do_clast_scalar(s, esz, a->pg, a->rn, before, reg); 2991 write_fp_dreg(s, a->rd, reg); 2992 tcg_temp_free_i64(reg); 2993 } 2994 return true; 2995} 2996 2997static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a) 2998{ 2999 return do_clast_fp(s, a, false); 3000} 3001 3002static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a) 3003{ 3004 return do_clast_fp(s, a, true); 3005} 3006 3007/* Compute CLAST for a Xreg. */ 3008static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before) 3009{ 3010 TCGv_i64 reg; 3011 3012 if (!sve_access_check(s)) { 3013 return true; 3014 } 3015 3016 reg = cpu_reg(s, a->rd); 3017 switch (a->esz) { 3018 case 0: 3019 tcg_gen_ext8u_i64(reg, reg); 3020 break; 3021 case 1: 3022 tcg_gen_ext16u_i64(reg, reg); 3023 break; 3024 case 2: 3025 tcg_gen_ext32u_i64(reg, reg); 3026 break; 3027 case 3: 3028 break; 3029 default: 3030 g_assert_not_reached(); 3031 } 3032 3033 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg); 3034 return true; 3035} 3036 3037static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a) 3038{ 3039 return do_clast_general(s, a, false); 3040} 3041 3042static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a) 3043{ 3044 return do_clast_general(s, a, true); 3045} 3046 3047/* Compute LAST for a scalar. 
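 * LASTB (before == true) selects the last active element itself
 * and, when no element is active, wraps to the final element of
 * the vector; LASTA (before == false) selects the element
 * following the last active one.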
*/ 3048static TCGv_i64 do_last_scalar(DisasContext *s, int esz, 3049 int pg, int rm, bool before) 3050{ 3051 TCGv_i32 last = tcg_temp_new_i32(); 3052 TCGv_i64 ret; 3053 3054 find_last_active(s, last, esz, pg); 3055 if (before) { 3056 wrap_last_active(s, last, esz); 3057 } else { 3058 incr_last_active(s, last, esz); 3059 } 3060 3061 ret = load_last_active(s, last, rm, esz); 3062 tcg_temp_free_i32(last); 3063 return ret; 3064} 3065 3066/* Compute LAST for a Vreg. */ 3067static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before) 3068{ 3069 if (sve_access_check(s)) { 3070 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before); 3071 write_fp_dreg(s, a->rd, val); 3072 tcg_temp_free_i64(val); 3073 } 3074 return true; 3075} 3076 3077static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a) 3078{ 3079 return do_last_fp(s, a, false); 3080} 3081 3082static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a) 3083{ 3084 return do_last_fp(s, a, true); 3085} 3086 3087/* Compute LAST for a Xreg. */ 3088static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before) 3089{ 3090 if (sve_access_check(s)) { 3091 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before); 3092 tcg_gen_mov_i64(cpu_reg(s, a->rd), val); 3093 tcg_temp_free_i64(val); 3094 } 3095 return true; 3096} 3097 3098static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a) 3099{ 3100 return do_last_general(s, a, false); 3101} 3102 3103static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a) 3104{ 3105 return do_last_general(s, a, true); 3106} 3107 3108static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a) 3109{ 3110 if (sve_access_check(s)) { 3111 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn)); 3112 } 3113 return true; 3114} 3115 3116static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a) 3117{ 3118 if (sve_access_check(s)) { 3119 int ofs = vec_reg_offset(s, a->rn, 0, a->esz); 3120 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz); 3121 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t); 3122 tcg_temp_free_i64(t); 3123 } 3124 return true; 3125} 3126 3127static bool trans_REVB(DisasContext *s, arg_rpr_esz *a) 3128{ 3129 static gen_helper_gvec_3 * const fns[4] = { 3130 NULL, 3131 gen_helper_sve_revb_h, 3132 gen_helper_sve_revb_s, 3133 gen_helper_sve_revb_d, 3134 }; 3135 return do_zpz_ool(s, a, fns[a->esz]); 3136} 3137 3138static bool trans_REVH(DisasContext *s, arg_rpr_esz *a) 3139{ 3140 static gen_helper_gvec_3 * const fns[4] = { 3141 NULL, 3142 NULL, 3143 gen_helper_sve_revh_s, 3144 gen_helper_sve_revh_d, 3145 }; 3146 return do_zpz_ool(s, a, fns[a->esz]); 3147} 3148 3149static bool trans_REVW(DisasContext *s, arg_rpr_esz *a) 3150{ 3151 return do_zpz_ool(s, a, a->esz == 3 ? 
gen_helper_sve_revw_d : NULL); 3152} 3153 3154static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a) 3155{ 3156 static gen_helper_gvec_3 * const fns[4] = { 3157 gen_helper_sve_rbit_b, 3158 gen_helper_sve_rbit_h, 3159 gen_helper_sve_rbit_s, 3160 gen_helper_sve_rbit_d, 3161 }; 3162 return do_zpz_ool(s, a, fns[a->esz]); 3163} 3164 3165static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a) 3166{ 3167 if (sve_access_check(s)) { 3168 gen_gvec_ool_zzzp(s, gen_helper_sve_splice, 3169 a->rd, a->rn, a->rm, a->pg, a->esz); 3170 } 3171 return true; 3172} 3173 3174static bool trans_SPLICE_sve2(DisasContext *s, arg_rpr_esz *a) 3175{ 3176 if (!dc_isar_feature(aa64_sve2, s)) { 3177 return false; 3178 } 3179 if (sve_access_check(s)) { 3180 gen_gvec_ool_zzzp(s, gen_helper_sve_splice, 3181 a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz); 3182 } 3183 return true; 3184} 3185 3186/* 3187 *** SVE Integer Compare - Vectors Group 3188 */ 3189 3190static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a, 3191 gen_helper_gvec_flags_4 *gen_fn) 3192{ 3193 TCGv_ptr pd, zn, zm, pg; 3194 unsigned vsz; 3195 TCGv_i32 t; 3196 3197 if (gen_fn == NULL) { 3198 return false; 3199 } 3200 if (!sve_access_check(s)) { 3201 return true; 3202 } 3203 3204 vsz = vec_full_reg_size(s); 3205 t = tcg_const_i32(simd_desc(vsz, vsz, 0)); 3206 pd = tcg_temp_new_ptr(); 3207 zn = tcg_temp_new_ptr(); 3208 zm = tcg_temp_new_ptr(); 3209 pg = tcg_temp_new_ptr(); 3210 3211 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd)); 3212 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn)); 3213 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm)); 3214 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg)); 3215 3216 gen_fn(t, pd, zn, zm, pg, t); 3217 3218 tcg_temp_free_ptr(pd); 3219 tcg_temp_free_ptr(zn); 3220 tcg_temp_free_ptr(zm); 3221 tcg_temp_free_ptr(pg); 3222 3223 do_pred_flags(t); 3224 3225 tcg_temp_free_i32(t); 3226 return true; 3227} 3228 3229#define DO_PPZZ(NAME, name) \ 3230static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \ 3231{ \ 3232 static gen_helper_gvec_flags_4 * const fns[4] = { \ 3233 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \ 3234 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \ 3235 }; \ 3236 return do_ppzz_flags(s, a, fns[a->esz]); \ 3237} 3238 3239DO_PPZZ(CMPEQ, cmpeq) 3240DO_PPZZ(CMPNE, cmpne) 3241DO_PPZZ(CMPGT, cmpgt) 3242DO_PPZZ(CMPGE, cmpge) 3243DO_PPZZ(CMPHI, cmphi) 3244DO_PPZZ(CMPHS, cmphs) 3245 3246#undef DO_PPZZ 3247 3248#define DO_PPZW(NAME, name) \ 3249static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a) \ 3250{ \ 3251 static gen_helper_gvec_flags_4 * const fns[4] = { \ 3252 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \ 3253 gen_helper_sve_##name##_ppzw_s, NULL \ 3254 }; \ 3255 return do_ppzz_flags(s, a, fns[a->esz]); \ 3256} 3257 3258DO_PPZW(CMPEQ, cmpeq) 3259DO_PPZW(CMPNE, cmpne) 3260DO_PPZW(CMPGT, cmpgt) 3261DO_PPZW(CMPGE, cmpge) 3262DO_PPZW(CMPHI, cmphi) 3263DO_PPZW(CMPHS, cmphs) 3264DO_PPZW(CMPLT, cmplt) 3265DO_PPZW(CMPLE, cmple) 3266DO_PPZW(CMPLO, cmplo) 3267DO_PPZW(CMPLS, cmpls) 3268 3269#undef DO_PPZW 3270 3271/* 3272 *** SVE Integer Compare - Immediate Groups 3273 */ 3274 3275static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a, 3276 gen_helper_gvec_flags_3 *gen_fn) 3277{ 3278 TCGv_ptr pd, zn, pg; 3279 unsigned vsz; 3280 TCGv_i32 t; 3281 3282 if (gen_fn == NULL) { 3283 return false; 3284 } 3285 if (!sve_access_check(s)) { 3286 return true; 3287 } 3288 3289 vsz = 
vec_full_reg_size(s); 3290 t = tcg_const_i32(simd_desc(vsz, vsz, a->imm)); 3291 pd = tcg_temp_new_ptr(); 3292 zn = tcg_temp_new_ptr(); 3293 pg = tcg_temp_new_ptr(); 3294 3295 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd)); 3296 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn)); 3297 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg)); 3298 3299 gen_fn(t, pd, zn, pg, t); 3300 3301 tcg_temp_free_ptr(pd); 3302 tcg_temp_free_ptr(zn); 3303 tcg_temp_free_ptr(pg); 3304 3305 do_pred_flags(t); 3306 3307 tcg_temp_free_i32(t); 3308 return true; 3309} 3310 3311#define DO_PPZI(NAME, name) \ 3312static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a) \ 3313{ \ 3314 static gen_helper_gvec_flags_3 * const fns[4] = { \ 3315 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \ 3316 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \ 3317 }; \ 3318 return do_ppzi_flags(s, a, fns[a->esz]); \ 3319} 3320 3321DO_PPZI(CMPEQ, cmpeq) 3322DO_PPZI(CMPNE, cmpne) 3323DO_PPZI(CMPGT, cmpgt) 3324DO_PPZI(CMPGE, cmpge) 3325DO_PPZI(CMPHI, cmphi) 3326DO_PPZI(CMPHS, cmphs) 3327DO_PPZI(CMPLT, cmplt) 3328DO_PPZI(CMPLE, cmple) 3329DO_PPZI(CMPLO, cmplo) 3330DO_PPZI(CMPLS, cmpls) 3331 3332#undef DO_PPZI 3333 3334/* 3335 *** SVE Partition Break Group 3336 */ 3337 3338static bool do_brk3(DisasContext *s, arg_rprr_s *a, 3339 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s) 3340{ 3341 if (!sve_access_check(s)) { 3342 return true; 3343 } 3344 3345 unsigned vsz = pred_full_reg_size(s); 3346 3347 /* Predicate sizes may be smaller and cannot use simd_desc. */ 3348 TCGv_ptr d = tcg_temp_new_ptr(); 3349 TCGv_ptr n = tcg_temp_new_ptr(); 3350 TCGv_ptr m = tcg_temp_new_ptr(); 3351 TCGv_ptr g = tcg_temp_new_ptr(); 3352 TCGv_i32 t = tcg_const_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz)); 3353 3354 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd)); 3355 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn)); 3356 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm)); 3357 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg)); 3358 3359 if (a->s) { 3360 fn_s(t, d, n, m, g, t); 3361 do_pred_flags(t); 3362 } else { 3363 fn(d, n, m, g, t); 3364 } 3365 tcg_temp_free_ptr(d); 3366 tcg_temp_free_ptr(n); 3367 tcg_temp_free_ptr(m); 3368 tcg_temp_free_ptr(g); 3369 tcg_temp_free_i32(t); 3370 return true; 3371} 3372 3373static bool do_brk2(DisasContext *s, arg_rpr_s *a, 3374 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s) 3375{ 3376 if (!sve_access_check(s)) { 3377 return true; 3378 } 3379 3380 unsigned vsz = pred_full_reg_size(s); 3381 3382 /* Predicate sizes may be smaller and cannot use simd_desc. 
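 * simd_desc() encodes operation sizes in units of 8 bytes, while a
 * predicate can be as small as 2 bytes, so the exact byte size is
 * passed in the PREDDESC.OPRSZ field instead.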
*/ 3383 TCGv_ptr d = tcg_temp_new_ptr(); 3384 TCGv_ptr n = tcg_temp_new_ptr(); 3385 TCGv_ptr g = tcg_temp_new_ptr(); 3386 TCGv_i32 t = tcg_const_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz)); 3387 3388 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd)); 3389 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn)); 3390 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg)); 3391 3392 if (a->s) { 3393 fn_s(t, d, n, g, t); 3394 do_pred_flags(t); 3395 } else { 3396 fn(d, n, g, t); 3397 } 3398 tcg_temp_free_ptr(d); 3399 tcg_temp_free_ptr(n); 3400 tcg_temp_free_ptr(g); 3401 tcg_temp_free_i32(t); 3402 return true; 3403} 3404 3405static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a) 3406{ 3407 return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas); 3408} 3409 3410static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a) 3411{ 3412 return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs); 3413} 3414 3415static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a) 3416{ 3417 return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m); 3418} 3419 3420static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a) 3421{ 3422 return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m); 3423} 3424 3425static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a) 3426{ 3427 return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z); 3428} 3429 3430static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a) 3431{ 3432 return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z); 3433} 3434 3435static bool trans_BRKN(DisasContext *s, arg_rpr_s *a) 3436{ 3437 return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns); 3438} 3439 3440/* 3441 *** SVE Predicate Count Group 3442 */ 3443 3444static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg) 3445{ 3446 unsigned psz = pred_full_reg_size(s); 3447 3448 if (psz <= 8) { 3449 uint64_t psz_mask; 3450 3451 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn)); 3452 if (pn != pg) { 3453 TCGv_i64 g = tcg_temp_new_i64(); 3454 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg)); 3455 tcg_gen_and_i64(val, val, g); 3456 tcg_temp_free_i64(g); 3457 } 3458 3459 /* Reduce the pred_esz_masks value simply to reduce the 3460 * size of the code generated here. 
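 * For example, with esz == MO_32 and a 256-bit vector, psz == 4
 * and the constant folds from 0x1111111111111111 to 0x11111111,
 * one predicate bit per active 4-byte element, which ctpop then
 * counts directly.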
3461 */ 3462 psz_mask = MAKE_64BIT_MASK(0, psz * 8); 3463 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask); 3464 3465 tcg_gen_ctpop_i64(val, val); 3466 } else { 3467 TCGv_ptr t_pn = tcg_temp_new_ptr(); 3468 TCGv_ptr t_pg = tcg_temp_new_ptr(); 3469 unsigned desc = 0; 3470 TCGv_i32 t_desc; 3471 3472 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz); 3473 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz); 3474 3475 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn)); 3476 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg)); 3477 t_desc = tcg_const_i32(desc); 3478 3479 gen_helper_sve_cntp(val, t_pn, t_pg, t_desc); 3480 tcg_temp_free_ptr(t_pn); 3481 tcg_temp_free_ptr(t_pg); 3482 tcg_temp_free_i32(t_desc); 3483 } 3484} 3485 3486static bool trans_CNTP(DisasContext *s, arg_CNTP *a) 3487{ 3488 if (sve_access_check(s)) { 3489 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg); 3490 } 3491 return true; 3492} 3493 3494static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a) 3495{ 3496 if (sve_access_check(s)) { 3497 TCGv_i64 reg = cpu_reg(s, a->rd); 3498 TCGv_i64 val = tcg_temp_new_i64(); 3499 3500 do_cntp(s, val, a->esz, a->pg, a->pg); 3501 if (a->d) { 3502 tcg_gen_sub_i64(reg, reg, val); 3503 } else { 3504 tcg_gen_add_i64(reg, reg, val); 3505 } 3506 tcg_temp_free_i64(val); 3507 } 3508 return true; 3509} 3510 3511static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a) 3512{ 3513 if (a->esz == 0) { 3514 return false; 3515 } 3516 if (sve_access_check(s)) { 3517 unsigned vsz = vec_full_reg_size(s); 3518 TCGv_i64 val = tcg_temp_new_i64(); 3519 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds; 3520 3521 do_cntp(s, val, a->esz, a->pg, a->pg); 3522 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd), 3523 vec_full_reg_offset(s, a->rn), val, vsz, vsz); 3524 } 3525 return true; 3526} 3527 3528static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a) 3529{ 3530 if (sve_access_check(s)) { 3531 TCGv_i64 reg = cpu_reg(s, a->rd); 3532 TCGv_i64 val = tcg_temp_new_i64(); 3533 3534 do_cntp(s, val, a->esz, a->pg, a->pg); 3535 do_sat_addsub_32(reg, val, a->u, a->d); 3536 } 3537 return true; 3538} 3539 3540static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a) 3541{ 3542 if (sve_access_check(s)) { 3543 TCGv_i64 reg = cpu_reg(s, a->rd); 3544 TCGv_i64 val = tcg_temp_new_i64(); 3545 3546 do_cntp(s, val, a->esz, a->pg, a->pg); 3547 do_sat_addsub_64(reg, val, a->u, a->d); 3548 } 3549 return true; 3550} 3551 3552static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a) 3553{ 3554 if (a->esz == 0) { 3555 return false; 3556 } 3557 if (sve_access_check(s)) { 3558 TCGv_i64 val = tcg_temp_new_i64(); 3559 do_cntp(s, val, a->esz, a->pg, a->pg); 3560 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d); 3561 } 3562 return true; 3563} 3564 3565/* 3566 *** SVE Integer Compare Scalars Group 3567 */ 3568 3569static bool trans_CTERM(DisasContext *s, arg_CTERM *a) 3570{ 3571 if (!sve_access_check(s)) { 3572 return true; 3573 } 3574 3575 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ); 3576 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf); 3577 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf); 3578 TCGv_i64 cmp = tcg_temp_new_i64(); 3579 3580 tcg_gen_setcond_i64(cond, cmp, rn, rm); 3581 tcg_gen_extrl_i64_i32(cpu_NF, cmp); 3582 tcg_temp_free_i64(cmp); 3583 3584 /* VF = !NF & !CF. */ 3585 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1); 3586 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF); 3587 3588 /* Both NF and VF actually look at bit 31. 
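 * The setcond and xor/andc above leave 0 or 1 in each flag, and
 * the negation maps 1 to all-ones, placing the result in bit 31
 * where the flag-checking code expects it.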
*/ 3589 tcg_gen_neg_i32(cpu_NF, cpu_NF); 3590 tcg_gen_neg_i32(cpu_VF, cpu_VF); 3591 return true; 3592} 3593 3594static bool trans_WHILE(DisasContext *s, arg_WHILE *a) 3595{ 3596 TCGv_i64 op0, op1, t0, t1, tmax; 3597 TCGv_i32 t2, t3; 3598 TCGv_ptr ptr; 3599 unsigned vsz = vec_full_reg_size(s); 3600 unsigned desc = 0; 3601 TCGCond cond; 3602 uint64_t maxval; 3603 /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */ 3604 bool eq = a->eq == a->lt; 3605 3606 /* The greater-than conditions are all SVE2. */ 3607 if (!a->lt && !dc_isar_feature(aa64_sve2, s)) { 3608 return false; 3609 } 3610 if (!sve_access_check(s)) { 3611 return true; 3612 } 3613 3614 op0 = read_cpu_reg(s, a->rn, 1); 3615 op1 = read_cpu_reg(s, a->rm, 1); 3616 3617 if (!a->sf) { 3618 if (a->u) { 3619 tcg_gen_ext32u_i64(op0, op0); 3620 tcg_gen_ext32u_i64(op1, op1); 3621 } else { 3622 tcg_gen_ext32s_i64(op0, op0); 3623 tcg_gen_ext32s_i64(op1, op1); 3624 } 3625 } 3626 3627 /* For the helper, compress the different conditions into a computation 3628 * of how many iterations for which the condition is true. 3629 */ 3630 t0 = tcg_temp_new_i64(); 3631 t1 = tcg_temp_new_i64(); 3632 3633 if (a->lt) { 3634 tcg_gen_sub_i64(t0, op1, op0); 3635 if (a->u) { 3636 maxval = a->sf ? UINT64_MAX : UINT32_MAX; 3637 cond = eq ? TCG_COND_LEU : TCG_COND_LTU; 3638 } else { 3639 maxval = a->sf ? INT64_MAX : INT32_MAX; 3640 cond = eq ? TCG_COND_LE : TCG_COND_LT; 3641 } 3642 } else { 3643 tcg_gen_sub_i64(t0, op0, op1); 3644 if (a->u) { 3645 maxval = 0; 3646 cond = eq ? TCG_COND_GEU : TCG_COND_GTU; 3647 } else { 3648 maxval = a->sf ? INT64_MIN : INT32_MIN; 3649 cond = eq ? TCG_COND_GE : TCG_COND_GT; 3650 } 3651 } 3652 3653 tmax = tcg_const_i64(vsz >> a->esz); 3654 if (eq) { 3655 /* Equality means one more iteration. */ 3656 tcg_gen_addi_i64(t0, t0, 1); 3657 3658 /* 3659 * For the less-than while, if op1 is maxval (and the only time 3660 * the addition above could overflow), then we produce an all-true 3661 * predicate by setting the count to the vector length. This is 3662 * because the pseudocode is described as an increment + compare 3663 * loop, and the maximum integer would always compare true. 3664 * Similarly, the greater-than while has the same issue with the 3665 * minimum integer due to the decrement + compare loop. 3666 */ 3667 tcg_gen_movi_i64(t1, maxval); 3668 tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0); 3669 } 3670 3671 /* Bound to the maximum. */ 3672 tcg_gen_umin_i64(t0, t0, tmax); 3673 tcg_temp_free_i64(tmax); 3674 3675 /* Set the count to zero if the condition is false. */ 3676 tcg_gen_movi_i64(t1, 0); 3677 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1); 3678 tcg_temp_free_i64(t1); 3679 3680 /* Since we're bounded, pass as a 32-bit type. */ 3681 t2 = tcg_temp_new_i32(); 3682 tcg_gen_extrl_i64_i32(t2, t0); 3683 tcg_temp_free_i64(t0); 3684 3685 /* Scale elements to bits. 
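 * The predicate holds one bit per byte of data, so a count of N
 * elements corresponds to N << esz predicate bits.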
*/ 3686 tcg_gen_shli_i32(t2, t2, a->esz); 3687 3688 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8); 3689 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 3690 t3 = tcg_const_i32(desc); 3691 3692 ptr = tcg_temp_new_ptr(); 3693 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd)); 3694 3695 if (a->lt) { 3696 gen_helper_sve_whilel(t2, ptr, t2, t3); 3697 } else { 3698 gen_helper_sve_whileg(t2, ptr, t2, t3); 3699 } 3700 do_pred_flags(t2); 3701 3702 tcg_temp_free_ptr(ptr); 3703 tcg_temp_free_i32(t2); 3704 tcg_temp_free_i32(t3); 3705 return true; 3706} 3707 3708static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a) 3709{ 3710 TCGv_i64 op0, op1, diff, t1, tmax; 3711 TCGv_i32 t2, t3; 3712 TCGv_ptr ptr; 3713 unsigned vsz = vec_full_reg_size(s); 3714 unsigned desc = 0; 3715 3716 if (!dc_isar_feature(aa64_sve2, s)) { 3717 return false; 3718 } 3719 if (!sve_access_check(s)) { 3720 return true; 3721 } 3722 3723 op0 = read_cpu_reg(s, a->rn, 1); 3724 op1 = read_cpu_reg(s, a->rm, 1); 3725 3726 tmax = tcg_const_i64(vsz); 3727 diff = tcg_temp_new_i64(); 3728 3729 if (a->rw) { 3730 /* WHILERW */ 3731 /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */ 3732 t1 = tcg_temp_new_i64(); 3733 tcg_gen_sub_i64(diff, op0, op1); 3734 tcg_gen_sub_i64(t1, op1, op0); 3735 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1); 3736 tcg_temp_free_i64(t1); 3737 /* Round down to a multiple of ESIZE. */ 3738 tcg_gen_andi_i64(diff, diff, -1 << a->esz); 3739 /* If op1 == op0, diff == 0, and the condition is always true. */ 3740 tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff); 3741 } else { 3742 /* WHILEWR */ 3743 tcg_gen_sub_i64(diff, op1, op0); 3744 /* Round down to a multiple of ESIZE. */ 3745 tcg_gen_andi_i64(diff, diff, -1 << a->esz); 3746 /* If op0 >= op1, diff <= 0, the condition is always true. */ 3747 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff); 3748 } 3749 3750 /* Bound to the maximum. */ 3751 tcg_gen_umin_i64(diff, diff, tmax); 3752 tcg_temp_free_i64(tmax); 3753 3754 /* Since we're bounded, pass as a 32-bit type. */ 3755 t2 = tcg_temp_new_i32(); 3756 tcg_gen_extrl_i64_i32(t2, diff); 3757 tcg_temp_free_i64(diff); 3758 3759 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8); 3760 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); 3761 t3 = tcg_const_i32(desc); 3762 3763 ptr = tcg_temp_new_ptr(); 3764 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd)); 3765 3766 gen_helper_sve_whilel(t2, ptr, t2, t3); 3767 do_pred_flags(t2); 3768 3769 tcg_temp_free_ptr(ptr); 3770 tcg_temp_free_i32(t2); 3771 tcg_temp_free_i32(t3); 3772 return true; 3773} 3774 3775/* 3776 *** SVE Integer Wide Immediate - Unpredicated Group 3777 */ 3778 3779static bool trans_FDUP(DisasContext *s, arg_FDUP *a) 3780{ 3781 if (a->esz == 0) { 3782 return false; 3783 } 3784 if (sve_access_check(s)) { 3785 unsigned vsz = vec_full_reg_size(s); 3786 int dofs = vec_full_reg_offset(s, a->rd); 3787 uint64_t imm; 3788 3789 /* Decode the VFP immediate. 
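 * vfp_expand_imm() inflates the 8-bit encoding into a half, single
 * or double constant according to esz; e.g. imm8 == 0x70 expands
 * to 1.0 at every element size.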
*/ 3790 imm = vfp_expand_imm(a->esz, a->imm); 3791 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm); 3792 } 3793 return true; 3794} 3795 3796static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a) 3797{ 3798 if (a->esz == 0 && extract32(s->insn, 13, 1)) { 3799 return false; 3800 } 3801 if (sve_access_check(s)) { 3802 unsigned vsz = vec_full_reg_size(s); 3803 int dofs = vec_full_reg_offset(s, a->rd); 3804 3805 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm); 3806 } 3807 return true; 3808} 3809 3810static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a) 3811{ 3812 if (a->esz == 0 && extract32(s->insn, 13, 1)) { 3813 return false; 3814 } 3815 if (sve_access_check(s)) { 3816 unsigned vsz = vec_full_reg_size(s); 3817 tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd), 3818 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz); 3819 } 3820 return true; 3821} 3822 3823static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a) 3824{ 3825 a->imm = -a->imm; 3826 return trans_ADD_zzi(s, a); 3827} 3828 3829static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a) 3830{ 3831 static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 }; 3832 static const GVecGen2s op[4] = { 3833 { .fni8 = tcg_gen_vec_sub8_i64, 3834 .fniv = tcg_gen_sub_vec, 3835 .fno = gen_helper_sve_subri_b, 3836 .opt_opc = vecop_list, 3837 .vece = MO_8, 3838 .scalar_first = true }, 3839 { .fni8 = tcg_gen_vec_sub16_i64, 3840 .fniv = tcg_gen_sub_vec, 3841 .fno = gen_helper_sve_subri_h, 3842 .opt_opc = vecop_list, 3843 .vece = MO_16, 3844 .scalar_first = true }, 3845 { .fni4 = tcg_gen_sub_i32, 3846 .fniv = tcg_gen_sub_vec, 3847 .fno = gen_helper_sve_subri_s, 3848 .opt_opc = vecop_list, 3849 .vece = MO_32, 3850 .scalar_first = true }, 3851 { .fni8 = tcg_gen_sub_i64, 3852 .fniv = tcg_gen_sub_vec, 3853 .fno = gen_helper_sve_subri_d, 3854 .opt_opc = vecop_list, 3855 .prefer_i64 = TCG_TARGET_REG_BITS == 64, 3856 .vece = MO_64, 3857 .scalar_first = true } 3858 }; 3859 3860 if (a->esz == 0 && extract32(s->insn, 13, 1)) { 3861 return false; 3862 } 3863 if (sve_access_check(s)) { 3864 unsigned vsz = vec_full_reg_size(s); 3865 TCGv_i64 c = tcg_const_i64(a->imm); 3866 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd), 3867 vec_full_reg_offset(s, a->rn), 3868 vsz, vsz, c, &op[a->esz]); 3869 tcg_temp_free_i64(c); 3870 } 3871 return true; 3872} 3873 3874static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a) 3875{ 3876 if (sve_access_check(s)) { 3877 unsigned vsz = vec_full_reg_size(s); 3878 tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd), 3879 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz); 3880 } 3881 return true; 3882} 3883 3884static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d) 3885{ 3886 if (a->esz == 0 && extract32(s->insn, 13, 1)) { 3887 return false; 3888 } 3889 if (sve_access_check(s)) { 3890 TCGv_i64 val = tcg_const_i64(a->imm); 3891 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d); 3892 tcg_temp_free_i64(val); 3893 } 3894 return true; 3895} 3896 3897static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a) 3898{ 3899 return do_zzi_sat(s, a, false, false); 3900} 3901 3902static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a) 3903{ 3904 return do_zzi_sat(s, a, true, false); 3905} 3906 3907static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a) 3908{ 3909 return do_zzi_sat(s, a, false, true); 3910} 3911 3912static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a) 3913{ 3914 return do_zzi_sat(s, a, true, true); 3915} 3916 3917static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, 
gen_helper_gvec_2i *fn) 3918{ 3919 if (sve_access_check(s)) { 3920 unsigned vsz = vec_full_reg_size(s); 3921 TCGv_i64 c = tcg_const_i64(a->imm); 3922 3923 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd), 3924 vec_full_reg_offset(s, a->rn), 3925 c, vsz, vsz, 0, fn); 3926 tcg_temp_free_i64(c); 3927 } 3928 return true; 3929} 3930 3931#define DO_ZZI(NAME, name) \ 3932static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a) \ 3933{ \ 3934 static gen_helper_gvec_2i * const fns[4] = { \ 3935 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \ 3936 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \ 3937 }; \ 3938 return do_zzi_ool(s, a, fns[a->esz]); \ 3939} 3940 3941DO_ZZI(SMAX, smax) 3942DO_ZZI(UMAX, umax) 3943DO_ZZI(SMIN, smin) 3944DO_ZZI(UMIN, umin) 3945 3946#undef DO_ZZI 3947 3948static bool trans_DOT_zzzz(DisasContext *s, arg_DOT_zzzz *a) 3949{ 3950 static gen_helper_gvec_4 * const fns[2][2] = { 3951 { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h }, 3952 { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h } 3953 }; 3954 3955 if (sve_access_check(s)) { 3956 gen_gvec_ool_zzzz(s, fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0); 3957 } 3958 return true; 3959} 3960 3961/* 3962 * SVE Multiply - Indexed 3963 */ 3964 3965static bool do_zzxz_ool(DisasContext *s, arg_rrxr_esz *a, 3966 gen_helper_gvec_4 *fn) 3967{ 3968 if (fn == NULL) { 3969 return false; 3970 } 3971 if (sve_access_check(s)) { 3972 gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index); 3973 } 3974 return true; 3975} 3976 3977#define DO_RRXR(NAME, FUNC) \ 3978 static bool NAME(DisasContext *s, arg_rrxr_esz *a) \ 3979 { return do_zzxz_ool(s, a, FUNC); } 3980 3981DO_RRXR(trans_SDOT_zzxw_s, gen_helper_gvec_sdot_idx_b) 3982DO_RRXR(trans_SDOT_zzxw_d, gen_helper_gvec_sdot_idx_h) 3983DO_RRXR(trans_UDOT_zzxw_s, gen_helper_gvec_udot_idx_b) 3984DO_RRXR(trans_UDOT_zzxw_d, gen_helper_gvec_udot_idx_h) 3985 3986static bool trans_SUDOT_zzxw_s(DisasContext *s, arg_rrxr_esz *a) 3987{ 3988 if (!dc_isar_feature(aa64_sve_i8mm, s)) { 3989 return false; 3990 } 3991 return do_zzxz_ool(s, a, gen_helper_gvec_sudot_idx_b); 3992} 3993 3994static bool trans_USDOT_zzxw_s(DisasContext *s, arg_rrxr_esz *a) 3995{ 3996 if (!dc_isar_feature(aa64_sve_i8mm, s)) { 3997 return false; 3998 } 3999 return do_zzxz_ool(s, a, gen_helper_gvec_usdot_idx_b); 4000} 4001 4002#undef DO_RRXR 4003 4004static bool do_sve2_zzz_data(DisasContext *s, int rd, int rn, int rm, int data, 4005 gen_helper_gvec_3 *fn) 4006{ 4007 if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) { 4008 return false; 4009 } 4010 if (sve_access_check(s)) { 4011 unsigned vsz = vec_full_reg_size(s); 4012 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), 4013 vec_full_reg_offset(s, rn), 4014 vec_full_reg_offset(s, rm), 4015 vsz, vsz, data, fn); 4016 } 4017 return true; 4018} 4019 4020#define DO_SVE2_RRX(NAME, FUNC) \ 4021 static bool NAME(DisasContext *s, arg_rrx_esz *a) \ 4022 { return do_sve2_zzz_data(s, a->rd, a->rn, a->rm, a->index, FUNC); } 4023 4024DO_SVE2_RRX(trans_MUL_zzx_h, gen_helper_gvec_mul_idx_h) 4025DO_SVE2_RRX(trans_MUL_zzx_s, gen_helper_gvec_mul_idx_s) 4026DO_SVE2_RRX(trans_MUL_zzx_d, gen_helper_gvec_mul_idx_d) 4027 4028DO_SVE2_RRX(trans_SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h) 4029DO_SVE2_RRX(trans_SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s) 4030DO_SVE2_RRX(trans_SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d) 4031 4032DO_SVE2_RRX(trans_SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h) 4033DO_SVE2_RRX(trans_SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s) 
4034DO_SVE2_RRX(trans_SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d) 4035 4036#undef DO_SVE2_RRX 4037 4038#define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \ 4039 static bool NAME(DisasContext *s, arg_rrx_esz *a) \ 4040 { \ 4041 return do_sve2_zzz_data(s, a->rd, a->rn, a->rm, \ 4042 (a->index << 1) | TOP, FUNC); \ 4043 } 4044 4045DO_SVE2_RRX_TB(trans_SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false) 4046DO_SVE2_RRX_TB(trans_SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false) 4047DO_SVE2_RRX_TB(trans_SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true) 4048DO_SVE2_RRX_TB(trans_SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true) 4049 4050DO_SVE2_RRX_TB(trans_SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false) 4051DO_SVE2_RRX_TB(trans_SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false) 4052DO_SVE2_RRX_TB(trans_SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true) 4053DO_SVE2_RRX_TB(trans_SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true) 4054 4055DO_SVE2_RRX_TB(trans_UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false) 4056DO_SVE2_RRX_TB(trans_UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false) 4057DO_SVE2_RRX_TB(trans_UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true) 4058DO_SVE2_RRX_TB(trans_UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true) 4059 4060#undef DO_SVE2_RRX_TB 4061 4062static bool do_sve2_zzzz_data(DisasContext *s, int rd, int rn, int rm, int ra, 4063 int data, gen_helper_gvec_4 *fn) 4064{ 4065 if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) { 4066 return false; 4067 } 4068 if (sve_access_check(s)) { 4069 unsigned vsz = vec_full_reg_size(s); 4070 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), 4071 vec_full_reg_offset(s, rn), 4072 vec_full_reg_offset(s, rm), 4073 vec_full_reg_offset(s, ra), 4074 vsz, vsz, data, fn); 4075 } 4076 return true; 4077} 4078 4079#define DO_SVE2_RRXR(NAME, FUNC) \ 4080 static bool NAME(DisasContext *s, arg_rrxr_esz *a) \ 4081 { return do_sve2_zzzz_data(s, a->rd, a->rn, a->rm, a->ra, a->index, FUNC); } 4082 4083DO_SVE2_RRXR(trans_MLA_zzxz_h, gen_helper_gvec_mla_idx_h) 4084DO_SVE2_RRXR(trans_MLA_zzxz_s, gen_helper_gvec_mla_idx_s) 4085DO_SVE2_RRXR(trans_MLA_zzxz_d, gen_helper_gvec_mla_idx_d) 4086 4087DO_SVE2_RRXR(trans_MLS_zzxz_h, gen_helper_gvec_mls_idx_h) 4088DO_SVE2_RRXR(trans_MLS_zzxz_s, gen_helper_gvec_mls_idx_s) 4089DO_SVE2_RRXR(trans_MLS_zzxz_d, gen_helper_gvec_mls_idx_d) 4090 4091DO_SVE2_RRXR(trans_SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h) 4092DO_SVE2_RRXR(trans_SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s) 4093DO_SVE2_RRXR(trans_SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d) 4094 4095DO_SVE2_RRXR(trans_SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h) 4096DO_SVE2_RRXR(trans_SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s) 4097DO_SVE2_RRXR(trans_SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d) 4098 4099#undef DO_SVE2_RRXR 4100 4101#define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \ 4102 static bool NAME(DisasContext *s, arg_rrxr_esz *a) \ 4103 { \ 4104 return do_sve2_zzzz_data(s, a->rd, a->rn, a->rm, a->rd, \ 4105 (a->index << 1) | TOP, FUNC); \ 4106 } 4107 4108DO_SVE2_RRXR_TB(trans_SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false) 4109DO_SVE2_RRXR_TB(trans_SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false) 4110DO_SVE2_RRXR_TB(trans_SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true) 4111DO_SVE2_RRXR_TB(trans_SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true) 4112 4113DO_SVE2_RRXR_TB(trans_SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false) 4114DO_SVE2_RRXR_TB(trans_SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false) 
4115DO_SVE2_RRXR_TB(trans_SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true) 4116DO_SVE2_RRXR_TB(trans_SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true) 4117 4118DO_SVE2_RRXR_TB(trans_SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false) 4119DO_SVE2_RRXR_TB(trans_SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false) 4120DO_SVE2_RRXR_TB(trans_SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true) 4121DO_SVE2_RRXR_TB(trans_SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true) 4122 4123DO_SVE2_RRXR_TB(trans_UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false) 4124DO_SVE2_RRXR_TB(trans_UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false) 4125DO_SVE2_RRXR_TB(trans_UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true) 4126DO_SVE2_RRXR_TB(trans_UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true) 4127 4128DO_SVE2_RRXR_TB(trans_SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false) 4129DO_SVE2_RRXR_TB(trans_SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false) 4130DO_SVE2_RRXR_TB(trans_SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true) 4131DO_SVE2_RRXR_TB(trans_SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true) 4132 4133DO_SVE2_RRXR_TB(trans_UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false) 4134DO_SVE2_RRXR_TB(trans_UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false) 4135DO_SVE2_RRXR_TB(trans_UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true) 4136DO_SVE2_RRXR_TB(trans_UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true) 4137 4138#undef DO_SVE2_RRXR_TB 4139 4140#define DO_SVE2_RRXR_ROT(NAME, FUNC) \ 4141 static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \ 4142 { \ 4143 return do_sve2_zzzz_data(s, a->rd, a->rn, a->rm, a->ra, \ 4144 (a->index << 2) | a->rot, FUNC); \ 4145 } 4146 4147DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h) 4148DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s) 4149 4150DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h) 4151DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s) 4152 4153DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s) 4154DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d) 4155 4156#undef DO_SVE2_RRXR_ROT 4157 4158/* 4159 *** SVE Floating Point Multiply-Add Indexed Group 4160 */ 4161 4162static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub) 4163{ 4164 static gen_helper_gvec_4_ptr * const fns[3] = { 4165 gen_helper_gvec_fmla_idx_h, 4166 gen_helper_gvec_fmla_idx_s, 4167 gen_helper_gvec_fmla_idx_d, 4168 }; 4169 4170 if (sve_access_check(s)) { 4171 unsigned vsz = vec_full_reg_size(s); 4172 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR); 4173 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd), 4174 vec_full_reg_offset(s, a->rn), 4175 vec_full_reg_offset(s, a->rm), 4176 vec_full_reg_offset(s, a->ra), 4177 status, vsz, vsz, (a->index << 1) | sub, 4178 fns[a->esz - 1]); 4179 tcg_temp_free_ptr(status); 4180 } 4181 return true; 4182} 4183 4184static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a) 4185{ 4186 return do_FMLA_zzxz(s, a, false); 4187} 4188 4189static bool trans_FMLS_zzxz(DisasContext *s, arg_FMLA_zzxz *a) 4190{ 4191 return do_FMLA_zzxz(s, a, true); 4192} 4193 4194/* 4195 *** SVE Floating Point Multiply Indexed Group 4196 */ 4197 4198static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a) 4199{ 4200 static gen_helper_gvec_3_ptr * const fns[3] = { 4201 gen_helper_gvec_fmul_idx_h, 4202 gen_helper_gvec_fmul_idx_s, 4203 gen_helper_gvec_fmul_idx_d, 4204 }; 4205 4206 if (sve_access_check(s)) { 4207 unsigned vsz = vec_full_reg_size(s); 4208 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 4209 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), 4210 vec_full_reg_offset(s, a->rn), 4211 vec_full_reg_offset(s, a->rm), 4212 status, vsz, vsz, a->index, fns[a->esz - 1]); 4213 tcg_temp_free_ptr(status); 4214 } 4215 return true; 4216} 4217 4218/* 4219 *** SVE Floating Point Fast Reduction Group 4220 */ 4221 4222typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr, 4223 TCGv_ptr, TCGv_i32); 4224 4225static void do_reduce(DisasContext *s, arg_rpr_esz *a, 4226 gen_helper_fp_reduce *fn) 4227{ 4228 unsigned vsz = vec_full_reg_size(s); 4229 unsigned p2vsz = pow2ceil(vsz); 4230 TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, vsz, p2vsz)); 4231 TCGv_ptr t_zn, t_pg, status; 4232 TCGv_i64 temp; 4233 4234 temp = tcg_temp_new_i64(); 4235 t_zn = tcg_temp_new_ptr(); 4236 t_pg = tcg_temp_new_ptr(); 4237 4238 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn)); 4239 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg)); 4240 status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 4241 4242 fn(temp, t_zn, t_pg, status, t_desc); 4243 tcg_temp_free_ptr(t_zn); 4244 tcg_temp_free_ptr(t_pg); 4245 tcg_temp_free_ptr(status); 4246 tcg_temp_free_i32(t_desc); 4247 4248 write_fp_dreg(s, a->rd, temp); 4249 tcg_temp_free_i64(temp); 4250} 4251 4252#define DO_VPZ(NAME, name) \ 4253static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \ 4254{ \ 4255 static gen_helper_fp_reduce * const fns[3] = { \ 4256 gen_helper_sve_##name##_h, \ 4257 gen_helper_sve_##name##_s, \ 4258 gen_helper_sve_##name##_d, \ 4259 }; \ 4260 if (a->esz == 0) { \ 4261 return false; \ 4262 } \ 4263 if (sve_access_check(s)) { \ 4264 do_reduce(s, a, fns[a->esz - 1]); \ 4265 } \ 4266 return true; \ 4267} 4268 4269DO_VPZ(FADDV, faddv) 4270DO_VPZ(FMINNMV, fminnmv) 4271DO_VPZ(FMAXNMV, fmaxnmv) 4272DO_VPZ(FMINV, fminv) 4273DO_VPZ(FMAXV, fmaxv) 4274 4275/* 4276 *** SVE Floating Point Unary Operations - Unpredicated Group 4277 */ 4278 4279static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn) 4280{ 4281 unsigned vsz = vec_full_reg_size(s); 4282 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR); 4283 4284 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd), 4285 vec_full_reg_offset(s, a->rn), 4286 status, vsz, vsz, 0, fn); 4287 tcg_temp_free_ptr(status); 4288} 4289 4290static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a) 4291{ 4292 static gen_helper_gvec_2_ptr * const fns[3] = { 4293 gen_helper_gvec_frecpe_h, 4294 gen_helper_gvec_frecpe_s, 4295 gen_helper_gvec_frecpe_d, 4296 }; 4297 if (a->esz == 0) { 4298 return false; 4299 } 4300 if (sve_access_check(s)) { 4301 do_zz_fp(s, a, fns[a->esz - 1]); 4302 } 4303 return true; 4304} 4305 4306static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a) 4307{ 4308 static gen_helper_gvec_2_ptr * const fns[3] = { 4309 gen_helper_gvec_frsqrte_h, 4310 gen_helper_gvec_frsqrte_s, 4311 gen_helper_gvec_frsqrte_d, 4312 }; 4313 if (a->esz == 0) { 4314 return false; 4315 } 4316 if (sve_access_check(s)) { 4317 do_zz_fp(s, a, fns[a->esz - 1]); 4318 } 4319 return true; 4320} 4321 4322/* 4323 *** SVE Floating Point Compare with Zero Group 4324 */ 4325 4326static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a, 4327 gen_helper_gvec_3_ptr *fn) 4328{ 4329 unsigned vsz = vec_full_reg_size(s); 4330 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 4331 4332 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd), 4333 vec_full_reg_offset(s, a->rn), 4334 pred_full_reg_offset(s, a->pg), 4335 status, vsz, vsz, 0, fn); 4336 tcg_temp_free_ptr(status); 4337} 4338 4339#define DO_PPZ(NAME, name) \ 4340static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \ 4341{ \ 4342 static gen_helper_gvec_3_ptr * const fns[3] = { \ 4343 gen_helper_sve_##name##_h, \ 4344 gen_helper_sve_##name##_s, \ 4345 gen_helper_sve_##name##_d, \ 4346 }; \ 4347 if (a->esz == 0) { \ 4348 return false; \ 4349 } \ 4350 if (sve_access_check(s)) { \ 4351 do_ppz_fp(s, a, fns[a->esz - 1]); \ 4352 } \ 4353 return true; \ 4354} 4355 4356DO_PPZ(FCMGE_ppz0, fcmge0) 4357DO_PPZ(FCMGT_ppz0, fcmgt0) 4358DO_PPZ(FCMLE_ppz0, fcmle0) 4359DO_PPZ(FCMLT_ppz0, fcmlt0) 4360DO_PPZ(FCMEQ_ppz0, fcmeq0) 4361DO_PPZ(FCMNE_ppz0, fcmne0) 4362 4363#undef DO_PPZ 4364 4365/* 4366 *** SVE floating-point trig multiply-add coefficient 4367 */ 4368 4369static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a) 4370{ 4371 static gen_helper_gvec_3_ptr * const fns[3] = { 4372 gen_helper_sve_ftmad_h, 4373 gen_helper_sve_ftmad_s, 4374 gen_helper_sve_ftmad_d, 4375 }; 4376 4377 if (a->esz == 0) { 4378 return false; 4379 } 4380 if (sve_access_check(s)) { 4381 unsigned vsz = vec_full_reg_size(s); 4382 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR); 4383 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), 4384 vec_full_reg_offset(s, a->rn), 4385 vec_full_reg_offset(s, a->rm), 4386 status, vsz, vsz, a->imm, fns[a->esz - 1]); 4387 tcg_temp_free_ptr(status); 4388 } 4389 return true; 4390} 4391 4392/* 4393 *** SVE Floating Point Accumulating Reduction Group 4394 */ 4395 4396static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a) 4397{ 4398 typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr, 4399 TCGv_ptr, TCGv_ptr, TCGv_i32); 4400 static fadda_fn * const fns[3] = { 4401 gen_helper_sve_fadda_h, 4402 gen_helper_sve_fadda_s, 4403 gen_helper_sve_fadda_d, 4404 }; 4405 unsigned vsz = vec_full_reg_size(s); 4406 TCGv_ptr t_rm, t_pg, t_fpst; 4407 TCGv_i64 t_val; 4408 TCGv_i32 t_desc; 4409 4410 if (a->esz == 0) { 4411 return false; 4412 } 4413 if (!sve_access_check(s)) { 4414 return true; 4415 } 4416 4417 t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz); 4418 t_rm = tcg_temp_new_ptr(); 4419 t_pg = tcg_temp_new_ptr(); 4420 tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm)); 4421 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg)); 4422 t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 4423 t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0)); 4424 4425 fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc); 4426 4427 tcg_temp_free_i32(t_desc); 4428 tcg_temp_free_ptr(t_fpst); 4429 tcg_temp_free_ptr(t_pg); 4430 tcg_temp_free_ptr(t_rm); 4431 4432 write_fp_dreg(s, a->rd, t_val); 4433 tcg_temp_free_i64(t_val); 4434 return true; 4435} 4436 4437/* 4438 *** SVE Floating Point Arithmetic - Unpredicated Group 4439 */ 4440 4441static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a, 4442 gen_helper_gvec_3_ptr *fn) 4443{ 4444 if (fn == NULL) { 4445 return false; 4446 } 4447 if (sve_access_check(s)) { 4448 unsigned vsz = vec_full_reg_size(s); 4449 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 4450 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), 4451 vec_full_reg_offset(s, a->rn), 4452 vec_full_reg_offset(s, a->rm), 4453 status, vsz, vsz, 0, fn); 4454 tcg_temp_free_ptr(status); 4455 } 4456 return true; 4457} 4458 4459 4460#define DO_FP3(NAME, name) \ 4461static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \ 4462{ \ 4463 static gen_helper_gvec_3_ptr * const fns[4] = { \ 4464 NULL, gen_helper_gvec_##name##_h, \ 4465 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \ 4466 }; \ 4467 return do_zzz_fp(s, a, fns[a->esz]); \ 4468} 4469 4470DO_FP3(FADD_zzz, fadd) 4471DO_FP3(FSUB_zzz, fsub) 4472DO_FP3(FMUL_zzz, fmul) 4473DO_FP3(FTSMUL, ftsmul) 4474DO_FP3(FRECPS, recps) 4475DO_FP3(FRSQRTS, rsqrts) 4476 4477#undef DO_FP3 4478 4479/* 4480 *** SVE Floating Point Arithmetic - Predicated Group 4481 */ 4482 4483static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a, 4484 gen_helper_gvec_4_ptr *fn) 4485{ 4486 if (fn == NULL) { 4487 return false; 4488 } 4489 if (sve_access_check(s)) { 4490 unsigned vsz = vec_full_reg_size(s); 4491 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR); 4492 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd), 4493 vec_full_reg_offset(s, a->rn), 4494 vec_full_reg_offset(s, a->rm), 4495 pred_full_reg_offset(s, a->pg), 4496 status, vsz, vsz, 0, fn); 4497 tcg_temp_free_ptr(status); 4498 } 4499 return true; 4500} 4501 4502#define DO_FP3(NAME, name) \ 4503static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \ 4504{ \ 4505 static gen_helper_gvec_4_ptr * const fns[4] = { \ 4506 NULL, gen_helper_sve_##name##_h, \ 4507 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ 4508 }; \ 4509 return do_zpzz_fp(s, a, fns[a->esz]); \ 4510} 4511 4512DO_FP3(FADD_zpzz, fadd) 4513DO_FP3(FSUB_zpzz, fsub) 4514DO_FP3(FMUL_zpzz, fmul) 4515DO_FP3(FMIN_zpzz, fmin) 4516DO_FP3(FMAX_zpzz, fmax) 4517DO_FP3(FMINNM_zpzz, fminnum) 4518DO_FP3(FMAXNM_zpzz, fmaxnum) 4519DO_FP3(FABD, fabd) 4520DO_FP3(FSCALE, fscalbn) 4521DO_FP3(FDIV, fdiv) 4522DO_FP3(FMULX, fmulx) 4523 4524#undef DO_FP3 4525 4526typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr, 4527 TCGv_i64, TCGv_ptr, TCGv_i32); 4528 4529static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16, 4530 TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn) 4531{ 4532 unsigned vsz = vec_full_reg_size(s); 4533 TCGv_ptr t_zd, t_zn, t_pg, status; 4534 TCGv_i32 desc; 4535 4536 t_zd = tcg_temp_new_ptr(); 4537 t_zn = tcg_temp_new_ptr(); 4538 t_pg = tcg_temp_new_ptr(); 4539 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd)); 4540 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn)); 4541 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg)); 4542 4543 status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR); 4544 desc = tcg_const_i32(simd_desc(vsz, vsz, 0)); 4545 fn(t_zd, t_zn, t_pg, scalar, status, desc); 4546 4547 tcg_temp_free_i32(desc); 4548 tcg_temp_free_ptr(status); 4549 tcg_temp_free_ptr(t_pg); 4550 tcg_temp_free_ptr(t_zn); 4551 tcg_temp_free_ptr(t_zd); 4552} 4553 4554static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm, 4555 gen_helper_sve_fp2scalar *fn) 4556{ 4557 TCGv_i64 temp = tcg_const_i64(imm); 4558 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn); 4559 tcg_temp_free_i64(temp); 4560} 4561 4562#define DO_FP_IMM(NAME, name, const0, const1) \ 4563static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a) \ 4564{ \ 4565 static gen_helper_sve_fp2scalar * const fns[3] = { \ 4566 gen_helper_sve_##name##_h, \ 4567 gen_helper_sve_##name##_s, \ 4568 gen_helper_sve_##name##_d \ 4569 }; \ 4570 static uint64_t const val[3][2] = { \ 4571 { float16_##const0, float16_##const1 }, \ 4572 { float32_##const0, float32_##const1 }, \ 4573 { float64_##const0, float64_##const1 }, \ 4574 }; \ 4575 if (a->esz == 0) { \ 4576 return false; \ 4577 } \ 4578 if (sve_access_check(s)) { \ 4579 do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]); \ 4580 } \ 4581 return true; \ 4582} 4583 4584DO_FP_IMM(FADD, fadds, half, one) 4585DO_FP_IMM(FSUB, fsubs, half, one) 4586DO_FP_IMM(FMUL, fmuls, half, two) 4587DO_FP_IMM(FSUBR, fsubrs, half, one) 4588DO_FP_IMM(FMAXNM, fmaxnms, zero, one) 4589DO_FP_IMM(FMINNM, fminnms, zero, one) 4590DO_FP_IMM(FMAX, fmaxs, zero, one) 4591DO_FP_IMM(FMIN, fmins, zero, one) 4592 4593#undef DO_FP_IMM 4594 4595static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a, 4596 gen_helper_gvec_4_ptr *fn) 4597{ 4598 if (fn == NULL) { 4599 return false; 4600 } 4601 if (sve_access_check(s)) { 4602 unsigned vsz = vec_full_reg_size(s); 4603 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR); 4604 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd), 4605 vec_full_reg_offset(s, a->rn), 4606 vec_full_reg_offset(s, a->rm), 4607 pred_full_reg_offset(s, a->pg), 4608 status, vsz, vsz, 0, fn); 4609 tcg_temp_free_ptr(status); 4610 } 4611 return true; 4612} 4613 4614#define DO_FPCMP(NAME, name) \ 4615static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \ 4616{ \ 4617 static gen_helper_gvec_4_ptr * const fns[4] = { \ 4618 NULL, gen_helper_sve_##name##_h, \ 4619 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ 4620 }; \ 4621 return do_fp_cmp(s, a, fns[a->esz]); \ 4622} 4623 4624DO_FPCMP(FCMGE, fcmge) 4625DO_FPCMP(FCMGT, fcmgt) 4626DO_FPCMP(FCMEQ, fcmeq) 4627DO_FPCMP(FCMNE, fcmne) 4628DO_FPCMP(FCMUO, fcmuo) 4629DO_FPCMP(FACGE, facge) 4630DO_FPCMP(FACGT, facgt) 4631 4632#undef DO_FPCMP 4633 4634static bool trans_FCADD(DisasContext *s, arg_FCADD *a) 4635{ 4636 static gen_helper_gvec_4_ptr * const fns[3] = { 4637 gen_helper_sve_fcadd_h, 4638 gen_helper_sve_fcadd_s, 4639 gen_helper_sve_fcadd_d 4640 }; 4641 4642 if (a->esz == 0) { 4643 return false; 4644 } 4645 if (sve_access_check(s)) { 4646 unsigned vsz = vec_full_reg_size(s); 4647 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 4648 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd), 4649 vec_full_reg_offset(s, a->rn), 4650 vec_full_reg_offset(s, a->rm), 4651 pred_full_reg_offset(s, a->pg), 4652 status, vsz, vsz, a->rot, fns[a->esz - 1]); 4653 tcg_temp_free_ptr(status); 4654 } 4655 return true; 4656} 4657 4658static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, 4659 gen_helper_gvec_5_ptr *fn) 4660{ 4661 if (a->esz == 0) { 4662 return false; 4663 } 4664 if (sve_access_check(s)) { 4665 unsigned vsz = vec_full_reg_size(s); 4666 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 4667 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd), 4668 vec_full_reg_offset(s, a->rn), 4669 vec_full_reg_offset(s, a->rm), 4670 vec_full_reg_offset(s, a->ra), 4671 pred_full_reg_offset(s, a->pg), 4672 status, vsz, vsz, 0, fn); 4673 tcg_temp_free_ptr(status); 4674 } 4675 return true; 4676} 4677 4678#define DO_FMLA(NAME, name) \ 4679static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \ 4680{ \ 4681 static gen_helper_gvec_5_ptr * const fns[4] = { \ 4682 NULL, gen_helper_sve_##name##_h, \ 4683 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ 4684 }; \ 4685 return do_fmla(s, a, fns[a->esz]); \ 4686} 4687 4688DO_FMLA(FMLA_zpzzz, fmla_zpzzz) 4689DO_FMLA(FMLS_zpzzz, fmls_zpzzz) 4690DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz) 4691DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz) 4692 4693#undef DO_FMLA 4694 4695static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a) 4696{ 4697 static gen_helper_gvec_5_ptr * const fns[4] = { 4698 NULL, 4699 gen_helper_sve_fcmla_zpzzz_h, 4700 gen_helper_sve_fcmla_zpzzz_s, 4701 gen_helper_sve_fcmla_zpzzz_d, 4702 }; 4703 4704 if (a->esz == 0) { 4705 return false; 4706 } 4707 if (sve_access_check(s)) { 4708 unsigned vsz = vec_full_reg_size(s); 4709 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR); 4710 tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd), 4711 vec_full_reg_offset(s, a->rn), 4712 vec_full_reg_offset(s, a->rm), 4713 vec_full_reg_offset(s, a->ra), 4714 pred_full_reg_offset(s, a->pg), 4715 status, vsz, vsz, a->rot, fns[a->esz]); 4716 tcg_temp_free_ptr(status); 4717 } 4718 return true; 4719} 4720 4721static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a) 4722{ 4723 static gen_helper_gvec_4_ptr * const fns[2] = { 4724 gen_helper_gvec_fcmlah_idx, 4725 gen_helper_gvec_fcmlas_idx, 4726 }; 4727 4728 tcg_debug_assert(a->esz == 1 || a->esz == 2); 4729 tcg_debug_assert(a->rd == a->ra); 4730 if (sve_access_check(s)) { 4731 unsigned vsz = vec_full_reg_size(s); 4732 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 4733 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd), 4734 vec_full_reg_offset(s, a->rn), 4735 vec_full_reg_offset(s, a->rm), 4736 vec_full_reg_offset(s, a->ra), 4737 status, vsz, vsz, 4738 a->index * 4 + a->rot, 4739 fns[a->esz - 1]); 4740 tcg_temp_free_ptr(status); 4741 } 4742 return true; 4743} 4744 4745/* 4746 *** SVE Floating Point Unary Operations Predicated Group 4747 */ 4748 4749static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg, 4750 bool is_fp16, gen_helper_gvec_3_ptr *fn) 4751{ 4752 if (sve_access_check(s)) { 4753 unsigned vsz = vec_full_reg_size(s); 4754 TCGv_ptr status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR); 4755 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), 4756 vec_full_reg_offset(s, rn), 4757 pred_full_reg_offset(s, pg), 4758 status, vsz, vsz, 0, fn); 4759 tcg_temp_free_ptr(status); 4760 } 4761 return true; 4762} 4763 4764static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a) 4765{ 4766 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh); 4767} 4768 4769static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a) 4770{ 4771 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs); 4772} 4773 4774static bool trans_BFCVT(DisasContext *s, arg_rpr_esz *a) 4775{ 4776 if (!dc_isar_feature(aa64_sve_bf16, s)) { 4777 return false; 4778 } 4779 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_bfcvt); 4780} 4781 4782static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a) 4783{ 4784 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh); 4785} 4786 4787static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a) 4788{ 4789 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd); 4790} 4791 4792static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a) 4793{ 4794 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds); 4795} 4796 4797static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a) 4798{ 4799 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd); 4800} 4801 4802static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a) 4803{ 4804 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh); 4805} 4806 4807static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a) 4808{ 4809 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh); 4810} 4811 4812static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a) 4813{ 4814 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs); 4815} 4816 4817static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a) 4818{ 4819 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs); 4820} 4821 4822static bool trans_FCVTZS_hd(DisasContext *s, 
arg_rpr_esz *a) 4823{ 4824 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd); 4825} 4826 4827static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a) 4828{ 4829 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd); 4830} 4831 4832static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a) 4833{ 4834 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss); 4835} 4836 4837static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a) 4838{ 4839 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss); 4840} 4841 4842static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a) 4843{ 4844 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd); 4845} 4846 4847static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a) 4848{ 4849 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd); 4850} 4851 4852static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a) 4853{ 4854 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds); 4855} 4856 4857static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a) 4858{ 4859 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds); 4860} 4861 4862static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a) 4863{ 4864 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd); 4865} 4866 4867static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a) 4868{ 4869 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd); 4870} 4871 4872static gen_helper_gvec_3_ptr * const frint_fns[3] = { 4873 gen_helper_sve_frint_h, 4874 gen_helper_sve_frint_s, 4875 gen_helper_sve_frint_d 4876}; 4877 4878static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a) 4879{ 4880 if (a->esz == 0) { 4881 return false; 4882 } 4883 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, 4884 frint_fns[a->esz - 1]); 4885} 4886 4887static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a) 4888{ 4889 static gen_helper_gvec_3_ptr * const fns[3] = { 4890 gen_helper_sve_frintx_h, 4891 gen_helper_sve_frintx_s, 4892 gen_helper_sve_frintx_d 4893 }; 4894 if (a->esz == 0) { 4895 return false; 4896 } 4897 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]); 4898} 4899 4900static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, 4901 int mode, gen_helper_gvec_3_ptr *fn) 4902{ 4903 if (sve_access_check(s)) { 4904 unsigned vsz = vec_full_reg_size(s); 4905 TCGv_i32 tmode = tcg_const_i32(mode); 4906 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR); 4907 4908 gen_helper_set_rmode(tmode, tmode, status); 4909 4910 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), 4911 vec_full_reg_offset(s, a->rn), 4912 pred_full_reg_offset(s, a->pg), 4913 status, vsz, vsz, 0, fn); 4914 4915 gen_helper_set_rmode(tmode, tmode, status); 4916 tcg_temp_free_i32(tmode); 4917 tcg_temp_free_ptr(status); 4918 } 4919 return true; 4920} 4921 4922static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a) 4923{ 4924 if (a->esz == 0) { 4925 return false; 4926 } 4927 return do_frint_mode(s, a, float_round_nearest_even, frint_fns[a->esz - 1]); 4928} 4929 4930static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a) 4931{ 4932 if (a->esz == 0) { 4933 return false; 4934 } 4935 return do_frint_mode(s, a, float_round_up, frint_fns[a->esz - 1]); 4936} 4937 4938static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a) 4939{ 4940 if (a->esz == 0) { 4941 return false; 4942 } 4943 return do_frint_mode(s, a, float_round_down, frint_fns[a->esz - 1]); 4944} 4945 4946static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a) 4947{ 4948 if (a->esz == 0) { 4949 return false; 4950 } 4951 return do_frint_mode(s, a, float_round_to_zero, frint_fns[a->esz - 1]); 4952} 4953 4954static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a) 4955{ 4956 if (a->esz == 0) { 4957 return false; 4958 } 4959 return do_frint_mode(s, a, float_round_ties_away, frint_fns[a->esz - 1]); 4960} 4961 4962static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a) 4963{ 4964 static gen_helper_gvec_3_ptr * const fns[3] = { 4965 gen_helper_sve_frecpx_h, 4966 gen_helper_sve_frecpx_s, 4967 gen_helper_sve_frecpx_d 4968 }; 4969 if (a->esz == 0) { 4970 return false; 4971 } 4972 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]); 4973} 4974 4975static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a) 4976{ 4977 static gen_helper_gvec_3_ptr * const fns[3] = { 4978 gen_helper_sve_fsqrt_h, 4979 gen_helper_sve_fsqrt_s, 4980 gen_helper_sve_fsqrt_d 4981 }; 4982 if (a->esz == 0) { 4983 return false; 4984 } 4985 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]); 4986} 4987 4988static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a) 4989{ 4990 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh); 4991} 4992 4993static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a) 4994{ 4995 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh); 4996} 4997 4998static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a) 4999{ 5000 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh); 5001} 5002 5003static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a) 5004{ 5005 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss); 5006} 5007 5008static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a) 5009{ 5010 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds); 5011} 5012 5013static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a) 5014{ 5015 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd); 5016} 5017 5018static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a) 5019{ 5020 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd); 5021} 5022 5023static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a) 5024{ 5025 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh); 5026} 5027 5028static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a) 5029{ 5030 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, 
gen_helper_sve_ucvt_sh); 5031} 5032 5033static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a) 5034{ 5035 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh); 5036} 5037 5038static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a) 5039{ 5040 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss); 5041} 5042 5043static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a) 5044{ 5045 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds); 5046} 5047 5048static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a) 5049{ 5050 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd); 5051} 5052 5053static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a) 5054{ 5055 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd); 5056} 5057 5058/* 5059 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group 5060 */ 5061 5062/* Subroutine loading a vector register at VOFS of LEN bytes. 5063 * The load should begin at the address Rn + IMM. 5064 */ 5065 5066static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm) 5067{ 5068 int len_align = QEMU_ALIGN_DOWN(len, 8); 5069 int len_remain = len % 8; 5070 int nparts = len / 8 + ctpop8(len_remain); 5071 int midx = get_mem_index(s); 5072 TCGv_i64 dirty_addr, clean_addr, t0, t1; 5073 5074 dirty_addr = tcg_temp_new_i64(); 5075 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm); 5076 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len); 5077 tcg_temp_free_i64(dirty_addr); 5078 5079 /* 5080 * Note that unpredicated load/store of vector/predicate registers 5081 * are defined as a stream of bytes, which equates to little-endian 5082 * operations on larger quantities. 5083 * Attempt to keep code expansion to a minimum by limiting the 5084 * amount of unrolling done. 5085 */ 5086 if (nparts <= 4) { 5087 int i; 5088 5089 t0 = tcg_temp_new_i64(); 5090 for (i = 0; i < len_align; i += 8) { 5091 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEQ); 5092 tcg_gen_st_i64(t0, cpu_env, vofs + i); 5093 tcg_gen_addi_i64(clean_addr, clean_addr, 8); 5094 } 5095 tcg_temp_free_i64(t0); 5096 } else { 5097 TCGLabel *loop = gen_new_label(); 5098 TCGv_ptr tp, i = tcg_const_local_ptr(0); 5099 5100 /* Copy the clean address into a local temp, live across the loop. */ 5101 t0 = clean_addr; 5102 clean_addr = new_tmp_a64_local(s); 5103 tcg_gen_mov_i64(clean_addr, t0); 5104 5105 gen_set_label(loop); 5106 5107 t0 = tcg_temp_new_i64(); 5108 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEQ); 5109 tcg_gen_addi_i64(clean_addr, clean_addr, 8); 5110 5111 tp = tcg_temp_new_ptr(); 5112 tcg_gen_add_ptr(tp, cpu_env, i); 5113 tcg_gen_addi_ptr(i, i, 8); 5114 tcg_gen_st_i64(t0, tp, vofs); 5115 tcg_temp_free_ptr(tp); 5116 tcg_temp_free_i64(t0); 5117 5118 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop); 5119 tcg_temp_free_ptr(i); 5120 } 5121 5122 /* 5123 * Predicate register loads can be any multiple of 2. 5124 * Note that we still store the entire 64-bit unit into cpu_env. 
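     * A remainder that is a power of two is loaded little-endian in a
     * single piece; a 6-byte remainder is assembled from a 4-byte and a
     * 2-byte load merged with a deposit.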
5125 */ 5126 if (len_remain) { 5127 t0 = tcg_temp_new_i64(); 5128 switch (len_remain) { 5129 case 2: 5130 case 4: 5131 case 8: 5132 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, 5133 MO_LE | ctz32(len_remain)); 5134 break; 5135 5136 case 6: 5137 t1 = tcg_temp_new_i64(); 5138 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL); 5139 tcg_gen_addi_i64(clean_addr, clean_addr, 4); 5140 tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW); 5141 tcg_gen_deposit_i64(t0, t0, t1, 32, 32); 5142 tcg_temp_free_i64(t1); 5143 break; 5144 5145 default: 5146 g_assert_not_reached(); 5147 } 5148 tcg_gen_st_i64(t0, cpu_env, vofs + len_align); 5149 tcg_temp_free_i64(t0); 5150 } 5151} 5152 5153/* Similarly for stores. */ 5154static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm) 5155{ 5156 int len_align = QEMU_ALIGN_DOWN(len, 8); 5157 int len_remain = len % 8; 5158 int nparts = len / 8 + ctpop8(len_remain); 5159 int midx = get_mem_index(s); 5160 TCGv_i64 dirty_addr, clean_addr, t0; 5161 5162 dirty_addr = tcg_temp_new_i64(); 5163 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm); 5164 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len); 5165 tcg_temp_free_i64(dirty_addr); 5166 5167 /* Note that unpredicated load/store of vector/predicate registers 5168 * are defined as a stream of bytes, which equates to little-endian 5169 * operations on larger quantities. There is no nice way to force 5170 * a little-endian store for aarch64_be-linux-user out of line. 5171 * 5172 * Attempt to keep code expansion to a minimum by limiting the 5173 * amount of unrolling done. 5174 */ 5175 if (nparts <= 4) { 5176 int i; 5177 5178 t0 = tcg_temp_new_i64(); 5179 for (i = 0; i < len_align; i += 8) { 5180 tcg_gen_ld_i64(t0, cpu_env, vofs + i); 5181 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEQ); 5182 tcg_gen_addi_i64(clean_addr, clean_addr, 8); 5183 } 5184 tcg_temp_free_i64(t0); 5185 } else { 5186 TCGLabel *loop = gen_new_label(); 5187 TCGv_ptr tp, i = tcg_const_local_ptr(0); 5188 5189 /* Copy the clean address into a local temp, live across the loop. */ 5190 t0 = clean_addr; 5191 clean_addr = new_tmp_a64_local(s); 5192 tcg_gen_mov_i64(clean_addr, t0); 5193 5194 gen_set_label(loop); 5195 5196 t0 = tcg_temp_new_i64(); 5197 tp = tcg_temp_new_ptr(); 5198 tcg_gen_add_ptr(tp, cpu_env, i); 5199 tcg_gen_ld_i64(t0, tp, vofs); 5200 tcg_gen_addi_ptr(i, i, 8); 5201 tcg_temp_free_ptr(tp); 5202 5203 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEQ); 5204 tcg_gen_addi_i64(clean_addr, clean_addr, 8); 5205 tcg_temp_free_i64(t0); 5206 5207 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop); 5208 tcg_temp_free_ptr(i); 5209 } 5210 5211 /* Predicate register stores can be any multiple of 2. 
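     * As in do_ldr above, a power-of-two remainder is stored in a single
     * little-endian piece, while a 6-byte remainder is split into a
     * 32-bit store followed by a 16-bit store of the remaining bytes.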
*/ 5212 if (len_remain) { 5213 t0 = tcg_temp_new_i64(); 5214 tcg_gen_ld_i64(t0, cpu_env, vofs + len_align); 5215 5216 switch (len_remain) { 5217 case 2: 5218 case 4: 5219 case 8: 5220 tcg_gen_qemu_st_i64(t0, clean_addr, midx, 5221 MO_LE | ctz32(len_remain)); 5222 break; 5223 5224 case 6: 5225 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL); 5226 tcg_gen_addi_i64(clean_addr, clean_addr, 4); 5227 tcg_gen_shri_i64(t0, t0, 32); 5228 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW); 5229 break; 5230 5231 default: 5232 g_assert_not_reached(); 5233 } 5234 tcg_temp_free_i64(t0); 5235 } 5236} 5237 5238static bool trans_LDR_zri(DisasContext *s, arg_rri *a) 5239{ 5240 if (sve_access_check(s)) { 5241 int size = vec_full_reg_size(s); 5242 int off = vec_full_reg_offset(s, a->rd); 5243 do_ldr(s, off, size, a->rn, a->imm * size); 5244 } 5245 return true; 5246} 5247 5248static bool trans_LDR_pri(DisasContext *s, arg_rri *a) 5249{ 5250 if (sve_access_check(s)) { 5251 int size = pred_full_reg_size(s); 5252 int off = pred_full_reg_offset(s, a->rd); 5253 do_ldr(s, off, size, a->rn, a->imm * size); 5254 } 5255 return true; 5256} 5257 5258static bool trans_STR_zri(DisasContext *s, arg_rri *a) 5259{ 5260 if (sve_access_check(s)) { 5261 int size = vec_full_reg_size(s); 5262 int off = vec_full_reg_offset(s, a->rd); 5263 do_str(s, off, size, a->rn, a->imm * size); 5264 } 5265 return true; 5266} 5267 5268static bool trans_STR_pri(DisasContext *s, arg_rri *a) 5269{ 5270 if (sve_access_check(s)) { 5271 int size = pred_full_reg_size(s); 5272 int off = pred_full_reg_offset(s, a->rd); 5273 do_str(s, off, size, a->rn, a->imm * size); 5274 } 5275 return true; 5276} 5277 5278/* 5279 *** SVE Memory - Contiguous Load Group 5280 */ 5281 5282/* The memory mode of the dtype. */ 5283static const MemOp dtype_mop[16] = { 5284 MO_UB, MO_UB, MO_UB, MO_UB, 5285 MO_SL, MO_UW, MO_UW, MO_UW, 5286 MO_SW, MO_SW, MO_UL, MO_UL, 5287 MO_SB, MO_SB, MO_SB, MO_Q 5288}; 5289 5290#define dtype_msz(x) (dtype_mop[x] & MO_SIZE) 5291 5292/* The vector element size of dtype. */ 5293static const uint8_t dtype_esz[16] = { 5294 0, 1, 2, 3, 5295 3, 1, 2, 3, 5296 3, 2, 2, 3, 5297 3, 2, 1, 3 5298}; 5299 5300static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, 5301 int dtype, uint32_t mte_n, bool is_write, 5302 gen_helper_gvec_mem *fn) 5303{ 5304 unsigned vsz = vec_full_reg_size(s); 5305 TCGv_ptr t_pg; 5306 TCGv_i32 t_desc; 5307 int desc = 0; 5308 5309 /* 5310 * For e.g. LD4, there are not enough arguments to pass all 4 5311 * registers as pointers, so encode the regno into the data field. 5312 * For consistency, do this even for LD1. 
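     * When MTE is active, the MTE descriptor fields are packed above
     * SVE_MTEDESC_SHIFT so that they can share the simd_desc data field
     * with the register number; when it is inactive, the TBI bits are
     * instead stripped from the address here via clean_data_tbi.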
5313 */ 5314 if (s->mte_active[0]) { 5315 int msz = dtype_msz(dtype); 5316 5317 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 5318 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 5319 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 5320 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); 5321 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (mte_n << msz) - 1); 5322 desc <<= SVE_MTEDESC_SHIFT; 5323 } else { 5324 addr = clean_data_tbi(s, addr); 5325 } 5326 5327 desc = simd_desc(vsz, vsz, zt | desc); 5328 t_desc = tcg_const_i32(desc); 5329 t_pg = tcg_temp_new_ptr(); 5330 5331 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg)); 5332 fn(cpu_env, t_pg, addr, t_desc); 5333 5334 tcg_temp_free_ptr(t_pg); 5335 tcg_temp_free_i32(t_desc); 5336} 5337 5338/* Indexed by [mte][be][dtype][nreg] */ 5339static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = { 5340 { /* mte inactive, little-endian */ 5341 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, 5342 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, 5343 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, 5344 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, 5345 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, 5346 5347 { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL }, 5348 { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r, 5349 gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r }, 5350 { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL }, 5351 { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL }, 5352 5353 { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL }, 5354 { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL }, 5355 { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r, 5356 gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r }, 5357 { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL }, 5358 5359 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, 5360 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, 5361 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, 5362 { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r, 5363 gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } }, 5364 5365 /* mte inactive, big-endian */ 5366 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, 5367 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, 5368 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, 5369 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, 5370 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, 5371 5372 { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL }, 5373 { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r, 5374 gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r }, 5375 { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL }, 5376 { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL }, 5377 5378 { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL }, 5379 { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL }, 5380 { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r, 5381 gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r }, 5382 { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL }, 5383 5384 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, 5385 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, 5386 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, 5387 { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r, 5388 gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } }, 5389 5390 { /* mte active, little-endian */ 5391 { { gen_helper_sve_ld1bb_r_mte, 5392 gen_helper_sve_ld2bb_r_mte, 5393 gen_helper_sve_ld3bb_r_mte, 5394 gen_helper_sve_ld4bb_r_mte }, 5395 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL }, 5396 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL }, 5397 { 
gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL }, 5398 5399 { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL }, 5400 { gen_helper_sve_ld1hh_le_r_mte, 5401 gen_helper_sve_ld2hh_le_r_mte, 5402 gen_helper_sve_ld3hh_le_r_mte, 5403 gen_helper_sve_ld4hh_le_r_mte }, 5404 { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL }, 5405 { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL }, 5406 5407 { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL }, 5408 { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL }, 5409 { gen_helper_sve_ld1ss_le_r_mte, 5410 gen_helper_sve_ld2ss_le_r_mte, 5411 gen_helper_sve_ld3ss_le_r_mte, 5412 gen_helper_sve_ld4ss_le_r_mte }, 5413 { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL }, 5414 5415 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL }, 5416 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL }, 5417 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL }, 5418 { gen_helper_sve_ld1dd_le_r_mte, 5419 gen_helper_sve_ld2dd_le_r_mte, 5420 gen_helper_sve_ld3dd_le_r_mte, 5421 gen_helper_sve_ld4dd_le_r_mte } }, 5422 5423 /* mte active, big-endian */ 5424 { { gen_helper_sve_ld1bb_r_mte, 5425 gen_helper_sve_ld2bb_r_mte, 5426 gen_helper_sve_ld3bb_r_mte, 5427 gen_helper_sve_ld4bb_r_mte }, 5428 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL }, 5429 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL }, 5430 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL }, 5431 5432 { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL }, 5433 { gen_helper_sve_ld1hh_be_r_mte, 5434 gen_helper_sve_ld2hh_be_r_mte, 5435 gen_helper_sve_ld3hh_be_r_mte, 5436 gen_helper_sve_ld4hh_be_r_mte }, 5437 { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL }, 5438 { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL }, 5439 5440 { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL }, 5441 { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL }, 5442 { gen_helper_sve_ld1ss_be_r_mte, 5443 gen_helper_sve_ld2ss_be_r_mte, 5444 gen_helper_sve_ld3ss_be_r_mte, 5445 gen_helper_sve_ld4ss_be_r_mte }, 5446 { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL }, 5447 5448 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL }, 5449 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL }, 5450 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL }, 5451 { gen_helper_sve_ld1dd_be_r_mte, 5452 gen_helper_sve_ld2dd_be_r_mte, 5453 gen_helper_sve_ld3dd_be_r_mte, 5454 gen_helper_sve_ld4dd_be_r_mte } } }, 5455}; 5456 5457static void do_ld_zpa(DisasContext *s, int zt, int pg, 5458 TCGv_i64 addr, int dtype, int nreg) 5459{ 5460 gen_helper_gvec_mem *fn 5461 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg]; 5462 5463 /* 5464 * While there are holes in the table, they are not 5465 * accessible via the instruction encoding. 
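     * The multi-register forms (LD2..LD4) are only encoded for dtypes
     * whose memory size equals the element size, so only the bb/hh/ss/dd
     * rows have more than one non-NULL column.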
5466 */ 5467 assert(fn != NULL); 5468 do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn); 5469} 5470 5471static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a) 5472{ 5473 if (a->rm == 31) { 5474 return false; 5475 } 5476 if (sve_access_check(s)) { 5477 TCGv_i64 addr = new_tmp_a64(s); 5478 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); 5479 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 5480 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg); 5481 } 5482 return true; 5483} 5484 5485static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a) 5486{ 5487 if (sve_access_check(s)) { 5488 int vsz = vec_full_reg_size(s); 5489 int elements = vsz >> dtype_esz[a->dtype]; 5490 TCGv_i64 addr = new_tmp_a64(s); 5491 5492 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), 5493 (a->imm * elements * (a->nreg + 1)) 5494 << dtype_msz(a->dtype)); 5495 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg); 5496 } 5497 return true; 5498} 5499 5500static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a) 5501{ 5502 static gen_helper_gvec_mem * const fns[2][2][16] = { 5503 { /* mte inactive, little-endian */ 5504 { gen_helper_sve_ldff1bb_r, 5505 gen_helper_sve_ldff1bhu_r, 5506 gen_helper_sve_ldff1bsu_r, 5507 gen_helper_sve_ldff1bdu_r, 5508 5509 gen_helper_sve_ldff1sds_le_r, 5510 gen_helper_sve_ldff1hh_le_r, 5511 gen_helper_sve_ldff1hsu_le_r, 5512 gen_helper_sve_ldff1hdu_le_r, 5513 5514 gen_helper_sve_ldff1hds_le_r, 5515 gen_helper_sve_ldff1hss_le_r, 5516 gen_helper_sve_ldff1ss_le_r, 5517 gen_helper_sve_ldff1sdu_le_r, 5518 5519 gen_helper_sve_ldff1bds_r, 5520 gen_helper_sve_ldff1bss_r, 5521 gen_helper_sve_ldff1bhs_r, 5522 gen_helper_sve_ldff1dd_le_r }, 5523 5524 /* mte inactive, big-endian */ 5525 { gen_helper_sve_ldff1bb_r, 5526 gen_helper_sve_ldff1bhu_r, 5527 gen_helper_sve_ldff1bsu_r, 5528 gen_helper_sve_ldff1bdu_r, 5529 5530 gen_helper_sve_ldff1sds_be_r, 5531 gen_helper_sve_ldff1hh_be_r, 5532 gen_helper_sve_ldff1hsu_be_r, 5533 gen_helper_sve_ldff1hdu_be_r, 5534 5535 gen_helper_sve_ldff1hds_be_r, 5536 gen_helper_sve_ldff1hss_be_r, 5537 gen_helper_sve_ldff1ss_be_r, 5538 gen_helper_sve_ldff1sdu_be_r, 5539 5540 gen_helper_sve_ldff1bds_r, 5541 gen_helper_sve_ldff1bss_r, 5542 gen_helper_sve_ldff1bhs_r, 5543 gen_helper_sve_ldff1dd_be_r } }, 5544 5545 { /* mte active, little-endian */ 5546 { gen_helper_sve_ldff1bb_r_mte, 5547 gen_helper_sve_ldff1bhu_r_mte, 5548 gen_helper_sve_ldff1bsu_r_mte, 5549 gen_helper_sve_ldff1bdu_r_mte, 5550 5551 gen_helper_sve_ldff1sds_le_r_mte, 5552 gen_helper_sve_ldff1hh_le_r_mte, 5553 gen_helper_sve_ldff1hsu_le_r_mte, 5554 gen_helper_sve_ldff1hdu_le_r_mte, 5555 5556 gen_helper_sve_ldff1hds_le_r_mte, 5557 gen_helper_sve_ldff1hss_le_r_mte, 5558 gen_helper_sve_ldff1ss_le_r_mte, 5559 gen_helper_sve_ldff1sdu_le_r_mte, 5560 5561 gen_helper_sve_ldff1bds_r_mte, 5562 gen_helper_sve_ldff1bss_r_mte, 5563 gen_helper_sve_ldff1bhs_r_mte, 5564 gen_helper_sve_ldff1dd_le_r_mte }, 5565 5566 /* mte active, big-endian */ 5567 { gen_helper_sve_ldff1bb_r_mte, 5568 gen_helper_sve_ldff1bhu_r_mte, 5569 gen_helper_sve_ldff1bsu_r_mte, 5570 gen_helper_sve_ldff1bdu_r_mte, 5571 5572 gen_helper_sve_ldff1sds_be_r_mte, 5573 gen_helper_sve_ldff1hh_be_r_mte, 5574 gen_helper_sve_ldff1hsu_be_r_mte, 5575 gen_helper_sve_ldff1hdu_be_r_mte, 5576 5577 gen_helper_sve_ldff1hds_be_r_mte, 5578 gen_helper_sve_ldff1hss_be_r_mte, 5579 gen_helper_sve_ldff1ss_be_r_mte, 5580 gen_helper_sve_ldff1sdu_be_r_mte, 5581 5582 gen_helper_sve_ldff1bds_r_mte, 5583 gen_helper_sve_ldff1bss_r_mte, 5584 
gen_helper_sve_ldff1bhs_r_mte, 5585 gen_helper_sve_ldff1dd_be_r_mte } }, 5586 }; 5587 5588 if (sve_access_check(s)) { 5589 TCGv_i64 addr = new_tmp_a64(s); 5590 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); 5591 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 5592 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false, 5593 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]); 5594 } 5595 return true; 5596} 5597 5598static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a) 5599{ 5600 static gen_helper_gvec_mem * const fns[2][2][16] = { 5601 { /* mte inactive, little-endian */ 5602 { gen_helper_sve_ldnf1bb_r, 5603 gen_helper_sve_ldnf1bhu_r, 5604 gen_helper_sve_ldnf1bsu_r, 5605 gen_helper_sve_ldnf1bdu_r, 5606 5607 gen_helper_sve_ldnf1sds_le_r, 5608 gen_helper_sve_ldnf1hh_le_r, 5609 gen_helper_sve_ldnf1hsu_le_r, 5610 gen_helper_sve_ldnf1hdu_le_r, 5611 5612 gen_helper_sve_ldnf1hds_le_r, 5613 gen_helper_sve_ldnf1hss_le_r, 5614 gen_helper_sve_ldnf1ss_le_r, 5615 gen_helper_sve_ldnf1sdu_le_r, 5616 5617 gen_helper_sve_ldnf1bds_r, 5618 gen_helper_sve_ldnf1bss_r, 5619 gen_helper_sve_ldnf1bhs_r, 5620 gen_helper_sve_ldnf1dd_le_r }, 5621 5622 /* mte inactive, big-endian */ 5623 { gen_helper_sve_ldnf1bb_r, 5624 gen_helper_sve_ldnf1bhu_r, 5625 gen_helper_sve_ldnf1bsu_r, 5626 gen_helper_sve_ldnf1bdu_r, 5627 5628 gen_helper_sve_ldnf1sds_be_r, 5629 gen_helper_sve_ldnf1hh_be_r, 5630 gen_helper_sve_ldnf1hsu_be_r, 5631 gen_helper_sve_ldnf1hdu_be_r, 5632 5633 gen_helper_sve_ldnf1hds_be_r, 5634 gen_helper_sve_ldnf1hss_be_r, 5635 gen_helper_sve_ldnf1ss_be_r, 5636 gen_helper_sve_ldnf1sdu_be_r, 5637 5638 gen_helper_sve_ldnf1bds_r, 5639 gen_helper_sve_ldnf1bss_r, 5640 gen_helper_sve_ldnf1bhs_r, 5641 gen_helper_sve_ldnf1dd_be_r } }, 5642 5643 { /* mte active, little-endian */ 5644 { gen_helper_sve_ldnf1bb_r_mte, 5645 gen_helper_sve_ldnf1bhu_r_mte, 5646 gen_helper_sve_ldnf1bsu_r_mte, 5647 gen_helper_sve_ldnf1bdu_r_mte, 5648 5649 gen_helper_sve_ldnf1sds_le_r_mte, 5650 gen_helper_sve_ldnf1hh_le_r_mte, 5651 gen_helper_sve_ldnf1hsu_le_r_mte, 5652 gen_helper_sve_ldnf1hdu_le_r_mte, 5653 5654 gen_helper_sve_ldnf1hds_le_r_mte, 5655 gen_helper_sve_ldnf1hss_le_r_mte, 5656 gen_helper_sve_ldnf1ss_le_r_mte, 5657 gen_helper_sve_ldnf1sdu_le_r_mte, 5658 5659 gen_helper_sve_ldnf1bds_r_mte, 5660 gen_helper_sve_ldnf1bss_r_mte, 5661 gen_helper_sve_ldnf1bhs_r_mte, 5662 gen_helper_sve_ldnf1dd_le_r_mte }, 5663 5664 /* mte active, big-endian */ 5665 { gen_helper_sve_ldnf1bb_r_mte, 5666 gen_helper_sve_ldnf1bhu_r_mte, 5667 gen_helper_sve_ldnf1bsu_r_mte, 5668 gen_helper_sve_ldnf1bdu_r_mte, 5669 5670 gen_helper_sve_ldnf1sds_be_r_mte, 5671 gen_helper_sve_ldnf1hh_be_r_mte, 5672 gen_helper_sve_ldnf1hsu_be_r_mte, 5673 gen_helper_sve_ldnf1hdu_be_r_mte, 5674 5675 gen_helper_sve_ldnf1hds_be_r_mte, 5676 gen_helper_sve_ldnf1hss_be_r_mte, 5677 gen_helper_sve_ldnf1ss_be_r_mte, 5678 gen_helper_sve_ldnf1sdu_be_r_mte, 5679 5680 gen_helper_sve_ldnf1bds_r_mte, 5681 gen_helper_sve_ldnf1bss_r_mte, 5682 gen_helper_sve_ldnf1bhs_r_mte, 5683 gen_helper_sve_ldnf1dd_be_r_mte } }, 5684 }; 5685 5686 if (sve_access_check(s)) { 5687 int vsz = vec_full_reg_size(s); 5688 int elements = vsz >> dtype_esz[a->dtype]; 5689 int off = (a->imm * elements) << dtype_msz(a->dtype); 5690 TCGv_i64 addr = new_tmp_a64(s); 5691 5692 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off); 5693 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false, 5694 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]); 5695 } 5696 return true; 5697} 5698 5699static void
do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype) 5700{ 5701 unsigned vsz = vec_full_reg_size(s); 5702 TCGv_ptr t_pg; 5703 int poff; 5704 5705 /* Load the first quadword using the normal predicated load helpers. */ 5706 poff = pred_full_reg_offset(s, pg); 5707 if (vsz > 16) { 5708 /* 5709 * Zero-extend the first 16 bits of the predicate into a temporary. 5710 * This avoids triggering an assert making sure we don't have bits 5711 * set within a predicate beyond VQ, but we have lowered VQ to 1 5712 * for this load operation. 5713 */ 5714 TCGv_i64 tmp = tcg_temp_new_i64(); 5715#ifdef HOST_WORDS_BIGENDIAN 5716 poff += 6; 5717#endif 5718 tcg_gen_ld16u_i64(tmp, cpu_env, poff); 5719 5720 poff = offsetof(CPUARMState, vfp.preg_tmp); 5721 tcg_gen_st_i64(tmp, cpu_env, poff); 5722 tcg_temp_free_i64(tmp); 5723 } 5724 5725 t_pg = tcg_temp_new_ptr(); 5726 tcg_gen_addi_ptr(t_pg, cpu_env, poff); 5727 5728 gen_helper_gvec_mem *fn 5729 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0]; 5730 fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(16, 16, zt))); 5731 5732 tcg_temp_free_ptr(t_pg); 5733 5734 /* Replicate that first quadword. */ 5735 if (vsz > 16) { 5736 int doff = vec_full_reg_offset(s, zt); 5737 tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16); 5738 } 5739} 5740 5741static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a) 5742{ 5743 if (a->rm == 31) { 5744 return false; 5745 } 5746 if (sve_access_check(s)) { 5747 int msz = dtype_msz(a->dtype); 5748 TCGv_i64 addr = new_tmp_a64(s); 5749 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz); 5750 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 5751 do_ldrq(s, a->rd, a->pg, addr, a->dtype); 5752 } 5753 return true; 5754} 5755 5756static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a) 5757{ 5758 if (sve_access_check(s)) { 5759 TCGv_i64 addr = new_tmp_a64(s); 5760 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16); 5761 do_ldrq(s, a->rd, a->pg, addr, a->dtype); 5762 } 5763 return true; 5764} 5765 5766static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype) 5767{ 5768 unsigned vsz = vec_full_reg_size(s); 5769 unsigned vsz_r32; 5770 TCGv_ptr t_pg; 5771 int poff, doff; 5772 5773 if (vsz < 32) { 5774 /* 5775 * Note that this UNDEFINED check comes after CheckSVEEnabled() 5776 * in the ARM pseudocode, which is the sve_access_check() done 5777 * in our caller. We should not now return false from the caller. 5778 */ 5779 unallocated_encoding(s); 5780 return; 5781 } 5782 5783 /* Load the first octaword using the normal predicated load helpers. */ 5784 5785 poff = pred_full_reg_offset(s, pg); 5786 if (vsz > 32) { 5787 /* 5788 * Zero-extend the first 32 bits of the predicate into a temporary. 5789 * This avoids triggering an assert making sure we don't have bits 5790 * set within a predicate beyond VQ, but we have lowered VQ to 2 5791 * for this load operation. 5792 */ 5793 TCGv_i64 tmp = tcg_temp_new_i64(); 5794#ifdef HOST_WORDS_BIGENDIAN 5795 poff += 4; 5796#endif 5797 tcg_gen_ld32u_i64(tmp, cpu_env, poff); 5798 5799 poff = offsetof(CPUARMState, vfp.preg_tmp); 5800 tcg_gen_st_i64(tmp, cpu_env, poff); 5801 tcg_temp_free_i64(tmp); 5802 } 5803 5804 t_pg = tcg_temp_new_ptr(); 5805 tcg_gen_addi_ptr(t_pg, cpu_env, poff); 5806 5807 gen_helper_gvec_mem *fn 5808 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0]; 5809 fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(32, 32, zt))); 5810 5811 tcg_temp_free_ptr(t_pg); 5812 5813 /* 5814 * Replicate that first octaword. 
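     * When the vector is exactly 32 bytes long there is nothing to do.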
5815 * The replication happens in units of 32; if the full vector size 5816 * is not a multiple of 32, the final bits are zeroed. 5817 */ 5818 doff = vec_full_reg_offset(s, zt); 5819 vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32); 5820 if (vsz >= 64) { 5821 tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz_r32 - 32); 5822 } 5823 vsz -= vsz_r32; 5824 if (vsz) { 5825 tcg_gen_gvec_dup_imm(MO_64, doff + vsz_r32, vsz, vsz, 0); 5826 } 5827} 5828 5829static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a) 5830{ 5831 if (!dc_isar_feature(aa64_sve_f64mm, s)) { 5832 return false; 5833 } 5834 if (a->rm == 31) { 5835 return false; 5836 } 5837 if (sve_access_check(s)) { 5838 TCGv_i64 addr = new_tmp_a64(s); 5839 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); 5840 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 5841 do_ldro(s, a->rd, a->pg, addr, a->dtype); 5842 } 5843 return true; 5844} 5845 5846static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a) 5847{ 5848 if (!dc_isar_feature(aa64_sve_f64mm, s)) { 5849 return false; 5850 } 5851 if (sve_access_check(s)) { 5852 TCGv_i64 addr = new_tmp_a64(s); 5853 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32); 5854 do_ldro(s, a->rd, a->pg, addr, a->dtype); 5855 } 5856 return true; 5857} 5858 5859/* Load and broadcast element. */ 5860static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a) 5861{ 5862 unsigned vsz = vec_full_reg_size(s); 5863 unsigned psz = pred_full_reg_size(s); 5864 unsigned esz = dtype_esz[a->dtype]; 5865 unsigned msz = dtype_msz(a->dtype); 5866 TCGLabel *over; 5867 TCGv_i64 temp, clean_addr; 5868 5869 if (!sve_access_check(s)) { 5870 return true; 5871 } 5872 5873 over = gen_new_label(); 5874 5875 /* If the guarding predicate has no bits set, no load occurs. */ 5876 if (psz <= 8) { 5877 /* Reduce the pred_esz_masks value simply to reduce the 5878 * size of the code generated here. 5879 */ 5880 uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8); 5881 temp = tcg_temp_new_i64(); 5882 tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg)); 5883 tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask); 5884 tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over); 5885 tcg_temp_free_i64(temp); 5886 } else { 5887 TCGv_i32 t32 = tcg_temp_new_i32(); 5888 find_last_active(s, t32, esz, a->pg); 5889 tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over); 5890 tcg_temp_free_i32(t32); 5891 } 5892 5893 /* Load the data. */ 5894 temp = tcg_temp_new_i64(); 5895 tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz); 5896 clean_addr = gen_mte_check1(s, temp, false, true, msz); 5897 5898 tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s), 5899 finalize_memop(s, dtype_mop[a->dtype])); 5900 5901 /* Broadcast to *all* elements. */ 5902 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), 5903 vsz, vsz, temp); 5904 tcg_temp_free_i64(temp); 5905 5906 /* Zero the inactive elements. 
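     * The branch taken for an all-false predicate lands here, so the
     * inactive elements are zeroed even when the load itself is skipped.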
*/ 5907 gen_set_label(over); 5908 return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false); 5909} 5910 5911static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, 5912 int msz, int esz, int nreg) 5913{ 5914 static gen_helper_gvec_mem * const fn_single[2][2][4][4] = { 5915 { { { gen_helper_sve_st1bb_r, 5916 gen_helper_sve_st1bh_r, 5917 gen_helper_sve_st1bs_r, 5918 gen_helper_sve_st1bd_r }, 5919 { NULL, 5920 gen_helper_sve_st1hh_le_r, 5921 gen_helper_sve_st1hs_le_r, 5922 gen_helper_sve_st1hd_le_r }, 5923 { NULL, NULL, 5924 gen_helper_sve_st1ss_le_r, 5925 gen_helper_sve_st1sd_le_r }, 5926 { NULL, NULL, NULL, 5927 gen_helper_sve_st1dd_le_r } }, 5928 { { gen_helper_sve_st1bb_r, 5929 gen_helper_sve_st1bh_r, 5930 gen_helper_sve_st1bs_r, 5931 gen_helper_sve_st1bd_r }, 5932 { NULL, 5933 gen_helper_sve_st1hh_be_r, 5934 gen_helper_sve_st1hs_be_r, 5935 gen_helper_sve_st1hd_be_r }, 5936 { NULL, NULL, 5937 gen_helper_sve_st1ss_be_r, 5938 gen_helper_sve_st1sd_be_r }, 5939 { NULL, NULL, NULL, 5940 gen_helper_sve_st1dd_be_r } } }, 5941 5942 { { { gen_helper_sve_st1bb_r_mte, 5943 gen_helper_sve_st1bh_r_mte, 5944 gen_helper_sve_st1bs_r_mte, 5945 gen_helper_sve_st1bd_r_mte }, 5946 { NULL, 5947 gen_helper_sve_st1hh_le_r_mte, 5948 gen_helper_sve_st1hs_le_r_mte, 5949 gen_helper_sve_st1hd_le_r_mte }, 5950 { NULL, NULL, 5951 gen_helper_sve_st1ss_le_r_mte, 5952 gen_helper_sve_st1sd_le_r_mte }, 5953 { NULL, NULL, NULL, 5954 gen_helper_sve_st1dd_le_r_mte } }, 5955 { { gen_helper_sve_st1bb_r_mte, 5956 gen_helper_sve_st1bh_r_mte, 5957 gen_helper_sve_st1bs_r_mte, 5958 gen_helper_sve_st1bd_r_mte }, 5959 { NULL, 5960 gen_helper_sve_st1hh_be_r_mte, 5961 gen_helper_sve_st1hs_be_r_mte, 5962 gen_helper_sve_st1hd_be_r_mte }, 5963 { NULL, NULL, 5964 gen_helper_sve_st1ss_be_r_mte, 5965 gen_helper_sve_st1sd_be_r_mte }, 5966 { NULL, NULL, NULL, 5967 gen_helper_sve_st1dd_be_r_mte } } }, 5968 }; 5969 static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = { 5970 { { { gen_helper_sve_st2bb_r, 5971 gen_helper_sve_st2hh_le_r, 5972 gen_helper_sve_st2ss_le_r, 5973 gen_helper_sve_st2dd_le_r }, 5974 { gen_helper_sve_st3bb_r, 5975 gen_helper_sve_st3hh_le_r, 5976 gen_helper_sve_st3ss_le_r, 5977 gen_helper_sve_st3dd_le_r }, 5978 { gen_helper_sve_st4bb_r, 5979 gen_helper_sve_st4hh_le_r, 5980 gen_helper_sve_st4ss_le_r, 5981 gen_helper_sve_st4dd_le_r } }, 5982 { { gen_helper_sve_st2bb_r, 5983 gen_helper_sve_st2hh_be_r, 5984 gen_helper_sve_st2ss_be_r, 5985 gen_helper_sve_st2dd_be_r }, 5986 { gen_helper_sve_st3bb_r, 5987 gen_helper_sve_st3hh_be_r, 5988 gen_helper_sve_st3ss_be_r, 5989 gen_helper_sve_st3dd_be_r }, 5990 { gen_helper_sve_st4bb_r, 5991 gen_helper_sve_st4hh_be_r, 5992 gen_helper_sve_st4ss_be_r, 5993 gen_helper_sve_st4dd_be_r } } }, 5994 { { { gen_helper_sve_st2bb_r_mte, 5995 gen_helper_sve_st2hh_le_r_mte, 5996 gen_helper_sve_st2ss_le_r_mte, 5997 gen_helper_sve_st2dd_le_r_mte }, 5998 { gen_helper_sve_st3bb_r_mte, 5999 gen_helper_sve_st3hh_le_r_mte, 6000 gen_helper_sve_st3ss_le_r_mte, 6001 gen_helper_sve_st3dd_le_r_mte }, 6002 { gen_helper_sve_st4bb_r_mte, 6003 gen_helper_sve_st4hh_le_r_mte, 6004 gen_helper_sve_st4ss_le_r_mte, 6005 gen_helper_sve_st4dd_le_r_mte } }, 6006 { { gen_helper_sve_st2bb_r_mte, 6007 gen_helper_sve_st2hh_be_r_mte, 6008 gen_helper_sve_st2ss_be_r_mte, 6009 gen_helper_sve_st2dd_be_r_mte }, 6010 { gen_helper_sve_st3bb_r_mte, 6011 gen_helper_sve_st3hh_be_r_mte, 6012 gen_helper_sve_st3ss_be_r_mte, 6013 gen_helper_sve_st3dd_be_r_mte }, 6014 { gen_helper_sve_st4bb_r_mte, 6015 gen_helper_sve_st4hh_be_r_mte, 
6016 gen_helper_sve_st4ss_be_r_mte, 6017 gen_helper_sve_st4dd_be_r_mte } } }, 6018 }; 6019 gen_helper_gvec_mem *fn; 6020 int be = s->be_data == MO_BE; 6021 6022 if (nreg == 0) { 6023 /* ST1 */ 6024 fn = fn_single[s->mte_active[0]][be][msz][esz]; 6025 nreg = 1; 6026 } else { 6027 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */ 6028 assert(msz == esz); 6029 fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz]; 6030 } 6031 assert(fn != NULL); 6032 do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn); 6033} 6034 6035static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a) 6036{ 6037 if (a->rm == 31 || a->msz > a->esz) { 6038 return false; 6039 } 6040 if (sve_access_check(s)) { 6041 TCGv_i64 addr = new_tmp_a64(s); 6042 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz); 6043 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); 6044 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg); 6045 } 6046 return true; 6047} 6048 6049static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a) 6050{ 6051 if (a->msz > a->esz) { 6052 return false; 6053 } 6054 if (sve_access_check(s)) { 6055 int vsz = vec_full_reg_size(s); 6056 int elements = vsz >> a->esz; 6057 TCGv_i64 addr = new_tmp_a64(s); 6058 6059 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), 6060 (a->imm * elements * (a->nreg + 1)) << a->msz); 6061 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg); 6062 } 6063 return true; 6064} 6065 6066/* 6067 *** SVE gather loads / scatter stores 6068 */ 6069 6070static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, 6071 int scale, TCGv_i64 scalar, int msz, bool is_write, 6072 gen_helper_gvec_mem_scatter *fn) 6073{ 6074 unsigned vsz = vec_full_reg_size(s); 6075 TCGv_ptr t_zm = tcg_temp_new_ptr(); 6076 TCGv_ptr t_pg = tcg_temp_new_ptr(); 6077 TCGv_ptr t_zt = tcg_temp_new_ptr(); 6078 TCGv_i32 t_desc; 6079 int desc = 0; 6080 6081 if (s->mte_active[0]) { 6082 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 6083 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 6084 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 6085 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); 6086 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << msz) - 1); 6087 desc <<= SVE_MTEDESC_SHIFT; 6088 } 6089 desc = simd_desc(vsz, vsz, desc | scale); 6090 t_desc = tcg_const_i32(desc); 6091 6092 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg)); 6093 tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm)); 6094 tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt)); 6095 fn(cpu_env, t_zt, t_pg, t_zm, scalar, t_desc); 6096 6097 tcg_temp_free_ptr(t_zt); 6098 tcg_temp_free_ptr(t_zm); 6099 tcg_temp_free_ptr(t_pg); 6100 tcg_temp_free_i32(t_desc); 6101} 6102 6103/* Indexed by [mte][be][ff][xs][u][msz]. 
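 * mte: MTE checks active; be: big-endian data access; ff: first-fault
 * form; xs: offset extension (0 = zsu/unsigned, 1 = zss/signed); u:
 * zero- vs sign-extension of the loaded data; msz: memory element size.
 * E.g. (assuming the usual decode fields for a little-endian LD1W with
 * unsigned word offsets) [0][0][0][0][1][MO_32] selects
 * gen_helper_sve_ldss_le_zsu.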
*/ 6104static gen_helper_gvec_mem_scatter * const 6105gather_load_fn32[2][2][2][2][2][3] = { 6106 { /* MTE Inactive */ 6107 { /* Little-endian */ 6108 { { { gen_helper_sve_ldbss_zsu, 6109 gen_helper_sve_ldhss_le_zsu, 6110 NULL, }, 6111 { gen_helper_sve_ldbsu_zsu, 6112 gen_helper_sve_ldhsu_le_zsu, 6113 gen_helper_sve_ldss_le_zsu, } }, 6114 { { gen_helper_sve_ldbss_zss, 6115 gen_helper_sve_ldhss_le_zss, 6116 NULL, }, 6117 { gen_helper_sve_ldbsu_zss, 6118 gen_helper_sve_ldhsu_le_zss, 6119 gen_helper_sve_ldss_le_zss, } } }, 6120 6121 /* First-fault */ 6122 { { { gen_helper_sve_ldffbss_zsu, 6123 gen_helper_sve_ldffhss_le_zsu, 6124 NULL, }, 6125 { gen_helper_sve_ldffbsu_zsu, 6126 gen_helper_sve_ldffhsu_le_zsu, 6127 gen_helper_sve_ldffss_le_zsu, } }, 6128 { { gen_helper_sve_ldffbss_zss, 6129 gen_helper_sve_ldffhss_le_zss, 6130 NULL, }, 6131 { gen_helper_sve_ldffbsu_zss, 6132 gen_helper_sve_ldffhsu_le_zss, 6133 gen_helper_sve_ldffss_le_zss, } } } }, 6134 6135 { /* Big-endian */ 6136 { { { gen_helper_sve_ldbss_zsu, 6137 gen_helper_sve_ldhss_be_zsu, 6138 NULL, }, 6139 { gen_helper_sve_ldbsu_zsu, 6140 gen_helper_sve_ldhsu_be_zsu, 6141 gen_helper_sve_ldss_be_zsu, } }, 6142 { { gen_helper_sve_ldbss_zss, 6143 gen_helper_sve_ldhss_be_zss, 6144 NULL, }, 6145 { gen_helper_sve_ldbsu_zss, 6146 gen_helper_sve_ldhsu_be_zss, 6147 gen_helper_sve_ldss_be_zss, } } }, 6148 6149 /* First-fault */ 6150 { { { gen_helper_sve_ldffbss_zsu, 6151 gen_helper_sve_ldffhss_be_zsu, 6152 NULL, }, 6153 { gen_helper_sve_ldffbsu_zsu, 6154 gen_helper_sve_ldffhsu_be_zsu, 6155 gen_helper_sve_ldffss_be_zsu, } }, 6156 { { gen_helper_sve_ldffbss_zss, 6157 gen_helper_sve_ldffhss_be_zss, 6158 NULL, }, 6159 { gen_helper_sve_ldffbsu_zss, 6160 gen_helper_sve_ldffhsu_be_zss, 6161 gen_helper_sve_ldffss_be_zss, } } } } }, 6162 { /* MTE Active */ 6163 { /* Little-endian */ 6164 { { { gen_helper_sve_ldbss_zsu_mte, 6165 gen_helper_sve_ldhss_le_zsu_mte, 6166 NULL, }, 6167 { gen_helper_sve_ldbsu_zsu_mte, 6168 gen_helper_sve_ldhsu_le_zsu_mte, 6169 gen_helper_sve_ldss_le_zsu_mte, } }, 6170 { { gen_helper_sve_ldbss_zss_mte, 6171 gen_helper_sve_ldhss_le_zss_mte, 6172 NULL, }, 6173 { gen_helper_sve_ldbsu_zss_mte, 6174 gen_helper_sve_ldhsu_le_zss_mte, 6175 gen_helper_sve_ldss_le_zss_mte, } } }, 6176 6177 /* First-fault */ 6178 { { { gen_helper_sve_ldffbss_zsu_mte, 6179 gen_helper_sve_ldffhss_le_zsu_mte, 6180 NULL, }, 6181 { gen_helper_sve_ldffbsu_zsu_mte, 6182 gen_helper_sve_ldffhsu_le_zsu_mte, 6183 gen_helper_sve_ldffss_le_zsu_mte, } }, 6184 { { gen_helper_sve_ldffbss_zss_mte, 6185 gen_helper_sve_ldffhss_le_zss_mte, 6186 NULL, }, 6187 { gen_helper_sve_ldffbsu_zss_mte, 6188 gen_helper_sve_ldffhsu_le_zss_mte, 6189 gen_helper_sve_ldffss_le_zss_mte, } } } }, 6190 6191 { /* Big-endian */ 6192 { { { gen_helper_sve_ldbss_zsu_mte, 6193 gen_helper_sve_ldhss_be_zsu_mte, 6194 NULL, }, 6195 { gen_helper_sve_ldbsu_zsu_mte, 6196 gen_helper_sve_ldhsu_be_zsu_mte, 6197 gen_helper_sve_ldss_be_zsu_mte, } }, 6198 { { gen_helper_sve_ldbss_zss_mte, 6199 gen_helper_sve_ldhss_be_zss_mte, 6200 NULL, }, 6201 { gen_helper_sve_ldbsu_zss_mte, 6202 gen_helper_sve_ldhsu_be_zss_mte, 6203 gen_helper_sve_ldss_be_zss_mte, } } }, 6204 6205 /* First-fault */ 6206 { { { gen_helper_sve_ldffbss_zsu_mte, 6207 gen_helper_sve_ldffhss_be_zsu_mte, 6208 NULL, }, 6209 { gen_helper_sve_ldffbsu_zsu_mte, 6210 gen_helper_sve_ldffhsu_be_zsu_mte, 6211 gen_helper_sve_ldffss_be_zsu_mte, } }, 6212 { { gen_helper_sve_ldffbss_zss_mte, 6213 gen_helper_sve_ldffhss_be_zss_mte, 6214 NULL, }, 6215 { 
gen_helper_sve_ldffbsu_zss_mte, 6216 gen_helper_sve_ldffhsu_be_zss_mte, 6217 gen_helper_sve_ldffss_be_zss_mte, } } } } }, 6218}; 6219 6220/* Note that we overload xs=2 to indicate 64-bit offset. */ 6221static gen_helper_gvec_mem_scatter * const 6222gather_load_fn64[2][2][2][3][2][4] = { 6223 { /* MTE Inactive */ 6224 { /* Little-endian */ 6225 { { { gen_helper_sve_ldbds_zsu, 6226 gen_helper_sve_ldhds_le_zsu, 6227 gen_helper_sve_ldsds_le_zsu, 6228 NULL, }, 6229 { gen_helper_sve_ldbdu_zsu, 6230 gen_helper_sve_ldhdu_le_zsu, 6231 gen_helper_sve_ldsdu_le_zsu, 6232 gen_helper_sve_lddd_le_zsu, } }, 6233 { { gen_helper_sve_ldbds_zss, 6234 gen_helper_sve_ldhds_le_zss, 6235 gen_helper_sve_ldsds_le_zss, 6236 NULL, }, 6237 { gen_helper_sve_ldbdu_zss, 6238 gen_helper_sve_ldhdu_le_zss, 6239 gen_helper_sve_ldsdu_le_zss, 6240 gen_helper_sve_lddd_le_zss, } }, 6241 { { gen_helper_sve_ldbds_zd, 6242 gen_helper_sve_ldhds_le_zd, 6243 gen_helper_sve_ldsds_le_zd, 6244 NULL, }, 6245 { gen_helper_sve_ldbdu_zd, 6246 gen_helper_sve_ldhdu_le_zd, 6247 gen_helper_sve_ldsdu_le_zd, 6248 gen_helper_sve_lddd_le_zd, } } }, 6249 6250 /* First-fault */ 6251 { { { gen_helper_sve_ldffbds_zsu, 6252 gen_helper_sve_ldffhds_le_zsu, 6253 gen_helper_sve_ldffsds_le_zsu, 6254 NULL, }, 6255 { gen_helper_sve_ldffbdu_zsu, 6256 gen_helper_sve_ldffhdu_le_zsu, 6257 gen_helper_sve_ldffsdu_le_zsu, 6258 gen_helper_sve_ldffdd_le_zsu, } }, 6259 { { gen_helper_sve_ldffbds_zss, 6260 gen_helper_sve_ldffhds_le_zss, 6261 gen_helper_sve_ldffsds_le_zss, 6262 NULL, }, 6263 { gen_helper_sve_ldffbdu_zss, 6264 gen_helper_sve_ldffhdu_le_zss, 6265 gen_helper_sve_ldffsdu_le_zss, 6266 gen_helper_sve_ldffdd_le_zss, } }, 6267 { { gen_helper_sve_ldffbds_zd, 6268 gen_helper_sve_ldffhds_le_zd, 6269 gen_helper_sve_ldffsds_le_zd, 6270 NULL, }, 6271 { gen_helper_sve_ldffbdu_zd, 6272 gen_helper_sve_ldffhdu_le_zd, 6273 gen_helper_sve_ldffsdu_le_zd, 6274 gen_helper_sve_ldffdd_le_zd, } } } }, 6275 { /* Big-endian */ 6276 { { { gen_helper_sve_ldbds_zsu, 6277 gen_helper_sve_ldhds_be_zsu, 6278 gen_helper_sve_ldsds_be_zsu, 6279 NULL, }, 6280 { gen_helper_sve_ldbdu_zsu, 6281 gen_helper_sve_ldhdu_be_zsu, 6282 gen_helper_sve_ldsdu_be_zsu, 6283 gen_helper_sve_lddd_be_zsu, } }, 6284 { { gen_helper_sve_ldbds_zss, 6285 gen_helper_sve_ldhds_be_zss, 6286 gen_helper_sve_ldsds_be_zss, 6287 NULL, }, 6288 { gen_helper_sve_ldbdu_zss, 6289 gen_helper_sve_ldhdu_be_zss, 6290 gen_helper_sve_ldsdu_be_zss, 6291 gen_helper_sve_lddd_be_zss, } }, 6292 { { gen_helper_sve_ldbds_zd, 6293 gen_helper_sve_ldhds_be_zd, 6294 gen_helper_sve_ldsds_be_zd, 6295 NULL, }, 6296 { gen_helper_sve_ldbdu_zd, 6297 gen_helper_sve_ldhdu_be_zd, 6298 gen_helper_sve_ldsdu_be_zd, 6299 gen_helper_sve_lddd_be_zd, } } }, 6300 6301 /* First-fault */ 6302 { { { gen_helper_sve_ldffbds_zsu, 6303 gen_helper_sve_ldffhds_be_zsu, 6304 gen_helper_sve_ldffsds_be_zsu, 6305 NULL, }, 6306 { gen_helper_sve_ldffbdu_zsu, 6307 gen_helper_sve_ldffhdu_be_zsu, 6308 gen_helper_sve_ldffsdu_be_zsu, 6309 gen_helper_sve_ldffdd_be_zsu, } }, 6310 { { gen_helper_sve_ldffbds_zss, 6311 gen_helper_sve_ldffhds_be_zss, 6312 gen_helper_sve_ldffsds_be_zss, 6313 NULL, }, 6314 { gen_helper_sve_ldffbdu_zss, 6315 gen_helper_sve_ldffhdu_be_zss, 6316 gen_helper_sve_ldffsdu_be_zss, 6317 gen_helper_sve_ldffdd_be_zss, } }, 6318 { { gen_helper_sve_ldffbds_zd, 6319 gen_helper_sve_ldffhds_be_zd, 6320 gen_helper_sve_ldffsds_be_zd, 6321 NULL, }, 6322 { gen_helper_sve_ldffbdu_zd, 6323 gen_helper_sve_ldffhdu_be_zd, 6324 gen_helper_sve_ldffsdu_be_zd, 6325 
gen_helper_sve_ldffdd_be_zd, } } } } }, 6326 { /* MTE Active */ 6327 { /* Little-endian */ 6328 { { { gen_helper_sve_ldbds_zsu_mte, 6329 gen_helper_sve_ldhds_le_zsu_mte, 6330 gen_helper_sve_ldsds_le_zsu_mte, 6331 NULL, }, 6332 { gen_helper_sve_ldbdu_zsu_mte, 6333 gen_helper_sve_ldhdu_le_zsu_mte, 6334 gen_helper_sve_ldsdu_le_zsu_mte, 6335 gen_helper_sve_lddd_le_zsu_mte, } }, 6336 { { gen_helper_sve_ldbds_zss_mte, 6337 gen_helper_sve_ldhds_le_zss_mte, 6338 gen_helper_sve_ldsds_le_zss_mte, 6339 NULL, }, 6340 { gen_helper_sve_ldbdu_zss_mte, 6341 gen_helper_sve_ldhdu_le_zss_mte, 6342 gen_helper_sve_ldsdu_le_zss_mte, 6343 gen_helper_sve_lddd_le_zss_mte, } }, 6344 { { gen_helper_sve_ldbds_zd_mte, 6345 gen_helper_sve_ldhds_le_zd_mte, 6346 gen_helper_sve_ldsds_le_zd_mte, 6347 NULL, }, 6348 { gen_helper_sve_ldbdu_zd_mte, 6349 gen_helper_sve_ldhdu_le_zd_mte, 6350 gen_helper_sve_ldsdu_le_zd_mte, 6351 gen_helper_sve_lddd_le_zd_mte, } } }, 6352 6353 /* First-fault */ 6354 { { { gen_helper_sve_ldffbds_zsu_mte, 6355 gen_helper_sve_ldffhds_le_zsu_mte, 6356 gen_helper_sve_ldffsds_le_zsu_mte, 6357 NULL, }, 6358 { gen_helper_sve_ldffbdu_zsu_mte, 6359 gen_helper_sve_ldffhdu_le_zsu_mte, 6360 gen_helper_sve_ldffsdu_le_zsu_mte, 6361 gen_helper_sve_ldffdd_le_zsu_mte, } }, 6362 { { gen_helper_sve_ldffbds_zss_mte, 6363 gen_helper_sve_ldffhds_le_zss_mte, 6364 gen_helper_sve_ldffsds_le_zss_mte, 6365 NULL, }, 6366 { gen_helper_sve_ldffbdu_zss_mte, 6367 gen_helper_sve_ldffhdu_le_zss_mte, 6368 gen_helper_sve_ldffsdu_le_zss_mte, 6369 gen_helper_sve_ldffdd_le_zss_mte, } }, 6370 { { gen_helper_sve_ldffbds_zd_mte, 6371 gen_helper_sve_ldffhds_le_zd_mte, 6372 gen_helper_sve_ldffsds_le_zd_mte, 6373 NULL, }, 6374 { gen_helper_sve_ldffbdu_zd_mte, 6375 gen_helper_sve_ldffhdu_le_zd_mte, 6376 gen_helper_sve_ldffsdu_le_zd_mte, 6377 gen_helper_sve_ldffdd_le_zd_mte, } } } }, 6378 { /* Big-endian */ 6379 { { { gen_helper_sve_ldbds_zsu_mte, 6380 gen_helper_sve_ldhds_be_zsu_mte, 6381 gen_helper_sve_ldsds_be_zsu_mte, 6382 NULL, }, 6383 { gen_helper_sve_ldbdu_zsu_mte, 6384 gen_helper_sve_ldhdu_be_zsu_mte, 6385 gen_helper_sve_ldsdu_be_zsu_mte, 6386 gen_helper_sve_lddd_be_zsu_mte, } }, 6387 { { gen_helper_sve_ldbds_zss_mte, 6388 gen_helper_sve_ldhds_be_zss_mte, 6389 gen_helper_sve_ldsds_be_zss_mte, 6390 NULL, }, 6391 { gen_helper_sve_ldbdu_zss_mte, 6392 gen_helper_sve_ldhdu_be_zss_mte, 6393 gen_helper_sve_ldsdu_be_zss_mte, 6394 gen_helper_sve_lddd_be_zss_mte, } }, 6395 { { gen_helper_sve_ldbds_zd_mte, 6396 gen_helper_sve_ldhds_be_zd_mte, 6397 gen_helper_sve_ldsds_be_zd_mte, 6398 NULL, }, 6399 { gen_helper_sve_ldbdu_zd_mte, 6400 gen_helper_sve_ldhdu_be_zd_mte, 6401 gen_helper_sve_ldsdu_be_zd_mte, 6402 gen_helper_sve_lddd_be_zd_mte, } } }, 6403 6404 /* First-fault */ 6405 { { { gen_helper_sve_ldffbds_zsu_mte, 6406 gen_helper_sve_ldffhds_be_zsu_mte, 6407 gen_helper_sve_ldffsds_be_zsu_mte, 6408 NULL, }, 6409 { gen_helper_sve_ldffbdu_zsu_mte, 6410 gen_helper_sve_ldffhdu_be_zsu_mte, 6411 gen_helper_sve_ldffsdu_be_zsu_mte, 6412 gen_helper_sve_ldffdd_be_zsu_mte, } }, 6413 { { gen_helper_sve_ldffbds_zss_mte, 6414 gen_helper_sve_ldffhds_be_zss_mte, 6415 gen_helper_sve_ldffsds_be_zss_mte, 6416 NULL, }, 6417 { gen_helper_sve_ldffbdu_zss_mte, 6418 gen_helper_sve_ldffhdu_be_zss_mte, 6419 gen_helper_sve_ldffsdu_be_zss_mte, 6420 gen_helper_sve_ldffdd_be_zss_mte, } }, 6421 { { gen_helper_sve_ldffbds_zd_mte, 6422 gen_helper_sve_ldffhds_be_zd_mte, 6423 gen_helper_sve_ldffsds_be_zd_mte, 6424 NULL, }, 6425 { gen_helper_sve_ldffbdu_zd_mte, 6426 
gen_helper_sve_ldffhdu_be_zd_mte, 6427 gen_helper_sve_ldffsdu_be_zd_mte, 6428 gen_helper_sve_ldffdd_be_zd_mte, } } } } }, 6429}; 6430 6431static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a) 6432{ 6433 gen_helper_gvec_mem_scatter *fn = NULL; 6434 bool be = s->be_data == MO_BE; 6435 bool mte = s->mte_active[0]; 6436 6437 if (!sve_access_check(s)) { 6438 return true; 6439 } 6440 6441 switch (a->esz) { 6442 case MO_32: 6443 fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz]; 6444 break; 6445 case MO_64: 6446 fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz]; 6447 break; 6448 } 6449 assert(fn != NULL); 6450 6451 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz, 6452 cpu_reg_sp(s, a->rn), a->msz, false, fn); 6453 return true; 6454} 6455 6456static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a) 6457{ 6458 gen_helper_gvec_mem_scatter *fn = NULL; 6459 bool be = s->be_data == MO_BE; 6460 bool mte = s->mte_active[0]; 6461 TCGv_i64 imm; 6462 6463 if (a->esz < a->msz || (a->esz == a->msz && !a->u)) { 6464 return false; 6465 } 6466 if (!sve_access_check(s)) { 6467 return true; 6468 } 6469 6470 switch (a->esz) { 6471 case MO_32: 6472 fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz]; 6473 break; 6474 case MO_64: 6475 fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz]; 6476 break; 6477 } 6478 assert(fn != NULL); 6479 6480 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x]) 6481 * by loading the immediate into the scalar parameter. 6482 */ 6483 imm = tcg_const_i64(a->imm << a->msz); 6484 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, false, fn); 6485 tcg_temp_free_i64(imm); 6486 return true; 6487} 6488 6489static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a) 6490{ 6491 if (!dc_isar_feature(aa64_sve2, s)) { 6492 return false; 6493 } 6494 return trans_LD1_zprz(s, a); 6495} 6496 6497/* Indexed by [mte][be][xs][msz]. */ 6498static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = { 6499 { /* MTE Inactive */ 6500 { /* Little-endian */ 6501 { gen_helper_sve_stbs_zsu, 6502 gen_helper_sve_sths_le_zsu, 6503 gen_helper_sve_stss_le_zsu, }, 6504 { gen_helper_sve_stbs_zss, 6505 gen_helper_sve_sths_le_zss, 6506 gen_helper_sve_stss_le_zss, } }, 6507 { /* Big-endian */ 6508 { gen_helper_sve_stbs_zsu, 6509 gen_helper_sve_sths_be_zsu, 6510 gen_helper_sve_stss_be_zsu, }, 6511 { gen_helper_sve_stbs_zss, 6512 gen_helper_sve_sths_be_zss, 6513 gen_helper_sve_stss_be_zss, } } }, 6514 { /* MTE Active */ 6515 { /* Little-endian */ 6516 { gen_helper_sve_stbs_zsu_mte, 6517 gen_helper_sve_sths_le_zsu_mte, 6518 gen_helper_sve_stss_le_zsu_mte, }, 6519 { gen_helper_sve_stbs_zss_mte, 6520 gen_helper_sve_sths_le_zss_mte, 6521 gen_helper_sve_stss_le_zss_mte, } }, 6522 { /* Big-endian */ 6523 { gen_helper_sve_stbs_zsu_mte, 6524 gen_helper_sve_sths_be_zsu_mte, 6525 gen_helper_sve_stss_be_zsu_mte, }, 6526 { gen_helper_sve_stbs_zss_mte, 6527 gen_helper_sve_sths_be_zss_mte, 6528 gen_helper_sve_stss_be_zss_mte, } } }, 6529}; 6530 6531/* Note that we overload xs=2 to indicate 64-bit offset. 
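 * xs = 0 picks the zsu (unsigned 32-bit offset) row, xs = 1 the zss
 * (signed 32-bit offset) row, and xs = 2 the zd (64-bit offset) row
 * of the table below.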
*/ 6532static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = { 6533 { /* MTE Inactive */ 6534 { /* Little-endian */ 6535 { gen_helper_sve_stbd_zsu, 6536 gen_helper_sve_sthd_le_zsu, 6537 gen_helper_sve_stsd_le_zsu, 6538 gen_helper_sve_stdd_le_zsu, }, 6539 { gen_helper_sve_stbd_zss, 6540 gen_helper_sve_sthd_le_zss, 6541 gen_helper_sve_stsd_le_zss, 6542 gen_helper_sve_stdd_le_zss, }, 6543 { gen_helper_sve_stbd_zd, 6544 gen_helper_sve_sthd_le_zd, 6545 gen_helper_sve_stsd_le_zd, 6546 gen_helper_sve_stdd_le_zd, } }, 6547 { /* Big-endian */ 6548 { gen_helper_sve_stbd_zsu, 6549 gen_helper_sve_sthd_be_zsu, 6550 gen_helper_sve_stsd_be_zsu, 6551 gen_helper_sve_stdd_be_zsu, }, 6552 { gen_helper_sve_stbd_zss, 6553 gen_helper_sve_sthd_be_zss, 6554 gen_helper_sve_stsd_be_zss, 6555 gen_helper_sve_stdd_be_zss, }, 6556 { gen_helper_sve_stbd_zd, 6557 gen_helper_sve_sthd_be_zd, 6558 gen_helper_sve_stsd_be_zd, 6559 gen_helper_sve_stdd_be_zd, } } }, 6560 { /* MTE Active */ 6561 { /* Little-endian */ 6562 { gen_helper_sve_stbd_zsu_mte, 6563 gen_helper_sve_sthd_le_zsu_mte, 6564 gen_helper_sve_stsd_le_zsu_mte, 6565 gen_helper_sve_stdd_le_zsu_mte, }, 6566 { gen_helper_sve_stbd_zss_mte, 6567 gen_helper_sve_sthd_le_zss_mte, 6568 gen_helper_sve_stsd_le_zss_mte, 6569 gen_helper_sve_stdd_le_zss_mte, }, 6570 { gen_helper_sve_stbd_zd_mte, 6571 gen_helper_sve_sthd_le_zd_mte, 6572 gen_helper_sve_stsd_le_zd_mte, 6573 gen_helper_sve_stdd_le_zd_mte, } }, 6574 { /* Big-endian */ 6575 { gen_helper_sve_stbd_zsu_mte, 6576 gen_helper_sve_sthd_be_zsu_mte, 6577 gen_helper_sve_stsd_be_zsu_mte, 6578 gen_helper_sve_stdd_be_zsu_mte, }, 6579 { gen_helper_sve_stbd_zss_mte, 6580 gen_helper_sve_sthd_be_zss_mte, 6581 gen_helper_sve_stsd_be_zss_mte, 6582 gen_helper_sve_stdd_be_zss_mte, }, 6583 { gen_helper_sve_stbd_zd_mte, 6584 gen_helper_sve_sthd_be_zd_mte, 6585 gen_helper_sve_stsd_be_zd_mte, 6586 gen_helper_sve_stdd_be_zd_mte, } } }, 6587}; 6588 6589static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a) 6590{ 6591 gen_helper_gvec_mem_scatter *fn; 6592 bool be = s->be_data == MO_BE; 6593 bool mte = s->mte_active[0]; 6594 6595 if (a->esz < a->msz || (a->msz == 0 && a->scale)) { 6596 return false; 6597 } 6598 if (!sve_access_check(s)) { 6599 return true; 6600 } 6601 switch (a->esz) { 6602 case MO_32: 6603 fn = scatter_store_fn32[mte][be][a->xs][a->msz]; 6604 break; 6605 case MO_64: 6606 fn = scatter_store_fn64[mte][be][a->xs][a->msz]; 6607 break; 6608 default: 6609 g_assert_not_reached(); 6610 } 6611 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz, 6612 cpu_reg_sp(s, a->rn), a->msz, true, fn); 6613 return true; 6614} 6615 6616static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a) 6617{ 6618 gen_helper_gvec_mem_scatter *fn = NULL; 6619 bool be = s->be_data == MO_BE; 6620 bool mte = s->mte_active[0]; 6621 TCGv_i64 imm; 6622 6623 if (a->esz < a->msz) { 6624 return false; 6625 } 6626 if (!sve_access_check(s)) { 6627 return true; 6628 } 6629 6630 switch (a->esz) { 6631 case MO_32: 6632 fn = scatter_store_fn32[mte][be][0][a->msz]; 6633 break; 6634 case MO_64: 6635 fn = scatter_store_fn64[mte][be][2][a->msz]; 6636 break; 6637 } 6638 assert(fn != NULL); 6639 6640 /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x]) 6641 * by loading the immediate into the scalar parameter. 
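 * The vector register then supplies the per-element addresses while the
 * pre-scaled immediate (a->imm << msz bytes) rides in the scalar slot,
 * so a single helper signature serves both addressing forms.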
6642 */ 6643 imm = tcg_const_i64(a->imm << a->msz); 6644 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, true, fn); 6645 tcg_temp_free_i64(imm); 6646 return true; 6647} 6648 6649static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a) 6650{ 6651 if (!dc_isar_feature(aa64_sve2, s)) { 6652 return false; 6653 } 6654 return trans_ST1_zprz(s, a); 6655} 6656 6657/* 6658 * Prefetches 6659 */ 6660 6661static bool trans_PRF(DisasContext *s, arg_PRF *a) 6662{ 6663 /* Prefetch is a nop within QEMU. */ 6664 (void)sve_access_check(s); 6665 return true; 6666} 6667 6668static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a) 6669{ 6670 if (a->rm == 31) { 6671 return false; 6672 } 6673 /* Prefetch is a nop within QEMU. */ 6674 (void)sve_access_check(s); 6675 return true; 6676} 6677 6678/* 6679 * Move Prefix 6680 * 6681 * TODO: The implementation so far could handle predicated merging movprfx. 6682 * The helper functions as written take an extra source register to 6683 * use in the operation, but the result is only written when predication 6684 * succeeds. For unpredicated movprfx, we need to rearrange the helpers 6685 * to allow the final write back to the destination to be unconditional. 6686 * For predicated zeroing movprfx, we need to rearrange the helpers to 6687 * allow the final write back to zero inactives. 6688 * 6689 * In the meantime, just emit the moves. 6690 */ 6691 6692static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a) 6693{ 6694 return do_mov_z(s, a->rd, a->rn); 6695} 6696 6697static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a) 6698{ 6699 if (sve_access_check(s)) { 6700 do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz); 6701 } 6702 return true; 6703} 6704 6705static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a) 6706{ 6707 return do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz, false); 6708} 6709 6710/* 6711 * SVE2 Integer Multiply - Unpredicated 6712 */ 6713 6714static bool trans_MUL_zzz(DisasContext *s, arg_rrr_esz *a) 6715{ 6716 if (!dc_isar_feature(aa64_sve2, s)) { 6717 return false; 6718 } 6719 if (sve_access_check(s)) { 6720 gen_gvec_fn_zzz(s, tcg_gen_gvec_mul, a->esz, a->rd, a->rn, a->rm); 6721 } 6722 return true; 6723} 6724 6725static bool do_sve2_zzz_ool(DisasContext *s, arg_rrr_esz *a, 6726 gen_helper_gvec_3 *fn) 6727{ 6728 if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) { 6729 return false; 6730 } 6731 if (sve_access_check(s)) { 6732 gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0); 6733 } 6734 return true; 6735} 6736 6737static bool trans_SMULH_zzz(DisasContext *s, arg_rrr_esz *a) 6738{ 6739 static gen_helper_gvec_3 * const fns[4] = { 6740 gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h, 6741 gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d, 6742 }; 6743 return do_sve2_zzz_ool(s, a, fns[a->esz]); 6744} 6745 6746static bool trans_UMULH_zzz(DisasContext *s, arg_rrr_esz *a) 6747{ 6748 static gen_helper_gvec_3 * const fns[4] = { 6749 gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h, 6750 gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d, 6751 }; 6752 return do_sve2_zzz_ool(s, a, fns[a->esz]); 6753} 6754 6755static bool trans_PMUL_zzz(DisasContext *s, arg_rrr_esz *a) 6756{ 6757 return do_sve2_zzz_ool(s, a, gen_helper_gvec_pmul_b); 6758} 6759 6760static bool trans_SQDMULH_zzz(DisasContext *s, arg_rrr_esz *a) 6761{ 6762 static gen_helper_gvec_3 * const fns[4] = { 6763 gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h, 6764 gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d, 6765 }; 6766 return do_sve2_zzz_ool(s, a, fns[a->esz]); 6767} 6768 6769static bool 
trans_SQRDMULH_zzz(DisasContext *s, arg_rrr_esz *a) 6770{ 6771 static gen_helper_gvec_3 * const fns[4] = { 6772 gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h, 6773 gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d, 6774 }; 6775 return do_sve2_zzz_ool(s, a, fns[a->esz]); 6776} 6777 6778/* 6779 * SVE2 Integer - Predicated 6780 */ 6781 6782static bool do_sve2_zpzz_ool(DisasContext *s, arg_rprr_esz *a, 6783 gen_helper_gvec_4 *fn) 6784{ 6785 if (!dc_isar_feature(aa64_sve2, s)) { 6786 return false; 6787 } 6788 return do_zpzz_ool(s, a, fn); 6789} 6790 6791static bool trans_SADALP_zpzz(DisasContext *s, arg_rprr_esz *a) 6792{ 6793 static gen_helper_gvec_4 * const fns[3] = { 6794 gen_helper_sve2_sadalp_zpzz_h, 6795 gen_helper_sve2_sadalp_zpzz_s, 6796 gen_helper_sve2_sadalp_zpzz_d, 6797 }; 6798 if (a->esz == 0) { 6799 return false; 6800 } 6801 return do_sve2_zpzz_ool(s, a, fns[a->esz - 1]); 6802} 6803 6804static bool trans_UADALP_zpzz(DisasContext *s, arg_rprr_esz *a) 6805{ 6806 static gen_helper_gvec_4 * const fns[3] = { 6807 gen_helper_sve2_uadalp_zpzz_h, 6808 gen_helper_sve2_uadalp_zpzz_s, 6809 gen_helper_sve2_uadalp_zpzz_d, 6810 }; 6811 if (a->esz == 0) { 6812 return false; 6813 } 6814 return do_sve2_zpzz_ool(s, a, fns[a->esz - 1]); 6815} 6816 6817/* 6818 * SVE2 integer unary operations (predicated) 6819 */ 6820 6821static bool do_sve2_zpz_ool(DisasContext *s, arg_rpr_esz *a, 6822 gen_helper_gvec_3 *fn) 6823{ 6824 if (!dc_isar_feature(aa64_sve2, s)) { 6825 return false; 6826 } 6827 return do_zpz_ool(s, a, fn); 6828} 6829 6830static bool trans_URECPE(DisasContext *s, arg_rpr_esz *a) 6831{ 6832 if (a->esz != 2) { 6833 return false; 6834 } 6835 return do_sve2_zpz_ool(s, a, gen_helper_sve2_urecpe_s); 6836} 6837 6838static bool trans_URSQRTE(DisasContext *s, arg_rpr_esz *a) 6839{ 6840 if (a->esz != 2) { 6841 return false; 6842 } 6843 return do_sve2_zpz_ool(s, a, gen_helper_sve2_ursqrte_s); 6844} 6845 6846static bool trans_SQABS(DisasContext *s, arg_rpr_esz *a) 6847{ 6848 static gen_helper_gvec_3 * const fns[4] = { 6849 gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h, 6850 gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d, 6851 }; 6852 return do_sve2_zpz_ool(s, a, fns[a->esz]); 6853} 6854 6855static bool trans_SQNEG(DisasContext *s, arg_rpr_esz *a) 6856{ 6857 static gen_helper_gvec_3 * const fns[4] = { 6858 gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h, 6859 gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d, 6860 }; 6861 return do_sve2_zpz_ool(s, a, fns[a->esz]); 6862} 6863 6864#define DO_SVE2_ZPZZ(NAME, name) \ 6865static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \ 6866{ \ 6867 static gen_helper_gvec_4 * const fns[4] = { \ 6868 gen_helper_sve2_##name##_zpzz_b, gen_helper_sve2_##name##_zpzz_h, \ 6869 gen_helper_sve2_##name##_zpzz_s, gen_helper_sve2_##name##_zpzz_d, \ 6870 }; \ 6871 return do_sve2_zpzz_ool(s, a, fns[a->esz]); \ 6872} 6873 6874DO_SVE2_ZPZZ(SQSHL, sqshl) 6875DO_SVE2_ZPZZ(SQRSHL, sqrshl) 6876DO_SVE2_ZPZZ(SRSHL, srshl) 6877 6878DO_SVE2_ZPZZ(UQSHL, uqshl) 6879DO_SVE2_ZPZZ(UQRSHL, uqrshl) 6880DO_SVE2_ZPZZ(URSHL, urshl) 6881 6882DO_SVE2_ZPZZ(SHADD, shadd) 6883DO_SVE2_ZPZZ(SRHADD, srhadd) 6884DO_SVE2_ZPZZ(SHSUB, shsub) 6885 6886DO_SVE2_ZPZZ(UHADD, uhadd) 6887DO_SVE2_ZPZZ(URHADD, urhadd) 6888DO_SVE2_ZPZZ(UHSUB, uhsub) 6889 6890DO_SVE2_ZPZZ(ADDP, addp) 6891DO_SVE2_ZPZZ(SMAXP, smaxp) 6892DO_SVE2_ZPZZ(UMAXP, umaxp) 6893DO_SVE2_ZPZZ(SMINP, sminp) 6894DO_SVE2_ZPZZ(UMINP, uminp) 6895 6896DO_SVE2_ZPZZ(SQADD_zpzz, sqadd) 6897DO_SVE2_ZPZZ(UQADD_zpzz, uqadd) 
6898DO_SVE2_ZPZZ(SQSUB_zpzz, sqsub) 6899DO_SVE2_ZPZZ(UQSUB_zpzz, uqsub) 6900DO_SVE2_ZPZZ(SUQADD, suqadd) 6901DO_SVE2_ZPZZ(USQADD, usqadd) 6902 6903/* 6904 * SVE2 Widening Integer Arithmetic 6905 */ 6906 6907static bool do_sve2_zzw_ool(DisasContext *s, arg_rrr_esz *a, 6908 gen_helper_gvec_3 *fn, int data) 6909{ 6910 if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) { 6911 return false; 6912 } 6913 if (sve_access_check(s)) { 6914 unsigned vsz = vec_full_reg_size(s); 6915 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd), 6916 vec_full_reg_offset(s, a->rn), 6917 vec_full_reg_offset(s, a->rm), 6918 vsz, vsz, data, fn); 6919 } 6920 return true; 6921} 6922 6923#define DO_SVE2_ZZZ_TB(NAME, name, SEL1, SEL2) \ 6924static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \ 6925{ \ 6926 static gen_helper_gvec_3 * const fns[4] = { \ 6927 NULL, gen_helper_sve2_##name##_h, \ 6928 gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \ 6929 }; \ 6930 return do_sve2_zzw_ool(s, a, fns[a->esz], (SEL2 << 1) | SEL1); \ 6931} 6932 6933DO_SVE2_ZZZ_TB(SADDLB, saddl, false, false) 6934DO_SVE2_ZZZ_TB(SSUBLB, ssubl, false, false) 6935DO_SVE2_ZZZ_TB(SABDLB, sabdl, false, false) 6936 6937DO_SVE2_ZZZ_TB(UADDLB, uaddl, false, false) 6938DO_SVE2_ZZZ_TB(USUBLB, usubl, false, false) 6939DO_SVE2_ZZZ_TB(UABDLB, uabdl, false, false) 6940 6941DO_SVE2_ZZZ_TB(SADDLT, saddl, true, true) 6942DO_SVE2_ZZZ_TB(SSUBLT, ssubl, true, true) 6943DO_SVE2_ZZZ_TB(SABDLT, sabdl, true, true) 6944 6945DO_SVE2_ZZZ_TB(UADDLT, uaddl, true, true) 6946DO_SVE2_ZZZ_TB(USUBLT, usubl, true, true) 6947DO_SVE2_ZZZ_TB(UABDLT, uabdl, true, true) 6948 6949DO_SVE2_ZZZ_TB(SADDLBT, saddl, false, true) 6950DO_SVE2_ZZZ_TB(SSUBLBT, ssubl, false, true) 6951DO_SVE2_ZZZ_TB(SSUBLTB, ssubl, true, false) 6952 6953DO_SVE2_ZZZ_TB(SQDMULLB_zzz, sqdmull_zzz, false, false) 6954DO_SVE2_ZZZ_TB(SQDMULLT_zzz, sqdmull_zzz, true, true) 6955 6956DO_SVE2_ZZZ_TB(SMULLB_zzz, smull_zzz, false, false) 6957DO_SVE2_ZZZ_TB(SMULLT_zzz, smull_zzz, true, true) 6958 6959DO_SVE2_ZZZ_TB(UMULLB_zzz, umull_zzz, false, false) 6960DO_SVE2_ZZZ_TB(UMULLT_zzz, umull_zzz, true, true) 6961 6962static bool do_eor_tb(DisasContext *s, arg_rrr_esz *a, bool sel1) 6963{ 6964 static gen_helper_gvec_3 * const fns[4] = { 6965 gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h, 6966 gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d, 6967 }; 6968 return do_sve2_zzw_ool(s, a, fns[a->esz], (!sel1 << 1) | sel1); 6969} 6970 6971static bool trans_EORBT(DisasContext *s, arg_rrr_esz *a) 6972{ 6973 return do_eor_tb(s, a, false); 6974} 6975 6976static bool trans_EORTB(DisasContext *s, arg_rrr_esz *a) 6977{ 6978 return do_eor_tb(s, a, true); 6979} 6980 6981static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel) 6982{ 6983 static gen_helper_gvec_3 * const fns[4] = { 6984 gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h, 6985 NULL, gen_helper_sve2_pmull_d, 6986 }; 6987 if (a->esz == 0 && !dc_isar_feature(aa64_sve2_pmull128, s)) { 6988 return false; 6989 } 6990 return do_sve2_zzw_ool(s, a, fns[a->esz], sel); 6991} 6992 6993static bool trans_PMULLB(DisasContext *s, arg_rrr_esz *a) 6994{ 6995 return do_trans_pmull(s, a, false); 6996} 6997 6998static bool trans_PMULLT(DisasContext *s, arg_rrr_esz *a) 6999{ 7000 return do_trans_pmull(s, a, true); 7001} 7002 7003#define DO_SVE2_ZZZ_WTB(NAME, name, SEL2) \ 7004static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \ 7005{ \ 7006 static gen_helper_gvec_3 * const fns[4] = { \ 7007 NULL, gen_helper_sve2_##name##_h, \ 7008 gen_helper_sve2_##name##_s, 
gen_helper_sve2_##name##_d, \ 7009 }; \ 7010 return do_sve2_zzw_ool(s, a, fns[a->esz], SEL2); \ 7011} 7012 7013DO_SVE2_ZZZ_WTB(SADDWB, saddw, false) 7014DO_SVE2_ZZZ_WTB(SADDWT, saddw, true) 7015DO_SVE2_ZZZ_WTB(SSUBWB, ssubw, false) 7016DO_SVE2_ZZZ_WTB(SSUBWT, ssubw, true) 7017 7018DO_SVE2_ZZZ_WTB(UADDWB, uaddw, false) 7019DO_SVE2_ZZZ_WTB(UADDWT, uaddw, true) 7020DO_SVE2_ZZZ_WTB(USUBWB, usubw, false) 7021DO_SVE2_ZZZ_WTB(USUBWT, usubw, true) 7022 7023static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm) 7024{ 7025 int top = imm & 1; 7026 int shl = imm >> 1; 7027 int halfbits = 4 << vece; 7028 7029 if (top) { 7030 if (shl == halfbits) { 7031 TCGv_vec t = tcg_temp_new_vec_matching(d); 7032 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits)); 7033 tcg_gen_and_vec(vece, d, n, t); 7034 tcg_temp_free_vec(t); 7035 } else { 7036 tcg_gen_sari_vec(vece, d, n, halfbits); 7037 tcg_gen_shli_vec(vece, d, d, shl); 7038 } 7039 } else { 7040 tcg_gen_shli_vec(vece, d, n, halfbits); 7041 tcg_gen_sari_vec(vece, d, d, halfbits - shl); 7042 } 7043} 7044 7045static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm) 7046{ 7047 int halfbits = 4 << vece; 7048 int top = imm & 1; 7049 int shl = (imm >> 1); 7050 int shift; 7051 uint64_t mask; 7052 7053 mask = MAKE_64BIT_MASK(0, halfbits); 7054 mask <<= shl; 7055 mask = dup_const(vece, mask); 7056 7057 shift = shl - top * halfbits; 7058 if (shift < 0) { 7059 tcg_gen_shri_i64(d, n, -shift); 7060 } else { 7061 tcg_gen_shli_i64(d, n, shift); 7062 } 7063 tcg_gen_andi_i64(d, d, mask); 7064} 7065 7066static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm) 7067{ 7068 gen_ushll_i64(MO_16, d, n, imm); 7069} 7070 7071static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm) 7072{ 7073 gen_ushll_i64(MO_32, d, n, imm); 7074} 7075 7076static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm) 7077{ 7078 gen_ushll_i64(MO_64, d, n, imm); 7079} 7080 7081static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm) 7082{ 7083 int halfbits = 4 << vece; 7084 int top = imm & 1; 7085 int shl = imm >> 1; 7086 7087 if (top) { 7088 if (shl == halfbits) { 7089 TCGv_vec t = tcg_temp_new_vec_matching(d); 7090 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits)); 7091 tcg_gen_and_vec(vece, d, n, t); 7092 tcg_temp_free_vec(t); 7093 } else { 7094 tcg_gen_shri_vec(vece, d, n, halfbits); 7095 tcg_gen_shli_vec(vece, d, d, shl); 7096 } 7097 } else { 7098 if (shl == 0) { 7099 TCGv_vec t = tcg_temp_new_vec_matching(d); 7100 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 7101 tcg_gen_and_vec(vece, d, n, t); 7102 tcg_temp_free_vec(t); 7103 } else { 7104 tcg_gen_shli_vec(vece, d, n, halfbits); 7105 tcg_gen_shri_vec(vece, d, d, halfbits - shl); 7106 } 7107 } 7108} 7109 7110static bool do_sve2_shll_tb(DisasContext *s, arg_rri_esz *a, 7111 bool sel, bool uns) 7112{ 7113 static const TCGOpcode sshll_list[] = { 7114 INDEX_op_shli_vec, INDEX_op_sari_vec, 0 7115 }; 7116 static const TCGOpcode ushll_list[] = { 7117 INDEX_op_shli_vec, INDEX_op_shri_vec, 0 7118 }; 7119 static const GVecGen2i ops[2][3] = { 7120 { { .fniv = gen_sshll_vec, 7121 .opt_opc = sshll_list, 7122 .fno = gen_helper_sve2_sshll_h, 7123 .vece = MO_16 }, 7124 { .fniv = gen_sshll_vec, 7125 .opt_opc = sshll_list, 7126 .fno = gen_helper_sve2_sshll_s, 7127 .vece = MO_32 }, 7128 { .fniv = gen_sshll_vec, 7129 .opt_opc = sshll_list, 7130 .fno = gen_helper_sve2_sshll_d, 7131 .vece = MO_64 } }, 7132 { { .fni8 = gen_ushll16_i64, 7133 .fniv = 
gen_ushll_vec, 7134 .opt_opc = ushll_list, 7135 .fno = gen_helper_sve2_ushll_h, 7136 .vece = MO_16 }, 7137 { .fni8 = gen_ushll32_i64, 7138 .fniv = gen_ushll_vec, 7139 .opt_opc = ushll_list, 7140 .fno = gen_helper_sve2_ushll_s, 7141 .vece = MO_32 }, 7142 { .fni8 = gen_ushll64_i64, 7143 .fniv = gen_ushll_vec, 7144 .opt_opc = ushll_list, 7145 .fno = gen_helper_sve2_ushll_d, 7146 .vece = MO_64 } }, 7147 }; 7148 7149 if (a->esz < 0 || a->esz > 2 || !dc_isar_feature(aa64_sve2, s)) { 7150 return false; 7151 } 7152 if (sve_access_check(s)) { 7153 unsigned vsz = vec_full_reg_size(s); 7154 tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd), 7155 vec_full_reg_offset(s, a->rn), 7156 vsz, vsz, (a->imm << 1) | sel, 7157 &ops[uns][a->esz]); 7158 } 7159 return true; 7160} 7161 7162static bool trans_SSHLLB(DisasContext *s, arg_rri_esz *a) 7163{ 7164 return do_sve2_shll_tb(s, a, false, false); 7165} 7166 7167static bool trans_SSHLLT(DisasContext *s, arg_rri_esz *a) 7168{ 7169 return do_sve2_shll_tb(s, a, true, false); 7170} 7171 7172static bool trans_USHLLB(DisasContext *s, arg_rri_esz *a) 7173{ 7174 return do_sve2_shll_tb(s, a, false, true); 7175} 7176 7177static bool trans_USHLLT(DisasContext *s, arg_rri_esz *a) 7178{ 7179 return do_sve2_shll_tb(s, a, true, true); 7180} 7181 7182static bool trans_BEXT(DisasContext *s, arg_rrr_esz *a) 7183{ 7184 static gen_helper_gvec_3 * const fns[4] = { 7185 gen_helper_sve2_bext_b, gen_helper_sve2_bext_h, 7186 gen_helper_sve2_bext_s, gen_helper_sve2_bext_d, 7187 }; 7188 if (!dc_isar_feature(aa64_sve2_bitperm, s)) { 7189 return false; 7190 } 7191 return do_sve2_zzw_ool(s, a, fns[a->esz], 0); 7192} 7193 7194static bool trans_BDEP(DisasContext *s, arg_rrr_esz *a) 7195{ 7196 static gen_helper_gvec_3 * const fns[4] = { 7197 gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h, 7198 gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d, 7199 }; 7200 if (!dc_isar_feature(aa64_sve2_bitperm, s)) { 7201 return false; 7202 } 7203 return do_sve2_zzw_ool(s, a, fns[a->esz], 0); 7204} 7205 7206static bool trans_BGRP(DisasContext *s, arg_rrr_esz *a) 7207{ 7208 static gen_helper_gvec_3 * const fns[4] = { 7209 gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h, 7210 gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d, 7211 }; 7212 if (!dc_isar_feature(aa64_sve2_bitperm, s)) { 7213 return false; 7214 } 7215 return do_sve2_zzw_ool(s, a, fns[a->esz], 0); 7216} 7217 7218static bool do_cadd(DisasContext *s, arg_rrr_esz *a, bool sq, bool rot) 7219{ 7220 static gen_helper_gvec_3 * const fns[2][4] = { 7221 { gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h, 7222 gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d }, 7223 { gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h, 7224 gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d }, 7225 }; 7226 return do_sve2_zzw_ool(s, a, fns[sq][a->esz], rot); 7227} 7228 7229static bool trans_CADD_rot90(DisasContext *s, arg_rrr_esz *a) 7230{ 7231 return do_cadd(s, a, false, false); 7232} 7233 7234static bool trans_CADD_rot270(DisasContext *s, arg_rrr_esz *a) 7235{ 7236 return do_cadd(s, a, false, true); 7237} 7238 7239static bool trans_SQCADD_rot90(DisasContext *s, arg_rrr_esz *a) 7240{ 7241 return do_cadd(s, a, true, false); 7242} 7243 7244static bool trans_SQCADD_rot270(DisasContext *s, arg_rrr_esz *a) 7245{ 7246 return do_cadd(s, a, true, true); 7247} 7248 7249static bool do_sve2_zzzz_ool(DisasContext *s, arg_rrrr_esz *a, 7250 gen_helper_gvec_4 *fn, int data) 7251{ 7252 if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) { 7253 return false; 7254 } 7255 if (sve_access_check(s)) { 7256 
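        /*
         * 'data' is packed into the simd descriptor and decoded by the
         * helper; callers below use it for top/bottom selection
         * (do_abal, do_sqdmlal_zzzw) or to fold in extra flags (do_adcl).
         */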
gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data); 7257 } 7258 return true; 7259} 7260 7261static bool do_abal(DisasContext *s, arg_rrrr_esz *a, bool uns, bool sel) 7262{ 7263 static gen_helper_gvec_4 * const fns[2][4] = { 7264 { NULL, gen_helper_sve2_sabal_h, 7265 gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d }, 7266 { NULL, gen_helper_sve2_uabal_h, 7267 gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d }, 7268 }; 7269 return do_sve2_zzzz_ool(s, a, fns[uns][a->esz], sel); 7270} 7271 7272static bool trans_SABALB(DisasContext *s, arg_rrrr_esz *a) 7273{ 7274 return do_abal(s, a, false, false); 7275} 7276 7277static bool trans_SABALT(DisasContext *s, arg_rrrr_esz *a) 7278{ 7279 return do_abal(s, a, false, true); 7280} 7281 7282static bool trans_UABALB(DisasContext *s, arg_rrrr_esz *a) 7283{ 7284 return do_abal(s, a, true, false); 7285} 7286 7287static bool trans_UABALT(DisasContext *s, arg_rrrr_esz *a) 7288{ 7289 return do_abal(s, a, true, true); 7290} 7291 7292static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel) 7293{ 7294 static gen_helper_gvec_4 * const fns[2] = { 7295 gen_helper_sve2_adcl_s, 7296 gen_helper_sve2_adcl_d, 7297 }; 7298 /* 7299 * Note that in this case the ESZ field encodes both size and sign. 7300 * Split out 'subtract' into bit 1 of the data field for the helper. 7301 */ 7302 return do_sve2_zzzz_ool(s, a, fns[a->esz & 1], (a->esz & 2) | sel); 7303} 7304 7305static bool trans_ADCLB(DisasContext *s, arg_rrrr_esz *a) 7306{ 7307 return do_adcl(s, a, false); 7308} 7309 7310static bool trans_ADCLT(DisasContext *s, arg_rrrr_esz *a) 7311{ 7312 return do_adcl(s, a, true); 7313} 7314 7315static bool do_sve2_fn2i(DisasContext *s, arg_rri_esz *a, GVecGen2iFn *fn) 7316{ 7317 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) { 7318 return false; 7319 } 7320 if (sve_access_check(s)) { 7321 unsigned vsz = vec_full_reg_size(s); 7322 unsigned rd_ofs = vec_full_reg_offset(s, a->rd); 7323 unsigned rn_ofs = vec_full_reg_offset(s, a->rn); 7324 fn(a->esz, rd_ofs, rn_ofs, a->imm, vsz, vsz); 7325 } 7326 return true; 7327} 7328 7329static bool trans_SSRA(DisasContext *s, arg_rri_esz *a) 7330{ 7331 return do_sve2_fn2i(s, a, gen_gvec_ssra); 7332} 7333 7334static bool trans_USRA(DisasContext *s, arg_rri_esz *a) 7335{ 7336 return do_sve2_fn2i(s, a, gen_gvec_usra); 7337} 7338 7339static bool trans_SRSRA(DisasContext *s, arg_rri_esz *a) 7340{ 7341 return do_sve2_fn2i(s, a, gen_gvec_srsra); 7342} 7343 7344static bool trans_URSRA(DisasContext *s, arg_rri_esz *a) 7345{ 7346 return do_sve2_fn2i(s, a, gen_gvec_ursra); 7347} 7348 7349static bool trans_SRI(DisasContext *s, arg_rri_esz *a) 7350{ 7351 return do_sve2_fn2i(s, a, gen_gvec_sri); 7352} 7353 7354static bool trans_SLI(DisasContext *s, arg_rri_esz *a) 7355{ 7356 return do_sve2_fn2i(s, a, gen_gvec_sli); 7357} 7358 7359static bool do_sve2_fn_zzz(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *fn) 7360{ 7361 if (!dc_isar_feature(aa64_sve2, s)) { 7362 return false; 7363 } 7364 if (sve_access_check(s)) { 7365 gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm); 7366 } 7367 return true; 7368} 7369 7370static bool trans_SABA(DisasContext *s, arg_rrr_esz *a) 7371{ 7372 return do_sve2_fn_zzz(s, a, gen_gvec_saba); 7373} 7374 7375static bool trans_UABA(DisasContext *s, arg_rrr_esz *a) 7376{ 7377 return do_sve2_fn_zzz(s, a, gen_gvec_uaba); 7378} 7379 7380static bool do_sve2_narrow_extract(DisasContext *s, arg_rri_esz *a, 7381 const GVecGen2 ops[3]) 7382{ 7383 if (a->esz < 0 || a->esz > MO_32 || a->imm != 0 || 7384 !dc_isar_feature(aa64_sve2, s)) { 
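        /*
         * a->esz names the narrow destination element, so the widest
         * source (MO_64) appears here as esz == MO_32; the decode's
         * immediate carries no shift for these forms and must be zero.
         */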
7385 return false; 7386 } 7387 if (sve_access_check(s)) { 7388 unsigned vsz = vec_full_reg_size(s); 7389 tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd), 7390 vec_full_reg_offset(s, a->rn), 7391 vsz, vsz, &ops[a->esz]); 7392 } 7393 return true; 7394} 7395 7396static const TCGOpcode sqxtn_list[] = { 7397 INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0 7398}; 7399 7400static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 7401{ 7402 TCGv_vec t = tcg_temp_new_vec_matching(d); 7403 int halfbits = 4 << vece; 7404 int64_t mask = (1ull << halfbits) - 1; 7405 int64_t min = -1ull << (halfbits - 1); 7406 int64_t max = -min - 1; 7407 7408 tcg_gen_dupi_vec(vece, t, min); 7409 tcg_gen_smax_vec(vece, d, n, t); 7410 tcg_gen_dupi_vec(vece, t, max); 7411 tcg_gen_smin_vec(vece, d, d, t); 7412 tcg_gen_dupi_vec(vece, t, mask); 7413 tcg_gen_and_vec(vece, d, d, t); 7414 tcg_temp_free_vec(t); 7415} 7416 7417static bool trans_SQXTNB(DisasContext *s, arg_rri_esz *a) 7418{ 7419 static const GVecGen2 ops[3] = { 7420 { .fniv = gen_sqxtnb_vec, 7421 .opt_opc = sqxtn_list, 7422 .fno = gen_helper_sve2_sqxtnb_h, 7423 .vece = MO_16 }, 7424 { .fniv = gen_sqxtnb_vec, 7425 .opt_opc = sqxtn_list, 7426 .fno = gen_helper_sve2_sqxtnb_s, 7427 .vece = MO_32 }, 7428 { .fniv = gen_sqxtnb_vec, 7429 .opt_opc = sqxtn_list, 7430 .fno = gen_helper_sve2_sqxtnb_d, 7431 .vece = MO_64 }, 7432 }; 7433 return do_sve2_narrow_extract(s, a, ops); 7434} 7435 7436static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 7437{ 7438 TCGv_vec t = tcg_temp_new_vec_matching(d); 7439 int halfbits = 4 << vece; 7440 int64_t mask = (1ull << halfbits) - 1; 7441 int64_t min = -1ull << (halfbits - 1); 7442 int64_t max = -min - 1; 7443 7444 tcg_gen_dupi_vec(vece, t, min); 7445 tcg_gen_smax_vec(vece, n, n, t); 7446 tcg_gen_dupi_vec(vece, t, max); 7447 tcg_gen_smin_vec(vece, n, n, t); 7448 tcg_gen_shli_vec(vece, n, n, halfbits); 7449 tcg_gen_dupi_vec(vece, t, mask); 7450 tcg_gen_bitsel_vec(vece, d, t, d, n); 7451 tcg_temp_free_vec(t); 7452} 7453 7454static bool trans_SQXTNT(DisasContext *s, arg_rri_esz *a) 7455{ 7456 static const GVecGen2 ops[3] = { 7457 { .fniv = gen_sqxtnt_vec, 7458 .opt_opc = sqxtn_list, 7459 .load_dest = true, 7460 .fno = gen_helper_sve2_sqxtnt_h, 7461 .vece = MO_16 }, 7462 { .fniv = gen_sqxtnt_vec, 7463 .opt_opc = sqxtn_list, 7464 .load_dest = true, 7465 .fno = gen_helper_sve2_sqxtnt_s, 7466 .vece = MO_32 }, 7467 { .fniv = gen_sqxtnt_vec, 7468 .opt_opc = sqxtn_list, 7469 .load_dest = true, 7470 .fno = gen_helper_sve2_sqxtnt_d, 7471 .vece = MO_64 }, 7472 }; 7473 return do_sve2_narrow_extract(s, a, ops); 7474} 7475 7476static const TCGOpcode uqxtn_list[] = { 7477 INDEX_op_shli_vec, INDEX_op_umin_vec, 0 7478}; 7479 7480static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 7481{ 7482 TCGv_vec t = tcg_temp_new_vec_matching(d); 7483 int halfbits = 4 << vece; 7484 int64_t max = (1ull << halfbits) - 1; 7485 7486 tcg_gen_dupi_vec(vece, t, max); 7487 tcg_gen_umin_vec(vece, d, n, t); 7488 tcg_temp_free_vec(t); 7489} 7490 7491static bool trans_UQXTNB(DisasContext *s, arg_rri_esz *a) 7492{ 7493 static const GVecGen2 ops[3] = { 7494 { .fniv = gen_uqxtnb_vec, 7495 .opt_opc = uqxtn_list, 7496 .fno = gen_helper_sve2_uqxtnb_h, 7497 .vece = MO_16 }, 7498 { .fniv = gen_uqxtnb_vec, 7499 .opt_opc = uqxtn_list, 7500 .fno = gen_helper_sve2_uqxtnb_s, 7501 .vece = MO_32 }, 7502 { .fniv = gen_uqxtnb_vec, 7503 .opt_opc = uqxtn_list, 7504 .fno = gen_helper_sve2_uqxtnb_d, 7505 .vece = MO_64 }, 7506 }; 7507 return 
do_sve2_narrow_extract(s, a, ops); 7508} 7509 7510static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 7511{ 7512 TCGv_vec t = tcg_temp_new_vec_matching(d); 7513 int halfbits = 4 << vece; 7514 int64_t max = (1ull << halfbits) - 1; 7515 7516 tcg_gen_dupi_vec(vece, t, max); 7517 tcg_gen_umin_vec(vece, n, n, t); 7518 tcg_gen_shli_vec(vece, n, n, halfbits); 7519 tcg_gen_bitsel_vec(vece, d, t, d, n); 7520 tcg_temp_free_vec(t); 7521} 7522 7523static bool trans_UQXTNT(DisasContext *s, arg_rri_esz *a) 7524{ 7525 static const GVecGen2 ops[3] = { 7526 { .fniv = gen_uqxtnt_vec, 7527 .opt_opc = uqxtn_list, 7528 .load_dest = true, 7529 .fno = gen_helper_sve2_uqxtnt_h, 7530 .vece = MO_16 }, 7531 { .fniv = gen_uqxtnt_vec, 7532 .opt_opc = uqxtn_list, 7533 .load_dest = true, 7534 .fno = gen_helper_sve2_uqxtnt_s, 7535 .vece = MO_32 }, 7536 { .fniv = gen_uqxtnt_vec, 7537 .opt_opc = uqxtn_list, 7538 .load_dest = true, 7539 .fno = gen_helper_sve2_uqxtnt_d, 7540 .vece = MO_64 }, 7541 }; 7542 return do_sve2_narrow_extract(s, a, ops); 7543} 7544 7545static const TCGOpcode sqxtun_list[] = { 7546 INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0 7547}; 7548 7549static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 7550{ 7551 TCGv_vec t = tcg_temp_new_vec_matching(d); 7552 int halfbits = 4 << vece; 7553 int64_t max = (1ull << halfbits) - 1; 7554 7555 tcg_gen_dupi_vec(vece, t, 0); 7556 tcg_gen_smax_vec(vece, d, n, t); 7557 tcg_gen_dupi_vec(vece, t, max); 7558 tcg_gen_umin_vec(vece, d, d, t); 7559 tcg_temp_free_vec(t); 7560} 7561 7562static bool trans_SQXTUNB(DisasContext *s, arg_rri_esz *a) 7563{ 7564 static const GVecGen2 ops[3] = { 7565 { .fniv = gen_sqxtunb_vec, 7566 .opt_opc = sqxtun_list, 7567 .fno = gen_helper_sve2_sqxtunb_h, 7568 .vece = MO_16 }, 7569 { .fniv = gen_sqxtunb_vec, 7570 .opt_opc = sqxtun_list, 7571 .fno = gen_helper_sve2_sqxtunb_s, 7572 .vece = MO_32 }, 7573 { .fniv = gen_sqxtunb_vec, 7574 .opt_opc = sqxtun_list, 7575 .fno = gen_helper_sve2_sqxtunb_d, 7576 .vece = MO_64 }, 7577 }; 7578 return do_sve2_narrow_extract(s, a, ops); 7579} 7580 7581static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n) 7582{ 7583 TCGv_vec t = tcg_temp_new_vec_matching(d); 7584 int halfbits = 4 << vece; 7585 int64_t max = (1ull << halfbits) - 1; 7586 7587 tcg_gen_dupi_vec(vece, t, 0); 7588 tcg_gen_smax_vec(vece, n, n, t); 7589 tcg_gen_dupi_vec(vece, t, max); 7590 tcg_gen_umin_vec(vece, n, n, t); 7591 tcg_gen_shli_vec(vece, n, n, halfbits); 7592 tcg_gen_bitsel_vec(vece, d, t, d, n); 7593 tcg_temp_free_vec(t); 7594} 7595 7596static bool trans_SQXTUNT(DisasContext *s, arg_rri_esz *a) 7597{ 7598 static const GVecGen2 ops[3] = { 7599 { .fniv = gen_sqxtunt_vec, 7600 .opt_opc = sqxtun_list, 7601 .load_dest = true, 7602 .fno = gen_helper_sve2_sqxtunt_h, 7603 .vece = MO_16 }, 7604 { .fniv = gen_sqxtunt_vec, 7605 .opt_opc = sqxtun_list, 7606 .load_dest = true, 7607 .fno = gen_helper_sve2_sqxtunt_s, 7608 .vece = MO_32 }, 7609 { .fniv = gen_sqxtunt_vec, 7610 .opt_opc = sqxtun_list, 7611 .load_dest = true, 7612 .fno = gen_helper_sve2_sqxtunt_d, 7613 .vece = MO_64 }, 7614 }; 7615 return do_sve2_narrow_extract(s, a, ops); 7616} 7617 7618static bool do_sve2_shr_narrow(DisasContext *s, arg_rri_esz *a, 7619 const GVecGen2i ops[3]) 7620{ 7621 if (a->esz < 0 || a->esz > MO_32 || !dc_isar_feature(aa64_sve2, s)) { 7622 return false; 7623 } 7624 assert(a->imm > 0 && a->imm <= (8 << a->esz)); 7625 if (sve_access_check(s)) { 7626 unsigned vsz = vec_full_reg_size(s); 7627 
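        /*
         * a->imm is the right-shift count; the assert above confirms
         * the decode kept it within [1, 8 << esz], i.e. at most the
         * narrow element width.
         */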
tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd), 7628 vec_full_reg_offset(s, a->rn), 7629 vsz, vsz, a->imm, &ops[a->esz]); 7630 } 7631 return true; 7632} 7633 7634static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr) 7635{ 7636 int halfbits = 4 << vece; 7637 uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits)); 7638 7639 tcg_gen_shri_i64(d, n, shr); 7640 tcg_gen_andi_i64(d, d, mask); 7641} 7642 7643static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 7644{ 7645 gen_shrnb_i64(MO_16, d, n, shr); 7646} 7647 7648static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 7649{ 7650 gen_shrnb_i64(MO_32, d, n, shr); 7651} 7652 7653static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 7654{ 7655 gen_shrnb_i64(MO_64, d, n, shr); 7656} 7657 7658static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr) 7659{ 7660 TCGv_vec t = tcg_temp_new_vec_matching(d); 7661 int halfbits = 4 << vece; 7662 uint64_t mask = MAKE_64BIT_MASK(0, halfbits); 7663 7664 tcg_gen_shri_vec(vece, n, n, shr); 7665 tcg_gen_dupi_vec(vece, t, mask); 7666 tcg_gen_and_vec(vece, d, n, t); 7667 tcg_temp_free_vec(t); 7668} 7669 7670static bool trans_SHRNB(DisasContext *s, arg_rri_esz *a) 7671{ 7672 static const TCGOpcode vec_list[] = { INDEX_op_shri_vec, 0 }; 7673 static const GVecGen2i ops[3] = { 7674 { .fni8 = gen_shrnb16_i64, 7675 .fniv = gen_shrnb_vec, 7676 .opt_opc = vec_list, 7677 .fno = gen_helper_sve2_shrnb_h, 7678 .vece = MO_16 }, 7679 { .fni8 = gen_shrnb32_i64, 7680 .fniv = gen_shrnb_vec, 7681 .opt_opc = vec_list, 7682 .fno = gen_helper_sve2_shrnb_s, 7683 .vece = MO_32 }, 7684 { .fni8 = gen_shrnb64_i64, 7685 .fniv = gen_shrnb_vec, 7686 .opt_opc = vec_list, 7687 .fno = gen_helper_sve2_shrnb_d, 7688 .vece = MO_64 }, 7689 }; 7690 return do_sve2_shr_narrow(s, a, ops); 7691} 7692 7693static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr) 7694{ 7695 int halfbits = 4 << vece; 7696 uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits)); 7697 7698 tcg_gen_shli_i64(n, n, halfbits - shr); 7699 tcg_gen_andi_i64(n, n, ~mask); 7700 tcg_gen_andi_i64(d, d, mask); 7701 tcg_gen_or_i64(d, d, n); 7702} 7703 7704static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 7705{ 7706 gen_shrnt_i64(MO_16, d, n, shr); 7707} 7708 7709static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 7710{ 7711 gen_shrnt_i64(MO_32, d, n, shr); 7712} 7713 7714static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr) 7715{ 7716 tcg_gen_shri_i64(n, n, shr); 7717 tcg_gen_deposit_i64(d, d, n, 32, 32); 7718} 7719 7720static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr) 7721{ 7722 TCGv_vec t = tcg_temp_new_vec_matching(d); 7723 int halfbits = 4 << vece; 7724 uint64_t mask = MAKE_64BIT_MASK(0, halfbits); 7725 7726 tcg_gen_shli_vec(vece, n, n, halfbits - shr); 7727 tcg_gen_dupi_vec(vece, t, mask); 7728 tcg_gen_bitsel_vec(vece, d, t, d, n); 7729 tcg_temp_free_vec(t); 7730} 7731 7732static bool trans_SHRNT(DisasContext *s, arg_rri_esz *a) 7733{ 7734 static const TCGOpcode vec_list[] = { INDEX_op_shli_vec, 0 }; 7735 static const GVecGen2i ops[3] = { 7736 { .fni8 = gen_shrnt16_i64, 7737 .fniv = gen_shrnt_vec, 7738 .opt_opc = vec_list, 7739 .load_dest = true, 7740 .fno = gen_helper_sve2_shrnt_h, 7741 .vece = MO_16 }, 7742 { .fni8 = gen_shrnt32_i64, 7743 .fniv = gen_shrnt_vec, 7744 .opt_opc = vec_list, 7745 .load_dest = true, 7746 .fno = gen_helper_sve2_shrnt_s, 7747 .vece = MO_32 }, 7748 { .fni8 = gen_shrnt64_i64, 7749 .fniv = 
gen_shrnt_vec, 7750 .opt_opc = vec_list, 7751 .load_dest = true, 7752 .fno = gen_helper_sve2_shrnt_d, 7753 .vece = MO_64 }, 7754 }; 7755 return do_sve2_shr_narrow(s, a, ops); 7756} 7757 7758static bool trans_RSHRNB(DisasContext *s, arg_rri_esz *a) 7759{ 7760 static const GVecGen2i ops[3] = { 7761 { .fno = gen_helper_sve2_rshrnb_h }, 7762 { .fno = gen_helper_sve2_rshrnb_s }, 7763 { .fno = gen_helper_sve2_rshrnb_d }, 7764 }; 7765 return do_sve2_shr_narrow(s, a, ops); 7766} 7767 7768static bool trans_RSHRNT(DisasContext *s, arg_rri_esz *a) 7769{ 7770 static const GVecGen2i ops[3] = { 7771 { .fno = gen_helper_sve2_rshrnt_h }, 7772 { .fno = gen_helper_sve2_rshrnt_s }, 7773 { .fno = gen_helper_sve2_rshrnt_d }, 7774 }; 7775 return do_sve2_shr_narrow(s, a, ops); 7776} 7777 7778static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d, 7779 TCGv_vec n, int64_t shr) 7780{ 7781 TCGv_vec t = tcg_temp_new_vec_matching(d); 7782 int halfbits = 4 << vece; 7783 7784 tcg_gen_sari_vec(vece, n, n, shr); 7785 tcg_gen_dupi_vec(vece, t, 0); 7786 tcg_gen_smax_vec(vece, n, n, t); 7787 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 7788 tcg_gen_umin_vec(vece, d, n, t); 7789 tcg_temp_free_vec(t); 7790} 7791 7792static bool trans_SQSHRUNB(DisasContext *s, arg_rri_esz *a) 7793{ 7794 static const TCGOpcode vec_list[] = { 7795 INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0 7796 }; 7797 static const GVecGen2i ops[3] = { 7798 { .fniv = gen_sqshrunb_vec, 7799 .opt_opc = vec_list, 7800 .fno = gen_helper_sve2_sqshrunb_h, 7801 .vece = MO_16 }, 7802 { .fniv = gen_sqshrunb_vec, 7803 .opt_opc = vec_list, 7804 .fno = gen_helper_sve2_sqshrunb_s, 7805 .vece = MO_32 }, 7806 { .fniv = gen_sqshrunb_vec, 7807 .opt_opc = vec_list, 7808 .fno = gen_helper_sve2_sqshrunb_d, 7809 .vece = MO_64 }, 7810 }; 7811 return do_sve2_shr_narrow(s, a, ops); 7812} 7813 7814static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d, 7815 TCGv_vec n, int64_t shr) 7816{ 7817 TCGv_vec t = tcg_temp_new_vec_matching(d); 7818 int halfbits = 4 << vece; 7819 7820 tcg_gen_sari_vec(vece, n, n, shr); 7821 tcg_gen_dupi_vec(vece, t, 0); 7822 tcg_gen_smax_vec(vece, n, n, t); 7823 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 7824 tcg_gen_umin_vec(vece, n, n, t); 7825 tcg_gen_shli_vec(vece, n, n, halfbits); 7826 tcg_gen_bitsel_vec(vece, d, t, d, n); 7827 tcg_temp_free_vec(t); 7828} 7829 7830static bool trans_SQSHRUNT(DisasContext *s, arg_rri_esz *a) 7831{ 7832 static const TCGOpcode vec_list[] = { 7833 INDEX_op_shli_vec, INDEX_op_sari_vec, 7834 INDEX_op_smax_vec, INDEX_op_umin_vec, 0 7835 }; 7836 static const GVecGen2i ops[3] = { 7837 { .fniv = gen_sqshrunt_vec, 7838 .opt_opc = vec_list, 7839 .load_dest = true, 7840 .fno = gen_helper_sve2_sqshrunt_h, 7841 .vece = MO_16 }, 7842 { .fniv = gen_sqshrunt_vec, 7843 .opt_opc = vec_list, 7844 .load_dest = true, 7845 .fno = gen_helper_sve2_sqshrunt_s, 7846 .vece = MO_32 }, 7847 { .fniv = gen_sqshrunt_vec, 7848 .opt_opc = vec_list, 7849 .load_dest = true, 7850 .fno = gen_helper_sve2_sqshrunt_d, 7851 .vece = MO_64 }, 7852 }; 7853 return do_sve2_shr_narrow(s, a, ops); 7854} 7855 7856static bool trans_SQRSHRUNB(DisasContext *s, arg_rri_esz *a) 7857{ 7858 static const GVecGen2i ops[3] = { 7859 { .fno = gen_helper_sve2_sqrshrunb_h }, 7860 { .fno = gen_helper_sve2_sqrshrunb_s }, 7861 { .fno = gen_helper_sve2_sqrshrunb_d }, 7862 }; 7863 return do_sve2_shr_narrow(s, a, ops); 7864} 7865 7866static bool trans_SQRSHRUNT(DisasContext *s, arg_rri_esz *a) 7867{ 7868 static const GVecGen2i ops[3] = { 7869 { .fno = 
gen_helper_sve2_sqrshrunt_h }, 7870 { .fno = gen_helper_sve2_sqrshrunt_s }, 7871 { .fno = gen_helper_sve2_sqrshrunt_d }, 7872 }; 7873 return do_sve2_shr_narrow(s, a, ops); 7874} 7875 7876static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d, 7877 TCGv_vec n, int64_t shr) 7878{ 7879 TCGv_vec t = tcg_temp_new_vec_matching(d); 7880 int halfbits = 4 << vece; 7881 int64_t max = MAKE_64BIT_MASK(0, halfbits - 1); 7882 int64_t min = -max - 1; 7883 7884 tcg_gen_sari_vec(vece, n, n, shr); 7885 tcg_gen_dupi_vec(vece, t, min); 7886 tcg_gen_smax_vec(vece, n, n, t); 7887 tcg_gen_dupi_vec(vece, t, max); 7888 tcg_gen_smin_vec(vece, n, n, t); 7889 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 7890 tcg_gen_and_vec(vece, d, n, t); 7891 tcg_temp_free_vec(t); 7892} 7893 7894static bool trans_SQSHRNB(DisasContext *s, arg_rri_esz *a) 7895{ 7896 static const TCGOpcode vec_list[] = { 7897 INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0 7898 }; 7899 static const GVecGen2i ops[3] = { 7900 { .fniv = gen_sqshrnb_vec, 7901 .opt_opc = vec_list, 7902 .fno = gen_helper_sve2_sqshrnb_h, 7903 .vece = MO_16 }, 7904 { .fniv = gen_sqshrnb_vec, 7905 .opt_opc = vec_list, 7906 .fno = gen_helper_sve2_sqshrnb_s, 7907 .vece = MO_32 }, 7908 { .fniv = gen_sqshrnb_vec, 7909 .opt_opc = vec_list, 7910 .fno = gen_helper_sve2_sqshrnb_d, 7911 .vece = MO_64 }, 7912 }; 7913 return do_sve2_shr_narrow(s, a, ops); 7914} 7915 7916static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d, 7917 TCGv_vec n, int64_t shr) 7918{ 7919 TCGv_vec t = tcg_temp_new_vec_matching(d); 7920 int halfbits = 4 << vece; 7921 int64_t max = MAKE_64BIT_MASK(0, halfbits - 1); 7922 int64_t min = -max - 1; 7923 7924 tcg_gen_sari_vec(vece, n, n, shr); 7925 tcg_gen_dupi_vec(vece, t, min); 7926 tcg_gen_smax_vec(vece, n, n, t); 7927 tcg_gen_dupi_vec(vece, t, max); 7928 tcg_gen_smin_vec(vece, n, n, t); 7929 tcg_gen_shli_vec(vece, n, n, halfbits); 7930 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits)); 7931 tcg_gen_bitsel_vec(vece, d, t, d, n); 7932 tcg_temp_free_vec(t); 7933} 7934 7935static bool trans_SQSHRNT(DisasContext *s, arg_rri_esz *a) 7936{ 7937 static const TCGOpcode vec_list[] = { 7938 INDEX_op_shli_vec, INDEX_op_sari_vec, 7939 INDEX_op_smax_vec, INDEX_op_smin_vec, 0 7940 }; 7941 static const GVecGen2i ops[3] = { 7942 { .fniv = gen_sqshrnt_vec, 7943 .opt_opc = vec_list, 7944 .load_dest = true, 7945 .fno = gen_helper_sve2_sqshrnt_h, 7946 .vece = MO_16 }, 7947 { .fniv = gen_sqshrnt_vec, 7948 .opt_opc = vec_list, 7949 .load_dest = true, 7950 .fno = gen_helper_sve2_sqshrnt_s, 7951 .vece = MO_32 }, 7952 { .fniv = gen_sqshrnt_vec, 7953 .opt_opc = vec_list, 7954 .load_dest = true, 7955 .fno = gen_helper_sve2_sqshrnt_d, 7956 .vece = MO_64 }, 7957 }; 7958 return do_sve2_shr_narrow(s, a, ops); 7959} 7960 7961static bool trans_SQRSHRNB(DisasContext *s, arg_rri_esz *a) 7962{ 7963 static const GVecGen2i ops[3] = { 7964 { .fno = gen_helper_sve2_sqrshrnb_h }, 7965 { .fno = gen_helper_sve2_sqrshrnb_s }, 7966 { .fno = gen_helper_sve2_sqrshrnb_d }, 7967 }; 7968 return do_sve2_shr_narrow(s, a, ops); 7969} 7970 7971static bool trans_SQRSHRNT(DisasContext *s, arg_rri_esz *a) 7972{ 7973 static const GVecGen2i ops[3] = { 7974 { .fno = gen_helper_sve2_sqrshrnt_h }, 7975 { .fno = gen_helper_sve2_sqrshrnt_s }, 7976 { .fno = gen_helper_sve2_sqrshrnt_d }, 7977 }; 7978 return do_sve2_shr_narrow(s, a, ops); 7979} 7980 7981static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d, 7982 TCGv_vec n, int64_t shr) 7983{ 7984 TCGv_vec t = tcg_temp_new_vec_matching(d); 7985 int 
static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static bool trans_UQSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_shri_vec, INDEX_op_umin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_uqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_uqshrnb_h,
          .vece = MO_16 },
        { .fniv = gen_uqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_uqshrnb_s,
          .vece = MO_32 },
        { .fniv = gen_uqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_uqshrnb_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}

static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static bool trans_UQSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_uqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqshrnt_h,
          .vece = MO_16 },
        { .fniv = gen_uqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqshrnt_s,
          .vece = MO_32 },
        { .fniv = gen_uqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqshrnt_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}

static bool trans_UQRSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_uqrshrnb_h },
        { .fno = gen_helper_sve2_uqrshrnb_s },
        { .fno = gen_helper_sve2_uqrshrnb_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}

static bool trans_UQRSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_uqrshrnt_h },
        { .fno = gen_helper_sve2_uqrshrnt_s },
        { .fno = gen_helper_sve2_uqrshrnt_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}

#define DO_SVE2_ZZZ_NARROW(NAME, name)                                    \
static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a)                 \
{                                                                         \
    static gen_helper_gvec_3 * const fns[4] = {                           \
        NULL, gen_helper_sve2_##name##_h,                                 \
        gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d,           \
    };                                                                    \
    return do_sve2_zzz_ool(s, a, fns[a->esz]);                            \
}

DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb)
DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt)
DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb)
DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt)

DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb)
DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt)
DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb)
DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)

static bool do_sve2_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
                               gen_helper_gvec_flags_4 *fn)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_ppzz_flags(s, a, fn);
}
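/*
 * MATCH and NMATCH set the flags as well as the predicate, hence the
 * gen_helper_gvec_flags_4 signature.  Only byte and halfword element
 * sizes are architecturally valid, so the MO_32/MO_64 slots below
 * stay NULL and are rejected.
 */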
#define DO_SVE2_PPZZ_MATCH(NAME, name)                                      \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a)                  \
{                                                                           \
    static gen_helper_gvec_flags_4 * const fns[4] = {                       \
        gen_helper_sve2_##name##_ppzz_b, gen_helper_sve2_##name##_ppzz_h,   \
        NULL, NULL                                                          \
    };                                                                      \
    return do_sve2_ppzz_flags(s, a, fns[a->esz]);                           \
}

DO_SVE2_PPZZ_MATCH(MATCH, match)
DO_SVE2_PPZZ_MATCH(NMATCH, nmatch)

static bool trans_HISTCNT(DisasContext *s, arg_rprr_esz *a)
{
    static gen_helper_gvec_4 * const fns[2] = {
        gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
    };
    if (a->esz < 2) {
        return false;
    }
    return do_sve2_zpzz_ool(s, a, fns[a->esz - 2]);
}

static bool trans_HISTSEG(DisasContext *s, arg_rrr_esz *a)
{
    if (a->esz != 0) {
        return false;
    }
    return do_sve2_zzz_ool(s, a, gen_helper_sve2_histseg);
}

static bool do_sve2_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
                            gen_helper_gvec_4_ptr *fn)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpzz_fp(s, a, fn);
}

#define DO_SVE2_ZPZZ_FP(NAME, name)                                         \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a)                  \
{                                                                           \
    static gen_helper_gvec_4_ptr * const fns[4] = {                         \
        NULL, gen_helper_sve2_##name##_zpzz_h,                              \
        gen_helper_sve2_##name##_zpzz_s, gen_helper_sve2_##name##_zpzz_d    \
    };                                                                      \
    return do_sve2_zpzz_fp(s, a, fns[a->esz]);                              \
}

DO_SVE2_ZPZZ_FP(FADDP, faddp)
DO_SVE2_ZPZZ_FP(FMAXNMP, fmaxnmp)
DO_SVE2_ZPZZ_FP(FMINNMP, fminnmp)
DO_SVE2_ZPZZ_FP(FMAXP, fmaxp)
DO_SVE2_ZPZZ_FP(FMINP, fminp)

/*
 * SVE Integer Multiply-Add (unpredicated)
 */

static bool trans_FMMLA(DisasContext *s, arg_rrrr_esz *a)
{
    gen_helper_gvec_4_ptr *fn;

    switch (a->esz) {
    case MO_32:
        if (!dc_isar_feature(aa64_sve_f32mm, s)) {
            return false;
        }
        fn = gen_helper_fmmla_s;
        break;
    case MO_64:
        if (!dc_isar_feature(aa64_sve_f64mm, s)) {
            return false;
        }
        fn = gen_helper_fmmla_d;
        break;
    default:
        return false;
    }

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           status, vsz, vsz, 0, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}

static bool do_sqdmlal_zzzw(DisasContext *s, arg_rrrr_esz *a,
                            bool sel1, bool sel2)
{
    static gen_helper_gvec_4 * const fns[] = {
        NULL, gen_helper_sve2_sqdmlal_zzzw_h,
        gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], (sel2 << 1) | sel1);
}

static bool do_sqdmlsl_zzzw(DisasContext *s, arg_rrrr_esz *a,
                            bool sel1, bool sel2)
{
    static gen_helper_gvec_4 * const fns[] = {
        NULL, gen_helper_sve2_sqdmlsl_zzzw_h,
        gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], (sel2 << 1) | sel1);
}

static bool trans_SQDMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlal_zzzw(s, a, false, false);
}

static bool trans_SQDMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlal_zzzw(s, a, true, true);
}
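/*
 * In the (sel2 << 1) | sel1 immediate above, bit 0 selects bottom/top
 * for the first multiplicand and bit 1 for the second, so the mixed
 * BT forms pass sel1 = false, sel2 = true.
 */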
static bool trans_SQDMLALBT(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlal_zzzw(s, a, false, true);
}

static bool trans_SQDMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlsl_zzzw(s, a, false, false);
}

static bool trans_SQDMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlsl_zzzw(s, a, true, true);
}

static bool trans_SQDMLSLBT(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlsl_zzzw(s, a, false, true);
}

static bool trans_SQRDMLAH_zzzz(DisasContext *s, arg_rrrr_esz *a)
{
    static gen_helper_gvec_4 * const fns[] = {
        gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h,
        gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], 0);
}

static bool trans_SQRDMLSH_zzzz(DisasContext *s, arg_rrrr_esz *a)
{
    static gen_helper_gvec_4 * const fns[] = {
        gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h,
        gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], 0);
}

static bool do_smlal_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[] = {
        NULL, gen_helper_sve2_smlal_zzzw_h,
        gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
}

static bool trans_SMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_smlal_zzzw(s, a, false);
}

static bool trans_SMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_smlal_zzzw(s, a, true);
}

static bool do_umlal_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[] = {
        NULL, gen_helper_sve2_umlal_zzzw_h,
        gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
}

static bool trans_UMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_umlal_zzzw(s, a, false);
}

static bool trans_UMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_umlal_zzzw(s, a, true);
}

static bool do_smlsl_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[] = {
        NULL, gen_helper_sve2_smlsl_zzzw_h,
        gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
}

static bool trans_SMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_smlsl_zzzw(s, a, false);
}

static bool trans_SMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_smlsl_zzzw(s, a, true);
}

static bool do_umlsl_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[] = {
        NULL, gen_helper_sve2_umlsl_zzzw_h,
        gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
}

static bool trans_UMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_umlsl_zzzw(s, a, false);
}

static bool trans_UMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_umlsl_zzzw(s, a, true);
}
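/*
 * For the complex integer operations below, the rotation field from
 * the decoder is passed straight through as the descriptor data, so
 * one helper per element size covers all four rotations.
 */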
static bool trans_CMLA_zzzz(DisasContext *s, arg_CMLA_zzzz *a)
{
    static gen_helper_gvec_4 * const fns[] = {
        gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h,
        gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d,
    };

    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot);
    }
    return true;
}

static bool trans_CDOT_zzzz(DisasContext *s, arg_CMLA_zzzz *a)
{
    if (!dc_isar_feature(aa64_sve2, s) || a->esz < MO_32) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_helper_gvec_4 *fn = (a->esz == MO_32
                                 ? gen_helper_sve2_cdot_zzzz_s
                                 : gen_helper_sve2_cdot_zzzz_d);
        gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->rot);
    }
    return true;
}

static bool trans_SQRDCMLAH_zzzz(DisasContext *s, arg_SQRDCMLAH_zzzz *a)
{
    static gen_helper_gvec_4 * const fns[] = {
        gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h,
        gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d,
    };

    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot);
    }
    return true;
}

static bool trans_USDOT_zzzz(DisasContext *s, arg_USDOT_zzzz *a)
{
    if (a->esz != 2 || !dc_isar_feature(aa64_sve_i8mm, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           vsz, vsz, 0, gen_helper_gvec_usdot_b);
    }
    return true;
}

static bool trans_AESMC(DisasContext *s, arg_AESMC *a)
{
    if (!dc_isar_feature(aa64_sve2_aes, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zz(s, gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt);
    }
    return true;
}

static bool do_aese(DisasContext *s, arg_rrr_esz *a, bool decrypt)
{
    if (!dc_isar_feature(aa64_sve2_aes, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzz(s, gen_helper_crypto_aese,
                         a->rd, a->rn, a->rm, decrypt);
    }
    return true;
}

static bool trans_AESE(DisasContext *s, arg_rrr_esz *a)
{
    return do_aese(s, a, false);
}

static bool trans_AESD(DisasContext *s, arg_rrr_esz *a)
{
    return do_aese(s, a, true);
}

static bool do_sm4(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
{
    if (!dc_isar_feature(aa64_sve2_sm4, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0);
    }
    return true;
}

static bool trans_SM4E(DisasContext *s, arg_rrr_esz *a)
{
    return do_sm4(s, a, gen_helper_crypto_sm4e);
}

static bool trans_SM4EKEY(DisasContext *s, arg_rrr_esz *a)
{
    return do_sm4(s, a, gen_helper_crypto_sm4ekey);
}

static bool trans_RAX1(DisasContext *s, arg_rrr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2_sha3, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_fn_zzz(s, gen_gvec_rax1, MO_64, a->rd, a->rn, a->rm);
    }
    return true;
}

static bool trans_FCVTNT_sh(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtnt_sh);
}
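/*
 * BFCVTNT is gated on the BFloat16 feature rather than on SVE2
 * itself, in line with the other BF16 operations further down
 * (BFDOT, BFMMLA, BFMLAL).
 */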
static bool trans_BFCVTNT(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_bf16, s)) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_bfcvtnt);
}

static bool trans_FCVTNT_ds(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtnt_ds);
}

static bool trans_FCVTLT_hs(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtlt_hs);
}

static bool trans_FCVTLT_sd(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtlt_sd);
}

static bool trans_FCVTX_ds(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_frint_mode(s, a, float_round_to_odd, gen_helper_sve_fcvt_ds);
}

static bool trans_FCVTXNT_ds(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_frint_mode(s, a, float_round_to_odd, gen_helper_sve2_fcvtnt_ds);
}

static bool trans_FLOGB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3_ptr * const fns[] = {
        NULL, gen_helper_flogb_h,
        gen_helper_flogb_s, gen_helper_flogb_d
    };

    if (!dc_isar_feature(aa64_sve2, s) || fns[a->esz] == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_ptr status =
            fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        unsigned vsz = vec_full_reg_size(s);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, fns[a->esz]);
        tcg_temp_free_ptr(status);
    }
    return true;
}

static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           cpu_env, vsz, vsz, (sel << 1) | sub,
                           gen_helper_sve2_fmlal_zzzw_s);
    }
    return true;
}

static bool trans_FMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_FMLAL_zzzw(s, a, false, false);
}

static bool trans_FMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_FMLAL_zzzw(s, a, false, true);
}

static bool trans_FMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_FMLAL_zzzw(s, a, true, false);
}

static bool trans_FMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_FMLAL_zzzw(s, a, true, true);
}
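/*
 * The indexed form packs the element index above the top/bottom (sel)
 * and subtract (sub) bits, (index << 2) | (sel << 1) | sub, for the
 * single _zzxw_s helper to recover from the descriptor.
 */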
static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           cpu_env, vsz, vsz,
                           (a->index << 2) | (sel << 1) | sub,
                           gen_helper_sve2_fmlal_zzxw_s);
    }
    return true;
}

static bool trans_FMLALB_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_FMLAL_zzxw(s, a, false, false);
}

static bool trans_FMLALT_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_FMLAL_zzxw(s, a, false, true);
}

static bool trans_FMLSLB_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_FMLAL_zzxw(s, a, true, false);
}

static bool trans_FMLSLT_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_FMLAL_zzxw(s, a, true, true);
}

static bool do_i8mm_zzzz_ool(DisasContext *s, arg_rrrr_esz *a,
                             gen_helper_gvec_4 *fn, int data)
{
    if (!dc_isar_feature(aa64_sve_i8mm, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
    }
    return true;
}

static bool trans_SMMLA(DisasContext *s, arg_rrrr_esz *a)
{
    return do_i8mm_zzzz_ool(s, a, gen_helper_gvec_smmla_b, 0);
}

static bool trans_USMMLA(DisasContext *s, arg_rrrr_esz *a)
{
    return do_i8mm_zzzz_ool(s, a, gen_helper_gvec_usmmla_b, 0);
}

static bool trans_UMMLA(DisasContext *s, arg_rrrr_esz *a)
{
    return do_i8mm_zzzz_ool(s, a, gen_helper_gvec_ummla_b, 0);
}

static bool trans_BFDOT_zzzz(DisasContext *s, arg_rrrr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_bf16, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzz(s, gen_helper_gvec_bfdot,
                          a->rd, a->rn, a->rm, a->ra, 0);
    }
    return true;
}

static bool trans_BFDOT_zzxz(DisasContext *s, arg_rrxr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_bf16, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzz(s, gen_helper_gvec_bfdot_idx,
                          a->rd, a->rn, a->rm, a->ra, a->index);
    }
    return true;
}

static bool trans_BFMMLA(DisasContext *s, arg_rrrr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_bf16, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzz(s, gen_helper_gvec_bfmmla,
                          a->rd, a->rn, a->rm, a->ra, 0);
    }
    return true;
}

static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    if (!dc_isar_feature(aa64_sve_bf16, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
        unsigned vsz = vec_full_reg_size(s);

        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           status, vsz, vsz, sel,
                           gen_helper_gvec_bfmlal);
        tcg_temp_free_ptr(status);
    }
    return true;
}

static bool trans_BFMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_BFMLAL_zzzw(s, a, false);
}

static bool trans_BFMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_BFMLAL_zzzw(s, a, true);
}
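/*
 * As for FMLAL above, the indexed BFMLAL encodes the element index
 * alongside the top/bottom selector: (index << 1) | sel.
 */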
static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
{
    if (!dc_isar_feature(aa64_sve_bf16, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
        unsigned vsz = vec_full_reg_size(s);

        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           status, vsz, vsz, (a->index << 1) | sel,
                           gen_helper_gvec_bfmlal_idx);
        tcg_temp_free_ptr(status);
    }
    return true;
}

static bool trans_BFMLALB_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_BFMLAL_zzxw(s, a, false);
}

static bool trans_BFMLALT_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_BFMLAL_zzxw(s, a, true);
}