translate-mve.c (76335B)
/*
 * ARM translation: M-profile MVE instructions
 *
 * Copyright (c) 2021 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/exec-all.h"
#include "exec/gen-icount.h"
#include "translate.h"
#include "translate-a32.h"

static inline int vidup_imm(DisasContext *s, int x)
{
    return 1 << x;
}

/* Include the generated decoder */
#include "decode-mve.c.inc"

typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLdStSGFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLdStIlFn(TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLongDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVIDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenVIWDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
typedef void MVEGenCmpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenScalarCmpFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVABAVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenDualAccOpFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVCVTRmodeFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
static inline long mve_qreg_offset(unsigned reg)
{
    return offsetof(CPUARMState, vfp.zregs[reg].d[0]);
}

static TCGv_ptr mve_qreg_ptr(unsigned reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, cpu_env, mve_qreg_offset(reg));
    return ret;
}

static bool mve_no_predication(DisasContext *s)
{
    /*
     * Return true if we are executing the entire MVE instruction
     * with no predication or partial-execution, and so we can safely
     * use an inline TCG vector implementation.
     */
    return s->eci == 0 && s->mve_no_pred;
}

static bool mve_check_qreg_bank(DisasContext *s, int qmask)
{
    /*
     * Check whether Qregs are in range. For v8.1M only Q0..Q7
     * are supported, see VFPSmallRegisterBank().
     */
    return qmask < 8;
}

bool mve_eci_check(DisasContext *s)
{
    /*
     * This is a beatwise insn: check that ECI is valid (not a
     * reserved value) and note that we are handling it.
     * Return true if OK, false if we generated an exception.
     */
    s->eci_handled = true;
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        /* Reserved value: INVSTATE UsageFault */
        gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
                           default_exception_el(s));
        return false;
    }
}

void mve_update_eci(DisasContext *s)
{
    /*
     * The helper function will always update the CPUState field,
     * so we only need to update the DisasContext field.
     */
    if (s->eci) {
        s->eci = (s->eci == ECI_A0A1A2B0) ? ECI_A0 : ECI_NONE;
    }
}

void mve_update_and_store_eci(DisasContext *s)
{
    /*
     * For insns which don't call a helper function that will call
     * mve_advance_vpt(), this version updates s->eci and also stores
     * it out to the CPUState field.
     */
    if (s->eci) {
        mve_update_eci(s);
        store_cpu_field(tcg_constant_i32(s->eci << 4), condexec_bits);
    }
}

static bool mve_skip_first_beat(DisasContext *s)
{
    /* Return true if PSR.ECI says we must skip the first beat of this insn */
    switch (s->eci) {
    case ECI_NONE:
        return false;
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        g_assert_not_reached();
    }
}

static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn,
                    unsigned msize)
{
    TCGv_i32 addr;
    uint32_t offset;
    TCGv_ptr qreg;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }

    /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
    if (a->rn == 15 || (a->rn == 13 && a->w)) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }
    addr = load_reg(s, a->rn);
    if (a->p) {
        tcg_gen_addi_i32(addr, addr, offset);
    }

    qreg = mve_qreg_ptr(a->qd);
    fn(cpu_env, qreg, addr);
    tcg_temp_free_ptr(qreg);

    /*
     * Writeback always happens after the last beat of the insn,
     * regardless of predication
     */
    if (a->w) {
        if (!a->p) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }
    mve_update_eci(s);
    return true;
}

static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
{
    static MVEGenLdStFn * const ldstfns[4][2] = {
        { gen_helper_mve_vstrb, gen_helper_mve_vldrb },
        { gen_helper_mve_vstrh, gen_helper_mve_vldrh },
        { gen_helper_mve_vstrw, gen_helper_mve_vldrw },
        { NULL, NULL }
    };
    return do_ldst(s, a, ldstfns[a->size][a->l], a->size);
}

#define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE)           \
    static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a)   \
    {                                                           \
        static MVEGenLdStFn * const ldstfns[2][2] = {           \
            { gen_helper_mve_##ST, gen_helper_mve_##SLD },      \
            { NULL, gen_helper_mve_##ULD },                     \
        };                                                      \
        return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE);       \
    }

DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w, MO_16)

static bool do_ldst_sg(DisasContext *s, arg_vldst_sg *a, MVEGenLdStSGFn fn)
{
    TCGv_i32 addr;
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn || a->rn == 15) {
        /* Rn case is UNPREDICTABLE */
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    addr = load_reg(s, a->rn);

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, addr);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    tcg_temp_free_i32(addr);
    mve_update_eci(s);
    return true;
}

/*
 * The naming scheme here is "vldrb_sg_sh == in-memory byte loads
 * signextended to halfword elements in register". _os_ indicates that
 * the offsets in Qm should be scaled by the element size.
 */
/* This macro is just to make the arrays more compact in these functions */
#define F(N) gen_helper_mve_##N

/* VLDRB/VSTRB (ie msize 1) with OS=1 is UNPREDICTABLE; we UNDEF */
static bool trans_VLDR_S_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
        { NULL, F(vldrb_sg_sh), F(vldrb_sg_sw), NULL },
        { NULL, NULL, F(vldrh_sg_sw), NULL },
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, NULL, NULL }
    }, {
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, F(vldrh_sg_os_sw), NULL },
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, NULL, NULL }
    }
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

static bool trans_VLDR_U_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
        { F(vldrb_sg_ub), F(vldrb_sg_uh), F(vldrb_sg_uw), NULL },
        { NULL, F(vldrh_sg_uh), F(vldrh_sg_uw), NULL },
        { NULL, NULL, F(vldrw_sg_uw), NULL },
        { NULL, NULL, NULL, F(vldrd_sg_ud) }
    }, {
        { NULL, NULL, NULL, NULL },
        { NULL, F(vldrh_sg_os_uh), F(vldrh_sg_os_uw), NULL },
        { NULL, NULL, F(vldrw_sg_os_uw), NULL },
        { NULL, NULL, NULL, F(vldrd_sg_os_ud) }
    }
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

static bool trans_VSTR_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
        { F(vstrb_sg_ub), F(vstrb_sg_uh), F(vstrb_sg_uw), NULL },
        { NULL, F(vstrh_sg_uh), F(vstrh_sg_uw), NULL },
        { NULL, NULL, F(vstrw_sg_uw), NULL },
        { NULL, NULL, NULL, F(vstrd_sg_ud) }
    }, {
        { NULL, NULL, NULL, NULL },
        { NULL, F(vstrh_sg_os_uh), F(vstrh_sg_os_uw), NULL },
        { NULL, NULL, F(vstrw_sg_os_uw), NULL },
        { NULL, NULL, NULL, F(vstrd_sg_os_ud) }
    }
    };
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

#undef F

static bool do_ldst_sg_imm(DisasContext *s, arg_vldst_sg_imm *a,
                           MVEGenLdStSGFn *fn, unsigned msize)
{
    uint32_t offset;
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(offset));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

static bool trans_VLDRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vldrw_sg_uw,
        gen_helper_mve_vldrw_sg_wb_uw,
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg_imm(s, a, fns[a->w], MO_32);
}

static bool trans_VLDRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vldrd_sg_ud,
        gen_helper_mve_vldrd_sg_wb_ud,
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
}

static bool trans_VSTRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vstrw_sg_uw,
        gen_helper_mve_vstrw_sg_wb_uw,
    };
    return do_ldst_sg_imm(s, a, fns[a->w], MO_32);
}

static bool trans_VSTRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vstrd_sg_ud,
        gen_helper_mve_vstrd_sg_wb_ud,
    };
    return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
}

static bool do_vldst_il(DisasContext *s, arg_vldst_il *a, MVEGenLdStIlFn *fn,
                        int addrinc)
{
    TCGv_i32 rn;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn || (a->rn == 13 && a->w) || a->rn == 15) {
        /* Variously UNPREDICTABLE or UNDEF or related-encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    rn = load_reg(s, a->rn);
    /*
     * We pass the index of Qd, not a pointer, because the helper must
     * access multiple Q registers starting at Qd and working up.
     */
    fn(cpu_env, tcg_constant_i32(a->qd), rn);

    if (a->w) {
        tcg_gen_addi_i32(rn, rn, addrinc);
        store_reg(s, a->rn, rn);
    } else {
        tcg_temp_free_i32(rn);
    }
    mve_update_and_store_eci(s);
    return true;
}

/* This macro is just to make the arrays more compact in these functions */
#define F(N) gen_helper_mve_##N

static bool trans_VLD2(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vld20b), F(vld20h), F(vld20w), NULL, },
        { F(vld21b), F(vld21h), F(vld21w), NULL, },
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, NULL, NULL },
    };
    if (a->qd > 6) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 32);
}

static bool trans_VLD4(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vld40b), F(vld40h), F(vld40w), NULL, },
        { F(vld41b), F(vld41h), F(vld41w), NULL, },
        { F(vld42b), F(vld42h), F(vld42w), NULL, },
        { F(vld43b), F(vld43h), F(vld43w), NULL, },
    };
    if (a->qd > 4) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 64);
}

static bool trans_VST2(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vst20b), F(vst20h), F(vst20w), NULL, },
        { F(vst21b), F(vst21h), F(vst21w), NULL, },
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, NULL, NULL },
    };
    if (a->qd > 6) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 32);
}

static bool trans_VST4(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vst40b), F(vst40h), F(vst40w), NULL, },
        { F(vst41b), F(vst41h), F(vst41w), NULL, },
        { F(vst42b), F(vst42h), F(vst42w), NULL, },
        { F(vst43b), F(vst43h), F(vst43w), NULL, },
    };
    if (a->qd > 4) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 64);
}

#undef F

static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
{
    TCGv_ptr qd;
    TCGv_i32 rt;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rt == 13 || a->rt == 15) {
        /* UNPREDICTABLE; we choose to UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    rt = load_reg(s, a->rt);
    if (mve_no_predication(s)) {
        tcg_gen_gvec_dup_i32(a->size, mve_qreg_offset(a->qd), 16, 16, rt);
    } else {
        qd = mve_qreg_ptr(a->qd);
        tcg_gen_dup_i32(a->size, rt, rt);
        gen_helper_mve_vdup(cpu_env, qd, rt);
        tcg_temp_free_ptr(qd);
    }
    tcg_temp_free_i32(rt);
    mve_update_eci(s);
    return true;
}

static bool do_1op_vec(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn,
                       GVecGen2Fn vecfn)
{
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    if (vecfn && mve_no_predication(s)) {
        vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qm), 16, 16);
    } else {
        qd = mve_qreg_ptr(a->qd);
        qm = mve_qreg_ptr(a->qm);
        fn(cpu_env, qd, qm);
        tcg_temp_free_ptr(qd);
        tcg_temp_free_ptr(qm);
    }
    mve_update_eci(s);
    return true;
}

static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
{
    return do_1op_vec(s, a, fn, NULL);
}

#define DO_1OP_VEC(INSN, FN, VECFN)                             \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_1op_vec(s, a, fns[a->size], VECFN);           \
    }

#define DO_1OP(INSN, FN) DO_1OP_VEC(INSN, FN, NULL)

DO_1OP(VCLZ, vclz)
DO_1OP(VCLS, vcls)
DO_1OP_VEC(VABS, vabs, tcg_gen_gvec_abs)
DO_1OP_VEC(VNEG, vneg, tcg_gen_gvec_neg)
DO_1OP(VQABS, vqabs)
DO_1OP(VQNEG, vqneg)
DO_1OP(VMAXA, vmaxa)
DO_1OP(VMINA, vmina)

/*
 * For simple float/int conversions we use the fixed-point
 * conversion helpers with a zero shift count
 */
#define DO_VCVT(INSN, HFN, SFN)                                         \
    static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_##HFN(env, qd, qm, tcg_constant_i32(0));         \
    }                                                                   \
    static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_##SFN(env, qd, qm, tcg_constant_i32(0));         \
    }                                                                   \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)               \
    {                                                                   \
        static MVEGenOneOpFn * const fns[] = {                          \
            NULL,                                                       \
            gen_##INSN##h,                                              \
            gen_##INSN##s,                                              \
            NULL,                                                       \
        };                                                              \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                         \
            return false;                                               \
        }                                                               \
        return do_1op(s, a, fns[a->size]);                              \
    }

DO_VCVT(VCVT_SF, vcvt_sh, vcvt_sf)
DO_VCVT(VCVT_UF, vcvt_uh, vcvt_uf)
DO_VCVT(VCVT_FS, vcvt_hs, vcvt_fs)
DO_VCVT(VCVT_FU, vcvt_hu, vcvt_fu)

static bool do_vcvt_rmode(DisasContext *s, arg_1op *a,
                          enum arm_fprounding rmode, bool u)
{
    /*
     * Handle VCVT fp to int with specified rounding mode.
     * This is a 1op fn but we must pass the rounding mode as
     * an immediate to the helper.
     */
    TCGv_ptr qd, qm;
    static MVEGenVCVTRmodeFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vcvt_rm_sh, gen_helper_mve_vcvt_rm_uh },
        { gen_helper_mve_vcvt_rm_ss, gen_helper_mve_vcvt_rm_us },
        { NULL, NULL },
    };
    MVEGenVCVTRmodeFn *fn = fns[a->size][u];

    if (!dc_isar_feature(aa32_mve_fp, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(arm_rmode_to_sf(rmode)));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_VCVT_RMODE(INSN, RMODE, U)                           \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        return do_vcvt_rmode(s, a, RMODE, U);                   \
    }                                                           \

DO_VCVT_RMODE(VCVTAS, FPROUNDING_TIEAWAY, false)
DO_VCVT_RMODE(VCVTAU, FPROUNDING_TIEAWAY, true)
DO_VCVT_RMODE(VCVTNS, FPROUNDING_TIEEVEN, false)
DO_VCVT_RMODE(VCVTNU, FPROUNDING_TIEEVEN, true)
DO_VCVT_RMODE(VCVTPS, FPROUNDING_POSINF, false)
DO_VCVT_RMODE(VCVTPU, FPROUNDING_POSINF, true)
DO_VCVT_RMODE(VCVTMS, FPROUNDING_NEGINF, false)
DO_VCVT_RMODE(VCVTMU, FPROUNDING_NEGINF, true)

#define DO_VCVT_SH(INSN, FN)                                    \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_1op(s, a, gen_helper_mve_##FN);               \
    }                                                           \

DO_VCVT_SH(VCVTB_SH, vcvtb_sh)
DO_VCVT_SH(VCVTT_SH, vcvtt_sh)
DO_VCVT_SH(VCVTB_HS, vcvtb_hs)
DO_VCVT_SH(VCVTT_HS, vcvtt_hs)

#define DO_VRINT(INSN, RMODE)                                           \
    static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_vrint_rm_h(env, qd, qm,                          \
                                  tcg_constant_i32(arm_rmode_to_sf(RMODE))); \
    }                                                                   \
    static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_vrint_rm_s(env, qd, qm,                          \
                                  tcg_constant_i32(arm_rmode_to_sf(RMODE))); \
    }                                                                   \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)               \
    {                                                                   \
        static MVEGenOneOpFn * const fns[] = {                          \
            NULL,                                                       \
            gen_##INSN##h,                                              \
            gen_##INSN##s,                                              \
            NULL,                                                       \
        };                                                              \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                         \
            return false;                                               \
        }                                                               \
        return do_1op(s, a, fns[a->size]);                              \
    }

DO_VRINT(VRINTN, FPROUNDING_TIEEVEN)
DO_VRINT(VRINTA, FPROUNDING_TIEAWAY)
DO_VRINT(VRINTZ, FPROUNDING_ZERO)
DO_VRINT(VRINTM, FPROUNDING_NEGINF)
DO_VRINT(VRINTP, FPROUNDING_POSINF)

static bool trans_VRINTX(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vrintx_h,
        gen_helper_mve_vrintx_s,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

/* Narrowing moves: only size 0 and 1 are valid */
#define DO_VMOVN(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            NULL,                                               \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_VMOVN(VMOVNB, vmovnb)
DO_VMOVN(VMOVNT, vmovnt)
DO_VMOVN(VQMOVUNB, vqmovunb)
DO_VMOVN(VQMOVUNT, vqmovunt)
DO_VMOVN(VQMOVN_BS, vqmovnbs)
DO_VMOVN(VQMOVN_TS, vqmovnts)
DO_VMOVN(VQMOVN_BU, vqmovnbu)
DO_VMOVN(VQMOVN_TU, vqmovntu)

static bool trans_VREV16(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev16b,
        NULL,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV32(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev32b,
        gen_helper_mve_vrev32h,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV64(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev64b,
        gen_helper_mve_vrev64h,
        gen_helper_mve_vrev64w,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VMVN(DisasContext *s, arg_1op *a)
{
    return do_1op_vec(s, a, gen_helper_mve_vmvn, tcg_gen_gvec_not);
}

static bool trans_VABS_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfabsh,
        gen_helper_mve_vfabss,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VNEG_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfnegh,
        gen_helper_mve_vfnegs,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool do_2op_vec(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn,
                       GVecGen3Fn *vecfn)
{
    TCGv_ptr qd, qn, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    if (vecfn && mve_no_predication(s)) {
        vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qn),
              mve_qreg_offset(a->qm), 16, 16);
    } else {
        qd = mve_qreg_ptr(a->qd);
        qn = mve_qreg_ptr(a->qn);
        qm = mve_qreg_ptr(a->qm);
        fn(cpu_env, qd, qn, qm);
        tcg_temp_free_ptr(qd);
        tcg_temp_free_ptr(qn);
        tcg_temp_free_ptr(qm);
    }
    mve_update_eci(s);
    return true;
}

static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn *fn)
{
    return do_2op_vec(s, a, fn, NULL);
}

#define DO_LOGIC(INSN, HELPER, VECFN)                           \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        return do_2op_vec(s, a, HELPER, VECFN);                 \
    }

DO_LOGIC(VAND, gen_helper_mve_vand, tcg_gen_gvec_and)
DO_LOGIC(VBIC, gen_helper_mve_vbic, tcg_gen_gvec_andc)
DO_LOGIC(VORR, gen_helper_mve_vorr, tcg_gen_gvec_or)
DO_LOGIC(VORN, gen_helper_mve_vorn, tcg_gen_gvec_orc)
DO_LOGIC(VEOR, gen_helper_mve_veor, tcg_gen_gvec_xor)

static bool trans_VPSEL(DisasContext *s, arg_2op *a)
{
    /* This insn updates predication bits */
    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
    return do_2op(s, a, gen_helper_mve_vpsel);
}

#define DO_2OP_VEC(INSN, FN, VECFN)                             \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_vec(s, a, fns[a->size], VECFN);           \
    }

#define DO_2OP(INSN, FN) DO_2OP_VEC(INSN, FN, NULL)

DO_2OP_VEC(VADD, vadd, tcg_gen_gvec_add)
DO_2OP_VEC(VSUB, vsub, tcg_gen_gvec_sub)
DO_2OP_VEC(VMUL, vmul, tcg_gen_gvec_mul)
DO_2OP(VMULH_S, vmulhs)
DO_2OP(VMULH_U, vmulhu)
DO_2OP(VRMULH_S, vrmulhs)
DO_2OP(VRMULH_U, vrmulhu)
DO_2OP_VEC(VMAX_S, vmaxs, tcg_gen_gvec_smax)
DO_2OP_VEC(VMAX_U, vmaxu, tcg_gen_gvec_umax)
DO_2OP_VEC(VMIN_S, vmins, tcg_gen_gvec_smin)
DO_2OP_VEC(VMIN_U, vminu, tcg_gen_gvec_umin)
DO_2OP(VABD_S, vabds)
DO_2OP(VABD_U, vabdu)
DO_2OP(VHADD_S, vhadds)
DO_2OP(VHADD_U, vhaddu)
DO_2OP(VHSUB_S, vhsubs)
DO_2OP(VHSUB_U, vhsubu)
DO_2OP(VMULL_BS, vmullbs)
DO_2OP(VMULL_BU, vmullbu)
DO_2OP(VMULL_TS, vmullts)
DO_2OP(VMULL_TU, vmulltu)
DO_2OP(VQDMULH, vqdmulh)
DO_2OP(VQRDMULH, vqrdmulh)
DO_2OP(VQADD_S, vqadds)
DO_2OP(VQADD_U, vqaddu)
DO_2OP(VQSUB_S, vqsubs)
DO_2OP(VQSUB_U, vqsubu)
DO_2OP(VSHL_S, vshls)
DO_2OP(VSHL_U, vshlu)
DO_2OP(VRSHL_S, vrshls)
DO_2OP(VRSHL_U, vrshlu)
DO_2OP(VQSHL_S, vqshls)
DO_2OP(VQSHL_U, vqshlu)
DO_2OP(VQRSHL_S, vqrshls)
DO_2OP(VQRSHL_U, vqrshlu)
DO_2OP(VQDMLADH, vqdmladh)
DO_2OP(VQDMLADHX, vqdmladhx)
DO_2OP(VQRDMLADH, vqrdmladh)
DO_2OP(VQRDMLADHX, vqrdmladhx)
DO_2OP(VQDMLSDH, vqdmlsdh)
DO_2OP(VQDMLSDHX, vqdmlsdhx)
DO_2OP(VQRDMLSDH, vqrdmlsdh)
DO_2OP(VQRDMLSDHX, vqrdmlsdhx)
DO_2OP(VRHADD_S, vrhadds)
DO_2OP(VRHADD_U, vrhaddu)
/*
 * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose
 * so we can reuse the DO_2OP macro. (Our implementation calculates the
 * "expected" results in this case.) Similarly for VHCADD.
 */
DO_2OP(VCADD90, vcadd90)
DO_2OP(VCADD270, vcadd270)
DO_2OP(VHCADD90, vhcadd90)
DO_2OP(VHCADD270, vhcadd270)

static bool trans_VQDMULLB(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullbh,
        gen_helper_mve_vqdmullbw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VQDMULLT(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullth,
        gen_helper_mve_vqdmulltw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_B(DisasContext *s, arg_2op *a)
{
    /*
     * Note that a->size indicates the output size, ie VMULL.P8
     * is the 8x8->16 operation and a->size is MO_16; VMULL.P16
     * is the 16x16->32 operation and a->size is MO_32.
     */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpbh,
        gen_helper_mve_vmullpbw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_T(DisasContext *s, arg_2op *a)
{
    /* a->size is as for trans_VMULLP_B */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpth,
        gen_helper_mve_vmullptw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

/*
 * VADC and VSBC: these perform an add-with-carry or subtract-with-carry
 * of the 32-bit elements in each lane of the input vectors, where the
 * carry-out of each add is the carry-in of the next. The initial carry
 * input is either fixed (0 for VADCI, 1 for VSBCI) or is from FPSCR.C
 * (for VADC and VSBC); the carry out at the end is written back to FPSCR.C.
 * These insns are subject to beat-wise execution. Partial execution
 * of an I=1 (initial carry input fixed) insn which does not
 * execute the first beat must start with the current FPSCR.NZCV
 * value, not the fixed constant input.
 */
static bool trans_VADC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vadc);
}

static bool trans_VADCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VADC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vadci);
}

static bool trans_VSBC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vsbc);
}

static bool trans_VSBCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VSBC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vsbci);
}

#define DO_2OP_FP(INSN, FN)                                     \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_2op(s, a, fns[a->size]);                      \
    }

DO_2OP_FP(VADD_fp, vfadd)
DO_2OP_FP(VSUB_fp, vfsub)
DO_2OP_FP(VMUL_fp, vfmul)
DO_2OP_FP(VABD_fp, vfabd)
DO_2OP_FP(VMAXNM, vmaxnm)
DO_2OP_FP(VMINNM, vminnm)
DO_2OP_FP(VCADD90_fp, vfcadd90)
DO_2OP_FP(VCADD270_fp, vfcadd270)
DO_2OP_FP(VFMA, vfma)
DO_2OP_FP(VFMS, vfms)
DO_2OP_FP(VCMUL0, vcmul0)
DO_2OP_FP(VCMUL90, vcmul90)
DO_2OP_FP(VCMUL180, vcmul180)
DO_2OP_FP(VCMUL270, vcmul270)
DO_2OP_FP(VCMLA0, vcmla0)
DO_2OP_FP(VCMLA90, vcmla90)
DO_2OP_FP(VCMLA180, vcmla180)
DO_2OP_FP(VCMLA270, vcmla270)
DO_2OP_FP(VMAXNMA, vmaxnma)
DO_2OP_FP(VMINNMA, vminnma)

static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
                          MVEGenTwoOpScalarFn fn)
{
    TCGv_ptr qd, qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn) ||
        !fn) {
        return false;
    }
    if (a->rm == 13 || a->rm == 15) {
        /* UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qd, qn, rm);
    tcg_temp_free_i32(rm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_2OP_SCALAR(INSN, FN)                                 \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

DO_2OP_SCALAR(VADD_scalar, vadd_scalar)
DO_2OP_SCALAR(VSUB_scalar, vsub_scalar)
DO_2OP_SCALAR(VMUL_scalar, vmul_scalar)
DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar)
DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar)
DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar)
DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar)
DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar)
DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar)
DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar)
DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar)
DO_2OP_SCALAR(VQDMULH_scalar, vqdmulh_scalar)
DO_2OP_SCALAR(VQRDMULH_scalar, vqrdmulh_scalar)
DO_2OP_SCALAR(VBRSR, vbrsr)
DO_2OP_SCALAR(VMLA, vmla)
DO_2OP_SCALAR(VMLAS, vmlas)
DO_2OP_SCALAR(VQDMLAH, vqdmlah)
DO_2OP_SCALAR(VQRDMLAH, vqrdmlah)
DO_2OP_SCALAR(VQDMLASH, vqdmlash)
DO_2OP_SCALAR(VQRDMLASH, vqrdmlash)

static bool trans_VQDMULLB_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullb_scalarh,
        gen_helper_mve_vqdmullb_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool trans_VQDMULLT_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullt_scalarh,
        gen_helper_mve_vqdmullt_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}


#define DO_2OP_FP_SCALAR(INSN, FN)                              \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

DO_2OP_FP_SCALAR(VADD_fp_scalar, vfadd_scalar)
DO_2OP_FP_SCALAR(VSUB_fp_scalar, vfsub_scalar)
DO_2OP_FP_SCALAR(VMUL_fp_scalar, vfmul_scalar)
DO_2OP_FP_SCALAR(VFMA_scalar, vfma_scalar)
DO_2OP_FP_SCALAR(VFMAS_scalar, vfmas_scalar)

static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
                             MVEGenLongDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn | a->qm) ||
        !fn) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        rda = tcg_const_i64(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool trans_VMLALDAV_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavsh, gen_helper_mve_vmlaldavxsh },
        { gen_helper_mve_vmlaldavsw, gen_helper_mve_vmlaldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavuh, NULL },
        { gen_helper_mve_vmlaldavuw, NULL },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLSLDAV(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlsldavsh, gen_helper_mve_vmlsldavxsh },
        { gen_helper_mve_vmlsldavsw, gen_helper_mve_vmlsldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VRMLALDAVH_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhsw, gen_helper_mve_vrmlaldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLALDAVH_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhuw, NULL,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLSLDAVH(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlsldavhsw, gen_helper_mve_vrmlsldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool do_dual_acc(DisasContext *s, arg_vmladav *a, MVEGenDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = load_reg(s, a->rda);
    } else {
        rda = tcg_const_i32(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

#define DO_DUAL_ACC(INSN, FN)                                           \
    static bool trans_##INSN(DisasContext *s, arg_vmladav *a)           \
    {                                                                   \
        static MVEGenDualAccOpFn * const fns[4][2] = {                  \
            { gen_helper_mve_##FN##b, gen_helper_mve_##FN##xb },        \
            { gen_helper_mve_##FN##h, gen_helper_mve_##FN##xh },        \
            { gen_helper_mve_##FN##w, gen_helper_mve_##FN##xw },        \
            { NULL, NULL },                                             \
        };                                                              \
        return do_dual_acc(s, a, fns[a->size][a->x]);                   \
    }

DO_DUAL_ACC(VMLADAV_S, vmladavs)
DO_DUAL_ACC(VMLSDAV, vmlsdav)

static bool trans_VMLADAV_U(DisasContext *s, arg_vmladav *a)
{
    static MVEGenDualAccOpFn * const fns[4][2] = {
        { gen_helper_mve_vmladavub, NULL },
        { gen_helper_mve_vmladavuh, NULL },
        { gen_helper_mve_vmladavuw, NULL },
        { NULL, NULL },
    };
    return do_dual_acc(s, a, fns[a->size][a->x]);
}

static void gen_vpst(DisasContext *s, uint32_t mask)
{
    /*
     * Set the VPR mask fields. We take advantage of MASK01 and MASK23
     * being adjacent fields in the register.
     *
     * Updating the masks is not predicated, but it is subject to beat-wise
     * execution, and the mask is updated on the odd-numbered beats.
     * So if PSR.ECI says we should skip beat 1, we mustn't update the
     * 01 mask field.
     */
    TCGv_i32 vpr = load_cpu_field(v7m.vpr);
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
        /* Update both 01 and 23 fields */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask | (mask << 4)),
                            R_V7M_VPR_MASK01_SHIFT,
                            R_V7M_VPR_MASK01_LENGTH + R_V7M_VPR_MASK23_LENGTH);
        break;
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        /* Update only the 23 mask field */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask),
                            R_V7M_VPR_MASK23_SHIFT, R_V7M_VPR_MASK23_LENGTH);
        break;
    default:
        g_assert_not_reached();
    }
    store_cpu_field(vpr, v7m.vpr);
}

static bool trans_VPST(DisasContext *s, arg_VPST *a)
{
    /* mask == 0 is a "related encoding" */
    if (!dc_isar_feature(aa32_mve, s) || !a->mask) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }
    gen_vpst(s, a->mask);
    mve_update_and_store_eci(s);
    return true;
}

static bool trans_VPNOT(DisasContext *s, arg_VPNOT *a)
{
    /*
     * Invert the predicate in VPR.P0. We have to call out to
     * a helper because this insn itself is beatwise and can
     * be predicated.
     */
    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    gen_helper_mve_vpnot(cpu_env);
    /* This insn updates predication bits */
    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
    mve_update_eci(s);
    return true;
}

static bool trans_VADDV(DisasContext *s, arg_VADDV *a)
{
    /* VADDV: vector add across vector */
    static MVEGenVADDVFn * const fns[4][2] = {
        { gen_helper_mve_vaddvsb, gen_helper_mve_vaddvub },
        { gen_helper_mve_vaddvsh, gen_helper_mve_vaddvuh },
        { gen_helper_mve_vaddvsw, gen_helper_mve_vaddvuw },
        { NULL, NULL }
    };
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        a->size == 3) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of Rda, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from Rda */
        rda = load_reg(s, a->rda);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i32(0);
    }

    qm = mve_qreg_ptr(a->qm);
    fns[a->size][a->u](rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a)
{
    /*
     * Vector Add Long Across Vector: accumulate the 32-bit
     * elements of the vector into a 64-bit result stored in
     * a pair of general-purpose registers.
     * No need to check Qm's bank: it is only 3 bits in decode.
     */
    TCGv_ptr qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of RdaHi:RdaLo, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from RdaHi:RdaLo */
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i64(0);
    }

    qm = mve_qreg_ptr(a->qm);
    if (a->u) {
        gen_helper_mve_vaddlv_u(rda, cpu_env, qm, rda);
    } else {
        gen_helper_mve_vaddlv_s(rda, cpu_env, qm, rda);
    }
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn,
                    GVecGen2iFn *vecfn)
{
    TCGv_ptr qd;
    uint64_t imm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    imm = asimd_imm_const(a->imm, a->cmode, a->op);

    if (vecfn && mve_no_predication(s)) {
        vecfn(MO_64, mve_qreg_offset(a->qd), mve_qreg_offset(a->qd),
              imm, 16, 16);
    } else {
        qd = mve_qreg_ptr(a->qd);
        fn(cpu_env, qd, tcg_constant_i64(imm));
        tcg_temp_free_ptr(qd);
    }
    mve_update_eci(s);
    return true;
}

static void gen_gvec_vmovi(unsigned vece, uint32_t dofs, uint32_t aofs,
                           int64_t c, uint32_t oprsz, uint32_t maxsz)
{
    tcg_gen_gvec_dup_imm(vece, dofs, oprsz, maxsz, c);
}

static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
{
    /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
    MVEGenOneOpImmFn *fn;
    GVecGen2iFn *vecfn;

    if ((a->cmode & 1) && a->cmode < 12) {
        if (a->op) {
            /*
             * For op=1, the immediate will be inverted by asimd_imm_const(),
             * so the VBIC becomes a logical AND operation.
             */
            fn = gen_helper_mve_vandi;
            vecfn = tcg_gen_gvec_andi;
        } else {
            fn = gen_helper_mve_vorri;
            vecfn = tcg_gen_gvec_ori;
        }
    } else {
        /* There is one unallocated cmode/op combination in this space */
        if (a->cmode == 15 && a->op == 1) {
            return false;
        }
        /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */
        fn = gen_helper_mve_vmovi;
        vecfn = gen_gvec_vmovi;
    }
    return do_1imm(s, a, fn, vecfn);
}

static bool do_2shift_vec(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
                          bool negateshift, GVecGen2iFn vecfn)
{
    TCGv_ptr qd, qm;
    int shift = a->shift;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * When we handle a right shift insn using a left-shift helper
     * which permits a negative shift count to indicate a right-shift,
     * we must negate the shift count.
     */
    if (negateshift) {
        shift = -shift;
    }

    if (vecfn && mve_no_predication(s)) {
        vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qm),
              shift, 16, 16);
    } else {
        qd = mve_qreg_ptr(a->qd);
        qm = mve_qreg_ptr(a->qm);
        fn(cpu_env, qd, qm, tcg_constant_i32(shift));
        tcg_temp_free_ptr(qd);
        tcg_temp_free_ptr(qm);
    }
    mve_update_eci(s);
    return true;
}

static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
                      bool negateshift)
{
    return do_2shift_vec(s, a, fn, negateshift, NULL);
}

#define DO_2SHIFT_VEC(INSN, FN, NEGATESHIFT, VECFN)                     \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)            \
    {                                                                   \
        static MVEGenTwoOpShiftFn * const fns[] = {                     \
            gen_helper_mve_##FN##b,                                     \
            gen_helper_mve_##FN##h,                                     \
            gen_helper_mve_##FN##w,                                     \
            NULL,                                                       \
        };                                                              \
        return do_2shift_vec(s, a, fns[a->size], NEGATESHIFT, VECFN);   \
    }

#define DO_2SHIFT(INSN, FN, NEGATESHIFT)        \
    DO_2SHIFT_VEC(INSN, FN, NEGATESHIFT, NULL)

static void do_gvec_shri_s(unsigned vece, uint32_t dofs, uint32_t aofs,
                           int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    /*
     * We get here with a negated shift count, and we must handle
     * shifts by the element size, which tcg_gen_gvec_sari() does not do.
     */
    shift = -shift;
    if (shift == (8 << vece)) {
        shift--;
    }
    tcg_gen_gvec_sari(vece, dofs, aofs, shift, oprsz, maxsz);
}

static void do_gvec_shri_u(unsigned vece, uint32_t dofs, uint32_t aofs,
                           int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    /*
     * We get here with a negated shift count, and we must handle
     * shifts by the element size, which tcg_gen_gvec_shri() does not do.
     */
    shift = -shift;
    if (shift == (8 << vece)) {
        tcg_gen_gvec_dup_imm(vece, dofs, oprsz, maxsz, 0);
    } else {
        tcg_gen_gvec_shri(vece, dofs, aofs, shift, oprsz, maxsz);
    }
}

DO_2SHIFT_VEC(VSHLI, vshli_u, false, tcg_gen_gvec_shli)
DO_2SHIFT(VQSHLI_S, vqshli_s, false)
DO_2SHIFT(VQSHLI_U, vqshli_u, false)
DO_2SHIFT(VQSHLUI, vqshlui_s, false)
/* These right shifts use a left-shift helper with negated shift count */
DO_2SHIFT_VEC(VSHRI_S, vshli_s, true, do_gvec_shri_s)
DO_2SHIFT_VEC(VSHRI_U, vshli_u, true, do_gvec_shri_u)
DO_2SHIFT(VRSHRI_S, vrshli_s, true)
DO_2SHIFT(VRSHRI_U, vrshli_u, true)

DO_2SHIFT_VEC(VSRI, vsri, false, gen_gvec_sri)
DO_2SHIFT_VEC(VSLI, vsli, false, gen_gvec_sli)

#define DO_2SHIFT_FP(INSN, FN)                                  \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_2shift(s, a, gen_helper_mve_##FN, false);     \
    }

DO_2SHIFT_FP(VCVT_SH_fixed, vcvt_sh)
DO_2SHIFT_FP(VCVT_UH_fixed, vcvt_uh)
DO_2SHIFT_FP(VCVT_HS_fixed, vcvt_hs)
DO_2SHIFT_FP(VCVT_HU_fixed, vcvt_hu)
DO_2SHIFT_FP(VCVT_SF_fixed, vcvt_sf)
DO_2SHIFT_FP(VCVT_UF_fixed, vcvt_uf)
DO_2SHIFT_FP(VCVT_FS_fixed, vcvt_fs)
DO_2SHIFT_FP(VCVT_FU_fixed, vcvt_fu)

static bool do_2shift_scalar(DisasContext *s, arg_shl_scalar *a,
                             MVEGenTwoOpShiftFn *fn)
{
    TCGv_ptr qda;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qda) ||
        a->rm == 13 || a->rm == 15 || !fn) {
        /* Rm cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qda = mve_qreg_ptr(a->qda);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qda, qda, rm);
    tcg_temp_free_ptr(qda);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT_SCALAR(INSN, FN)                              \
    static bool trans_##INSN(DisasContext *s, arg_shl_scalar *a) \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2shift_scalar(s, a, fns[a->size]);            \
    }

DO_2SHIFT_SCALAR(VSHL_S_scalar, vshli_s)
DO_2SHIFT_SCALAR(VSHL_U_scalar, vshli_u)
DO_2SHIFT_SCALAR(VRSHL_S_scalar, vrshli_s)
DO_2SHIFT_SCALAR(VRSHL_U_scalar, vrshli_u)
DO_2SHIFT_SCALAR(VQSHL_S_scalar, vqshli_s)
DO_2SHIFT_SCALAR(VQSHL_U_scalar, vqshli_u)
DO_2SHIFT_SCALAR(VQRSHL_S_scalar, vqrshli_s)
DO_2SHIFT_SCALAR(VQRSHL_U_scalar, vqrshli_u)

#define DO_VSHLL(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift_vec(s, a, fns[a->size], false, do_gvec_##FN); \
    }

/*
 * For the VSHLL vector helpers, the vece is the size of the input
 * (ie MO_8 or MO_16); the helpers want to work in the output size.
 * The shift count can be 0..<input size>, inclusive. (0 is VMOVL.)
 */
static void do_gvec_vshllbs(unsigned vece, uint32_t dofs, uint32_t aofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    unsigned ovece = vece + 1;
    unsigned ibits = vece == MO_8 ? 8 : 16;
    tcg_gen_gvec_shli(ovece, dofs, aofs, ibits, oprsz, maxsz);
    tcg_gen_gvec_sari(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
}

static void do_gvec_vshllbu(unsigned vece, uint32_t dofs, uint32_t aofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    unsigned ovece = vece + 1;
    tcg_gen_gvec_andi(ovece, dofs, aofs,
                      ovece == MO_16 ? 0xff : 0xffff, oprsz, maxsz);
    tcg_gen_gvec_shli(ovece, dofs, dofs, shift, oprsz, maxsz);
}

static void do_gvec_vshllts(unsigned vece, uint32_t dofs, uint32_t aofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    unsigned ovece = vece + 1;
    unsigned ibits = vece == MO_8 ? 8 : 16;
    if (shift == 0) {
        tcg_gen_gvec_sari(ovece, dofs, aofs, ibits, oprsz, maxsz);
    } else {
        tcg_gen_gvec_andi(ovece, dofs, aofs,
                          ovece == MO_16 ? 0xff00 : 0xffff0000, oprsz, maxsz);
        tcg_gen_gvec_sari(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
    }
}

static void do_gvec_vshlltu(unsigned vece, uint32_t dofs, uint32_t aofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    unsigned ovece = vece + 1;
    unsigned ibits = vece == MO_8 ? 8 : 16;
    if (shift == 0) {
        tcg_gen_gvec_shri(ovece, dofs, aofs, ibits, oprsz, maxsz);
    } else {
        tcg_gen_gvec_andi(ovece, dofs, aofs,
                          ovece == MO_16 ? 0xff00 : 0xffff0000, oprsz, maxsz);
        tcg_gen_gvec_shri(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
    }
}

DO_VSHLL(VSHLL_BS, vshllbs)
DO_VSHLL(VSHLL_BU, vshllbu)
DO_VSHLL(VSHLL_TS, vshllts)
DO_VSHLL(VSHLL_TU, vshlltu)

#define DO_2SHIFT_N(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_2SHIFT_N(VSHRNB, vshrnb)
DO_2SHIFT_N(VSHRNT, vshrnt)
DO_2SHIFT_N(VRSHRNB, vrshrnb)
DO_2SHIFT_N(VRSHRNT, vrshrnt)
DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s)
DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s)
DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u)
DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u)
DO_2SHIFT_N(VQSHRUNB, vqshrunb)
DO_2SHIFT_N(VQSHRUNT, vqshrunt)
DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s)
DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s)
DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u)
DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u)
DO_2SHIFT_N(VQRSHRUNB, vqrshrunb)
DO_2SHIFT_N(VQRSHRUNT, vqrshrunt)

static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a)
{
    /*
     * Whole Vector Left Shift with Carry. The carry is taken
     * from a general purpose register and written back there.
     * An imm of 0 means "shift by 32".
     */
    TCGv_ptr qd;
    TCGv_i32 rdm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rdm == 13 || a->rdm == 15) {
        /* CONSTRAINED UNPREDICTABLE: we UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rdm = load_reg(s, a->rdm);
    gen_helper_mve_vshlc(rdm, cpu_env, qd, rdm, tcg_constant_i32(a->imm));
    store_reg(s, a->rdm, rdm);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool do_vidup(DisasContext *s, arg_vidup *a, MVEGenVIDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn;

    /*
     * Vector increment/decrement with wrap and duplicate (VIDUP, VDDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->size == MO_64) {
        /* size 0b11 is another encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    fn(rn, cpu_env, qd, rn, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool do_viwdup(DisasContext *s, arg_viwdup *a, MVEGenVIWDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn, rm;

    /*
     * Vector increment/decrement with wrap and duplicate (VIWDUP, VDWDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn. Rm specifies a point where
     * the count wraps back around to 0. The updated offset is written back
     * to Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (!fn || a->rm == 13 || a->rm == 15) {
        /*
         * size 0b11 is another encoding; Rm == 13 and Rm == 15
         * are UNPREDICTABLE
         */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    rm = load_reg(s, a->rm);
    fn(rn, cpu_env, qd, rn, rm, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

static bool trans_VIDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VDDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    /* VDDUP is just like VIDUP but with a negative immediate */
    a->imm = -a->imm;
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VIWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_viwdupb,
        gen_helper_mve_viwduph,
        gen_helper_mve_viwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool trans_VDWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_vdwdupb,
        gen_helper_mve_vdwduph,
        gen_helper_mve_vdwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn)
{
    TCGv_ptr qn, qm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qn, qm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    /* This insn updates predication bits */
    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
    mve_update_eci(s);
    return true;
}

static bool do_vcmp_scalar(DisasContext *s, arg_vcmp_scalar *a,
                           MVEGenScalarCmpFn *fn)
{
    TCGv_ptr qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) || !fn || a->rm == 13) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    if (a->rm == 15) {
        /* Encoding Rm=0b1111 means "constant zero" */
        rm = tcg_constant_i32(0);
    } else {
        rm = load_reg(s, a->rm);
    }
    fn(cpu_env, qn, rm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_i32(rm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    /* This insn updates predication bits */
    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
    mve_update_eci(s);
    return true;
}

#define DO_VCMP(INSN, FN)                                       \
    static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
    {                                                           \
        static MVEGenCmpFn * const fns[] = {                    \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vcmp(s, a, fns[a->size]);                     \
    }                                                           \
    static bool trans_##INSN##_scalar(DisasContext *s,          \
                                      arg_vcmp_scalar *a)       \
    {                                                           \
        static MVEGenScalarCmpFn * const fns[] = {              \
            gen_helper_mve_##FN##_scalarb,                      \
gen_helper_mve_##FN##_scalarh, \ 2067 gen_helper_mve_##FN##_scalarw, \ 2068 NULL, \ 2069 }; \ 2070 return do_vcmp_scalar(s, a, fns[a->size]); \ 2071 } 2072 2073DO_VCMP(VCMPEQ, vcmpeq) 2074DO_VCMP(VCMPNE, vcmpne) 2075DO_VCMP(VCMPCS, vcmpcs) 2076DO_VCMP(VCMPHI, vcmphi) 2077DO_VCMP(VCMPGE, vcmpge) 2078DO_VCMP(VCMPLT, vcmplt) 2079DO_VCMP(VCMPGT, vcmpgt) 2080DO_VCMP(VCMPLE, vcmple) 2081 2082#define DO_VCMP_FP(INSN, FN) \ 2083 static bool trans_##INSN(DisasContext *s, arg_vcmp *a) \ 2084 { \ 2085 static MVEGenCmpFn * const fns[] = { \ 2086 NULL, \ 2087 gen_helper_mve_##FN##h, \ 2088 gen_helper_mve_##FN##s, \ 2089 NULL, \ 2090 }; \ 2091 if (!dc_isar_feature(aa32_mve_fp, s)) { \ 2092 return false; \ 2093 } \ 2094 return do_vcmp(s, a, fns[a->size]); \ 2095 } \ 2096 static bool trans_##INSN##_scalar(DisasContext *s, \ 2097 arg_vcmp_scalar *a) \ 2098 { \ 2099 static MVEGenScalarCmpFn * const fns[] = { \ 2100 NULL, \ 2101 gen_helper_mve_##FN##_scalarh, \ 2102 gen_helper_mve_##FN##_scalars, \ 2103 NULL, \ 2104 }; \ 2105 if (!dc_isar_feature(aa32_mve_fp, s)) { \ 2106 return false; \ 2107 } \ 2108 return do_vcmp_scalar(s, a, fns[a->size]); \ 2109 } 2110 2111DO_VCMP_FP(VCMPEQ_fp, vfcmpeq) 2112DO_VCMP_FP(VCMPNE_fp, vfcmpne) 2113DO_VCMP_FP(VCMPGE_fp, vfcmpge) 2114DO_VCMP_FP(VCMPLT_fp, vfcmplt) 2115DO_VCMP_FP(VCMPGT_fp, vfcmpgt) 2116DO_VCMP_FP(VCMPLE_fp, vfcmple) 2117 2118static bool do_vmaxv(DisasContext *s, arg_vmaxv *a, MVEGenVADDVFn fn) 2119{ 2120 /* 2121 * MIN/MAX operations across a vector: compute the min or 2122 * max of the initial value in a general purpose register 2123 * and all the elements in the vector, and store it back 2124 * into the general purpose register. 2125 */ 2126 TCGv_ptr qm; 2127 TCGv_i32 rda; 2128 2129 if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) || 2130 !fn || a->rda == 13 || a->rda == 15) { 2131 /* Rda cases are UNPREDICTABLE */ 2132 return false; 2133 } 2134 if (!mve_eci_check(s) || !vfp_access_check(s)) { 2135 return true; 2136 } 2137 2138 qm = mve_qreg_ptr(a->qm); 2139 rda = load_reg(s, a->rda); 2140 fn(rda, cpu_env, qm, rda); 2141 store_reg(s, a->rda, rda); 2142 tcg_temp_free_ptr(qm); 2143 mve_update_eci(s); 2144 return true; 2145} 2146 2147#define DO_VMAXV(INSN, FN) \ 2148 static bool trans_##INSN(DisasContext *s, arg_vmaxv *a) \ 2149 { \ 2150 static MVEGenVADDVFn * const fns[] = { \ 2151 gen_helper_mve_##FN##b, \ 2152 gen_helper_mve_##FN##h, \ 2153 gen_helper_mve_##FN##w, \ 2154 NULL, \ 2155 }; \ 2156 return do_vmaxv(s, a, fns[a->size]); \ 2157 } 2158 2159DO_VMAXV(VMAXV_S, vmaxvs) 2160DO_VMAXV(VMAXV_U, vmaxvu) 2161DO_VMAXV(VMAXAV, vmaxav) 2162DO_VMAXV(VMINV_S, vminvs) 2163DO_VMAXV(VMINV_U, vminvu) 2164DO_VMAXV(VMINAV, vminav) 2165 2166#define DO_VMAXV_FP(INSN, FN) \ 2167 static bool trans_##INSN(DisasContext *s, arg_vmaxv *a) \ 2168 { \ 2169 static MVEGenVADDVFn * const fns[] = { \ 2170 NULL, \ 2171 gen_helper_mve_##FN##h, \ 2172 gen_helper_mve_##FN##s, \ 2173 NULL, \ 2174 }; \ 2175 if (!dc_isar_feature(aa32_mve_fp, s)) { \ 2176 return false; \ 2177 } \ 2178 return do_vmaxv(s, a, fns[a->size]); \ 2179 } 2180 2181DO_VMAXV_FP(VMAXNMV, vmaxnmv) 2182DO_VMAXV_FP(VMINNMV, vminnmv) 2183DO_VMAXV_FP(VMAXNMAV, vmaxnmav) 2184DO_VMAXV_FP(VMINNMAV, vminnmav) 2185 2186static bool do_vabav(DisasContext *s, arg_vabav *a, MVEGenVABAVFn *fn) 2187{ 2188 /* Absolute difference accumulated across vector */ 2189 TCGv_ptr qn, qm; 2190 TCGv_i32 rda; 2191 2192 if (!dc_isar_feature(aa32_mve, s) || 2193 !mve_check_qreg_bank(s, a->qm | a->qn) || 2194 !fn || a->rda == 13 || 
a->rda == 15) { 2195 /* Rda cases are UNPREDICTABLE */ 2196 return false; 2197 } 2198 if (!mve_eci_check(s) || !vfp_access_check(s)) { 2199 return true; 2200 } 2201 2202 qm = mve_qreg_ptr(a->qm); 2203 qn = mve_qreg_ptr(a->qn); 2204 rda = load_reg(s, a->rda); 2205 fn(rda, cpu_env, qn, qm, rda); 2206 store_reg(s, a->rda, rda); 2207 tcg_temp_free_ptr(qm); 2208 tcg_temp_free_ptr(qn); 2209 mve_update_eci(s); 2210 return true; 2211} 2212 2213#define DO_VABAV(INSN, FN) \ 2214 static bool trans_##INSN(DisasContext *s, arg_vabav *a) \ 2215 { \ 2216 static MVEGenVABAVFn * const fns[] = { \ 2217 gen_helper_mve_##FN##b, \ 2218 gen_helper_mve_##FN##h, \ 2219 gen_helper_mve_##FN##w, \ 2220 NULL, \ 2221 }; \ 2222 return do_vabav(s, a, fns[a->size]); \ 2223 } 2224 2225DO_VABAV(VABAV_S, vabavs) 2226DO_VABAV(VABAV_U, vabavu) 2227 2228static bool trans_VMOV_to_2gp(DisasContext *s, arg_VMOV_to_2gp *a) 2229{ 2230 /* 2231 * VMOV two 32-bit vector lanes to two general-purpose registers. 2232 * This insn is not predicated but it is subject to beat-wise 2233 * execution if it is not in an IT block. For us this means 2234 * only that if PSR.ECI says we should not be executing the beat 2235 * corresponding to the lane of the vector register being accessed 2236 * then we should skip performing the move, and that we need to do 2237 * the usual check for bad ECI state and advance of ECI state. 2238 * (If PSR.ECI is non-zero then we cannot be in an IT block.) 2239 */ 2240 TCGv_i32 tmp; 2241 int vd; 2242 2243 if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) || 2244 a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15 || 2245 a->rt == a->rt2) { 2246 /* Rt/Rt2 cases are UNPREDICTABLE */ 2247 return false; 2248 } 2249 if (!mve_eci_check(s) || !vfp_access_check(s)) { 2250 return true; 2251 } 2252 2253 /* Convert Qreg index to Dreg for read_neon_element32() etc */ 2254 vd = a->qd * 2; 2255 2256 if (!mve_skip_vmov(s, vd, a->idx, MO_32)) { 2257 tmp = tcg_temp_new_i32(); 2258 read_neon_element32(tmp, vd, a->idx, MO_32); 2259 store_reg(s, a->rt, tmp); 2260 } 2261 if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) { 2262 tmp = tcg_temp_new_i32(); 2263 read_neon_element32(tmp, vd + 1, a->idx, MO_32); 2264 store_reg(s, a->rt2, tmp); 2265 } 2266 2267 mve_update_and_store_eci(s); 2268 return true; 2269} 2270 2271static bool trans_VMOV_from_2gp(DisasContext *s, arg_VMOV_to_2gp *a) 2272{ 2273 /* 2274 * VMOV two general-purpose registers to two 32-bit vector lanes. 2275 * This insn is not predicated but it is subject to beat-wise 2276 * execution if it is not in an IT block. For us this means 2277 * only that if PSR.ECI says we should not be executing the beat 2278 * corresponding to the lane of the vector register being accessed 2279 * then we should skip performing the move, and that we need to do 2280 * the usual check for bad ECI state and advance of ECI state. 2281 * (If PSR.ECI is non-zero then we cannot be in an IT block.) 
2282 */ 2283 TCGv_i32 tmp; 2284 int vd; 2285 2286 if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) || 2287 a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15) { 2288 /* Rt/Rt2 cases are UNPREDICTABLE */ 2289 return false; 2290 } 2291 if (!mve_eci_check(s) || !vfp_access_check(s)) { 2292 return true; 2293 } 2294 2295 /* Convert Qreg idx to Dreg for read_neon_element32() etc */ 2296 vd = a->qd * 2; 2297 2298 if (!mve_skip_vmov(s, vd, a->idx, MO_32)) { 2299 tmp = load_reg(s, a->rt); 2300 write_neon_element32(tmp, vd, a->idx, MO_32); 2301 tcg_temp_free_i32(tmp); 2302 } 2303 if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) { 2304 tmp = load_reg(s, a->rt2); 2305 write_neon_element32(tmp, vd + 1, a->idx, MO_32); 2306 tcg_temp_free_i32(tmp); 2307 } 2308 2309 mve_update_and_store_eci(s); 2310 return true; 2311}
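
/*
 * Worked example (illustrative only; nothing above uses it): each DO_* macro
 * in this file expands into one or two trans functions that pick a
 * size-specific helper and hand off to the shared do_*() routine. For
 * instance, DO_VCMP(VCMPEQ, vcmpeq) above produces a vector-form function
 * of roughly this shape (plus the equivalent _scalar variant):
 *
 *   static bool trans_VCMPEQ(DisasContext *s, arg_vcmp *a)
 *   {
 *       static MVEGenCmpFn * const fns[] = {
 *           gen_helper_mve_vcmpeqb,
 *           gen_helper_mve_vcmpeqh,
 *           gen_helper_mve_vcmpeqw,
 *           NULL,
 *       };
 *       return do_vcmp(s, a, fns[a->size]);
 *   }
 *
 * The NULL entry for size 0b11 makes do_vcmp() reject that encoding via its
 * !fn check, just as the explicit size checks do elsewhere in this file.
 */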