translate-vfp.c (94448B)
/*
 * ARM translation: AArch32 VFP instructions
 *
 * Copyright (c) 2003 Fabrice Bellard
 * Copyright (c) 2005-2007 CodeSourcery
 * Copyright (c) 2007 OpenedHand, Ltd.
 * Copyright (c) 2019 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/exec-all.h"
#include "exec/gen-icount.h"
#include "translate.h"
#include "translate-a32.h"

/* Include the generated VFP decoder */
#include "decode-vfp.c.inc"
#include "decode-vfp-uncond.c.inc"

static inline void vfp_load_reg64(TCGv_i64 var, int reg)
{
    tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(true, reg));
}

static inline void vfp_store_reg64(TCGv_i64 var, int reg)
{
    tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(true, reg));
}

static inline void vfp_load_reg32(TCGv_i32 var, int reg)
{
    tcg_gen_ld_i32(var, cpu_env, vfp_reg_offset(false, reg));
}

static inline void vfp_store_reg32(TCGv_i32 var, int reg)
{
    tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg));
}

/*
 * The imm8 encodes the sign bit, enough bits to represent an exponent in
 * the range 01....1xx to 10....0xx, and the most significant 4 bits of
 * the mantissa; see VFPExpandImm() in the v8 ARM ARM.
 */
uint64_t vfp_expand_imm(int size, uint8_t imm8)
{
    uint64_t imm;

    switch (size) {
    case MO_64:
        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
              (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
              extract32(imm8, 0, 6);
        imm <<= 48;
        break;
    case MO_32:
        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
              (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
              (extract32(imm8, 0, 6) << 3);
        imm <<= 16;
        break;
    case MO_16:
        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
              (extract32(imm8, 6, 1) ? 0x3000 : 0x4000) |
              (extract32(imm8, 0, 6) << 6);
        break;
    default:
        g_assert_not_reached();
    }
    return imm;
}

/*
 * Return the offset of a 16-bit half of the specified VFP single-precision
 * register. If top is true, returns the top 16 bits; otherwise the bottom
 * 16 bits.
 */
static inline long vfp_f16_offset(unsigned reg, bool top)
{
    long offs = vfp_reg_offset(false, reg);
#ifdef HOST_WORDS_BIGENDIAN
    if (!top) {
        offs += 2;
    }
#else
    if (top) {
        offs += 2;
    }
#endif
    return offs;
}

/*
 * Generate code for M-profile lazy FP state preservation if needed;
 * this corresponds to the pseudocode PreserveFPState() function.
 */
static void gen_preserve_fp_state(DisasContext *s, bool skip_context_update)
{
    if (s->v7m_lspact) {
        /*
         * Lazy state saving affects external memory and also the NVIC,
         * so we must mark it as an IO operation for icount (and cause
         * this to be the last insn in the TB).
         */
        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
            s->base.is_jmp = DISAS_UPDATE_EXIT;
            gen_io_start();
        }
        gen_helper_v7m_preserve_fp_state(cpu_env);
        /*
         * If the preserve_fp_state helper doesn't throw an exception
         * then it will clear LSPACT; we don't need to repeat this for
         * any further FP insns in this TB.
         */
        s->v7m_lspact = false;
        /*
         * The helper might have zeroed VPR, so we do not know the
         * correct value for the MVE_NO_PRED TB flag any more.
         * If we're about to create a new fp context then that
         * will precisely determine the MVE_NO_PRED value (see
         * gen_update_fp_context()). Otherwise, we must:
         *  - set s->mve_no_pred to false, so this instruction
         *    is generated to use helper functions
         *  - end the TB now, without chaining to the next TB
         */
        if (skip_context_update || !s->v7m_new_fp_ctxt_needed) {
            s->mve_no_pred = false;
            s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
        }
    }
}

/*
 * Generate code for M-profile FP context handling: update the
 * ownership of the FP context, and create a new context if
 * necessary. This corresponds to the parts of the pseudocode
 * ExecuteFPCheck() after the initial PreserveFPState() call.
 */
static void gen_update_fp_context(DisasContext *s)
{
    /* Update ownership of FP context: set FPCCR.S to match current state */
    if (s->v8m_fpccr_s_wrong) {
        TCGv_i32 tmp;

        tmp = load_cpu_field(v7m.fpccr[M_REG_S]);
        if (s->v8m_secure) {
            tcg_gen_ori_i32(tmp, tmp, R_V7M_FPCCR_S_MASK);
        } else {
            tcg_gen_andi_i32(tmp, tmp, ~R_V7M_FPCCR_S_MASK);
        }
        store_cpu_field(tmp, v7m.fpccr[M_REG_S]);
        /* Don't need to do this for any further FP insns in this TB */
        s->v8m_fpccr_s_wrong = false;
    }

    if (s->v7m_new_fp_ctxt_needed) {
        /*
         * Create new FP context by updating CONTROL.FPCA, CONTROL.SFPA,
         * the FPSCR, and VPR.
         */
        TCGv_i32 control, fpscr;
        uint32_t bits = R_V7M_CONTROL_FPCA_MASK;

        fpscr = load_cpu_field(v7m.fpdscr[s->v8m_secure]);
        gen_helper_vfp_set_fpscr(cpu_env, fpscr);
        tcg_temp_free_i32(fpscr);
        if (dc_isar_feature(aa32_mve, s)) {
            TCGv_i32 z32 = tcg_const_i32(0);
            store_cpu_field(z32, v7m.vpr);
        }
        /*
         * We just updated the FPSCR and VPR. Some of this state is cached
         * in the MVE_NO_PRED TB flag. We want to avoid having to end the
         * TB here, which means we need the new value of the MVE_NO_PRED
         * flag to be exactly known here and the same for all executions.
         * Luckily FPDSCR.LTPSIZE is always constant 4 and the VPR is
         * always set to 0, so the new MVE_NO_PRED flag is always 1
         * if and only if we have MVE.
         *
         * (The other FPSCR state cached in TB flags is VECLEN and VECSTRIDE,
         * but those do not exist for M-profile, so are not relevant here.)
         */
        s->mve_no_pred = dc_isar_feature(aa32_mve, s);

        if (s->v8m_secure) {
            bits |= R_V7M_CONTROL_SFPA_MASK;
        }
        control = load_cpu_field(v7m.control[M_REG_S]);
        tcg_gen_ori_i32(control, control, bits);
        store_cpu_field(control, v7m.control[M_REG_S]);
        /* Don't need to do this for any further FP insns in this TB */
        s->v7m_new_fp_ctxt_needed = false;
    }
}

/*
 * Check that VFP access is enabled, A-profile specific version.
 *
 * If VFP is enabled, return true. If not, emit code to generate an
 * appropriate exception and return false.
 * The ignore_vfp_enabled argument specifies that we should ignore
 * whether VFP is enabled via FPEXC.EN: this should be true for FMXR/FMRX
 * accesses to FPSID, FPEXC, MVFR0, MVFR1, MVFR2, and false for all other insns.
 */
static bool vfp_access_check_a(DisasContext *s, bool ignore_vfp_enabled)
{
    if (s->fp_excp_el) {
        gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
                           syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
        return false;
    }

    if (!s->vfp_enabled && !ignore_vfp_enabled) {
        assert(!arm_dc_feature(s, ARM_FEATURE_M));
        unallocated_encoding(s);
        return false;
    }
    return true;
}

/*
 * Check that VFP access is enabled, M-profile specific version.
 *
 * If VFP is enabled, do the necessary M-profile lazy-FP handling and then
 * return true. If not, emit code to generate an appropriate exception and
 * return false.
 * skip_context_update is true to skip the "update FP context" part of this.
 */
bool vfp_access_check_m(DisasContext *s, bool skip_context_update)
{
    if (s->fp_excp_el) {
        /*
         * M-profile mostly catches the "FPU disabled" case early, in
         * disas_m_nocp(), but a few insns (eg LCTP, WLSTP, DLSTP)
         * which do coprocessor-checks are outside the large ranges of
         * the encoding space handled by the patterns in m-nocp.decode,
         * and for them we may need to raise NOCP here.
         */
        gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
                           syn_uncategorized(), s->fp_excp_el);
        return false;
    }

    /* Handle M-profile lazy FP state mechanics */

    /* Trigger lazy-state preservation if necessary */
    gen_preserve_fp_state(s, skip_context_update);

    if (!skip_context_update) {
        /* Update ownership of FP context and create new FP context if needed */
        gen_update_fp_context(s);
    }

    return true;
}

/*
 * The most usual kind of VFP access check, for everything except
 * FMXR/FMRX to the always-available special registers.
 */
bool vfp_access_check(DisasContext *s)
{
    if (arm_dc_feature(s, ARM_FEATURE_M)) {
        return vfp_access_check_m(s, false);
    } else {
        return vfp_access_check_a(s, false);
    }
}

static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
{
    uint32_t rd, rn, rm;
    int sz = a->sz;

    if (!dc_isar_feature(aa32_vsel, s)) {
        return false;
    }

    if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vm | a->vn | a->vd) & 0x10)) {
        return false;
    }

    rd = a->vd;
    rn = a->vn;
    rm = a->vm;

    if (!vfp_access_check(s)) {
        return true;
    }

    if (sz == 3) {
        TCGv_i64 frn, frm, dest;
        TCGv_i64 tmp, zero, zf, nf, vf;

        zero = tcg_const_i64(0);

        frn = tcg_temp_new_i64();
        frm = tcg_temp_new_i64();
        dest = tcg_temp_new_i64();

        zf = tcg_temp_new_i64();
        nf = tcg_temp_new_i64();
        vf = tcg_temp_new_i64();

        tcg_gen_extu_i32_i64(zf, cpu_ZF);
        tcg_gen_ext_i32_i64(nf, cpu_NF);
        tcg_gen_ext_i32_i64(vf, cpu_VF);

        vfp_load_reg64(frn, rn);
        vfp_load_reg64(frm, rm);
        switch (a->cc) {
        case 0: /* eq: Z */
            tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero,
                                frn, frm);
            break;
        case 1: /* vs: V */
            tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero,
                                frn, frm);
            break;
        case 2: /* ge: N == V -> N ^ V == 0 */
            tmp = tcg_temp_new_i64();
            tcg_gen_xor_i64(tmp, vf, nf);
            tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
                                frn, frm);
            tcg_temp_free_i64(tmp);
            break;
        case 3: /* gt: !Z && N == V */
            tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero,
                                frn, frm);
            tmp = tcg_temp_new_i64();
            tcg_gen_xor_i64(tmp, vf, nf);
            tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
                                dest, frm);
            tcg_temp_free_i64(tmp);
            break;
        }
        vfp_store_reg64(dest, rd);
        tcg_temp_free_i64(frn);
        tcg_temp_free_i64(frm);
        tcg_temp_free_i64(dest);

        tcg_temp_free_i64(zf);
        tcg_temp_free_i64(nf);
        tcg_temp_free_i64(vf);

        tcg_temp_free_i64(zero);
    } else {
        TCGv_i32 frn, frm, dest;
        TCGv_i32 tmp, zero;

        zero = tcg_const_i32(0);

        frn = tcg_temp_new_i32();
        frm = tcg_temp_new_i32();
        dest = tcg_temp_new_i32();
        vfp_load_reg32(frn, rn);
        vfp_load_reg32(frm, rm);
        switch (a->cc) {
        case 0: /* eq: Z */
            tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero,
                                frn, frm);
            break;
        case 1: /* vs: V */
            tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero,
                                frn, frm);
            break;
        case 2: /* ge: N == V -> N ^ V == 0 */
            tmp = tcg_temp_new_i32();
            tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
            tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
                                frn, frm);
            tcg_temp_free_i32(tmp);
            break;
        case 3: /* gt: !Z && N == V */
            tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero,
                                frn, frm);
            tmp = tcg_temp_new_i32();
            tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
            tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
                                dest, frm);
            tcg_temp_free_i32(tmp);
            break;
        }
        /* For fp16 the top half is always zeroes */
        if (sz == 1) {
            tcg_gen_andi_i32(dest, dest, 0xffff);
        }
        vfp_store_reg32(dest, rd);
        tcg_temp_free_i32(frn);
        tcg_temp_free_i32(frm);
        tcg_temp_free_i32(dest);

tcg_temp_free_i32(zero); 419 } 420 421 return true; 422} 423 424/* 425 * Table for converting the most common AArch32 encoding of 426 * rounding mode to arm_fprounding order (which matches the 427 * common AArch64 order); see ARM ARM pseudocode FPDecodeRM(). 428 */ 429static const uint8_t fp_decode_rm[] = { 430 FPROUNDING_TIEAWAY, 431 FPROUNDING_TIEEVEN, 432 FPROUNDING_POSINF, 433 FPROUNDING_NEGINF, 434}; 435 436static bool trans_VRINT(DisasContext *s, arg_VRINT *a) 437{ 438 uint32_t rd, rm; 439 int sz = a->sz; 440 TCGv_ptr fpst; 441 TCGv_i32 tcg_rmode; 442 int rounding = fp_decode_rm[a->rm]; 443 444 if (!dc_isar_feature(aa32_vrint, s)) { 445 return false; 446 } 447 448 if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) { 449 return false; 450 } 451 452 if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) { 453 return false; 454 } 455 456 /* UNDEF accesses to D16-D31 if they don't exist */ 457 if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) && 458 ((a->vm | a->vd) & 0x10)) { 459 return false; 460 } 461 462 rd = a->vd; 463 rm = a->vm; 464 465 if (!vfp_access_check(s)) { 466 return true; 467 } 468 469 if (sz == 1) { 470 fpst = fpstatus_ptr(FPST_FPCR_F16); 471 } else { 472 fpst = fpstatus_ptr(FPST_FPCR); 473 } 474 475 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding)); 476 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); 477 478 if (sz == 3) { 479 TCGv_i64 tcg_op; 480 TCGv_i64 tcg_res; 481 tcg_op = tcg_temp_new_i64(); 482 tcg_res = tcg_temp_new_i64(); 483 vfp_load_reg64(tcg_op, rm); 484 gen_helper_rintd(tcg_res, tcg_op, fpst); 485 vfp_store_reg64(tcg_res, rd); 486 tcg_temp_free_i64(tcg_op); 487 tcg_temp_free_i64(tcg_res); 488 } else { 489 TCGv_i32 tcg_op; 490 TCGv_i32 tcg_res; 491 tcg_op = tcg_temp_new_i32(); 492 tcg_res = tcg_temp_new_i32(); 493 vfp_load_reg32(tcg_op, rm); 494 if (sz == 1) { 495 gen_helper_rinth(tcg_res, tcg_op, fpst); 496 } else { 497 gen_helper_rints(tcg_res, tcg_op, fpst); 498 } 499 vfp_store_reg32(tcg_res, rd); 500 tcg_temp_free_i32(tcg_op); 501 tcg_temp_free_i32(tcg_res); 502 } 503 504 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); 505 tcg_temp_free_i32(tcg_rmode); 506 507 tcg_temp_free_ptr(fpst); 508 return true; 509} 510 511static bool trans_VCVT(DisasContext *s, arg_VCVT *a) 512{ 513 uint32_t rd, rm; 514 int sz = a->sz; 515 TCGv_ptr fpst; 516 TCGv_i32 tcg_rmode, tcg_shift; 517 int rounding = fp_decode_rm[a->rm]; 518 bool is_signed = a->op; 519 520 if (!dc_isar_feature(aa32_vcvt_dr, s)) { 521 return false; 522 } 523 524 if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) { 525 return false; 526 } 527 528 if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) { 529 return false; 530 } 531 532 /* UNDEF accesses to D16-D31 if they don't exist */ 533 if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) { 534 return false; 535 } 536 537 rd = a->vd; 538 rm = a->vm; 539 540 if (!vfp_access_check(s)) { 541 return true; 542 } 543 544 if (sz == 1) { 545 fpst = fpstatus_ptr(FPST_FPCR_F16); 546 } else { 547 fpst = fpstatus_ptr(FPST_FPCR); 548 } 549 550 tcg_shift = tcg_const_i32(0); 551 552 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding)); 553 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); 554 555 if (sz == 3) { 556 TCGv_i64 tcg_double, tcg_res; 557 TCGv_i32 tcg_tmp; 558 tcg_double = tcg_temp_new_i64(); 559 tcg_res = tcg_temp_new_i64(); 560 tcg_tmp = tcg_temp_new_i32(); 561 vfp_load_reg64(tcg_double, rm); 562 if (is_signed) { 563 gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst); 564 } else { 565 gen_helper_vfp_tould(tcg_res, tcg_double, 
tcg_shift, fpst); 566 } 567 tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res); 568 vfp_store_reg32(tcg_tmp, rd); 569 tcg_temp_free_i32(tcg_tmp); 570 tcg_temp_free_i64(tcg_res); 571 tcg_temp_free_i64(tcg_double); 572 } else { 573 TCGv_i32 tcg_single, tcg_res; 574 tcg_single = tcg_temp_new_i32(); 575 tcg_res = tcg_temp_new_i32(); 576 vfp_load_reg32(tcg_single, rm); 577 if (sz == 1) { 578 if (is_signed) { 579 gen_helper_vfp_toslh(tcg_res, tcg_single, tcg_shift, fpst); 580 } else { 581 gen_helper_vfp_toulh(tcg_res, tcg_single, tcg_shift, fpst); 582 } 583 } else { 584 if (is_signed) { 585 gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst); 586 } else { 587 gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst); 588 } 589 } 590 vfp_store_reg32(tcg_res, rd); 591 tcg_temp_free_i32(tcg_res); 592 tcg_temp_free_i32(tcg_single); 593 } 594 595 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); 596 tcg_temp_free_i32(tcg_rmode); 597 598 tcg_temp_free_i32(tcg_shift); 599 600 tcg_temp_free_ptr(fpst); 601 602 return true; 603} 604 605bool mve_skip_vmov(DisasContext *s, int vn, int index, int size) 606{ 607 /* 608 * In a CPU with MVE, the VMOV (vector lane to general-purpose register) 609 * and VMOV (general-purpose register to vector lane) insns are not 610 * predicated, but they are subject to beatwise execution if they are 611 * not in an IT block. 612 * 613 * Since our implementation always executes all 4 beats in one tick, 614 * this means only that if PSR.ECI says we should not be executing 615 * the beat corresponding to the lane of the vector register being 616 * accessed then we should skip performing the move, and that we need 617 * to do the usual check for bad ECI state and advance of ECI state. 618 * 619 * Note that if PSR.ECI is non-zero then we cannot be in an IT block. 620 * 621 * Return true if this VMOV scalar <-> gpreg should be skipped because 622 * the MVE PSR.ECI state says we skip the beat where the store happens. 623 */ 624 625 /* Calculate the byte offset into Qn which we're going to access */ 626 int ofs = (index << size) + ((vn & 1) * 8); 627 628 if (!dc_isar_feature(aa32_mve, s)) { 629 return false; 630 } 631 632 switch (s->eci) { 633 case ECI_NONE: 634 return false; 635 case ECI_A0: 636 return ofs < 4; 637 case ECI_A0A1: 638 return ofs < 8; 639 case ECI_A0A1A2: 640 case ECI_A0A1A2B0: 641 return ofs < 12; 642 default: 643 g_assert_not_reached(); 644 } 645} 646 647static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a) 648{ 649 /* VMOV scalar to general purpose register */ 650 TCGv_i32 tmp; 651 652 /* 653 * SIZE == MO_32 is a VFP instruction; otherwise NEON. MVE has 654 * all sizes, whether the CPU has fp or not. 655 */ 656 if (!dc_isar_feature(aa32_mve, s)) { 657 if (a->size == MO_32 658 ? !dc_isar_feature(aa32_fpsp_v2, s) 659 : !arm_dc_feature(s, ARM_FEATURE_NEON)) { 660 return false; 661 } 662 } 663 664 /* UNDEF accesses to D16-D31 if they don't exist */ 665 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) { 666 return false; 667 } 668 669 if (dc_isar_feature(aa32_mve, s)) { 670 if (!mve_eci_check(s)) { 671 return true; 672 } 673 } 674 675 if (!vfp_access_check(s)) { 676 return true; 677 } 678 679 if (!mve_skip_vmov(s, a->vn, a->index, a->size)) { 680 tmp = tcg_temp_new_i32(); 681 read_neon_element32(tmp, a->vn, a->index, 682 a->size | (a->u ? 
0 : MO_SIGN)); 683 store_reg(s, a->rt, tmp); 684 } 685 686 if (dc_isar_feature(aa32_mve, s)) { 687 mve_update_and_store_eci(s); 688 } 689 return true; 690} 691 692static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a) 693{ 694 /* VMOV general purpose register to scalar */ 695 TCGv_i32 tmp; 696 697 /* 698 * SIZE == MO_32 is a VFP instruction; otherwise NEON. MVE has 699 * all sizes, whether the CPU has fp or not. 700 */ 701 if (!dc_isar_feature(aa32_mve, s)) { 702 if (a->size == MO_32 703 ? !dc_isar_feature(aa32_fpsp_v2, s) 704 : !arm_dc_feature(s, ARM_FEATURE_NEON)) { 705 return false; 706 } 707 } 708 709 /* UNDEF accesses to D16-D31 if they don't exist */ 710 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) { 711 return false; 712 } 713 714 if (dc_isar_feature(aa32_mve, s)) { 715 if (!mve_eci_check(s)) { 716 return true; 717 } 718 } 719 720 if (!vfp_access_check(s)) { 721 return true; 722 } 723 724 if (!mve_skip_vmov(s, a->vn, a->index, a->size)) { 725 tmp = load_reg(s, a->rt); 726 write_neon_element32(tmp, a->vn, a->index, a->size); 727 tcg_temp_free_i32(tmp); 728 } 729 730 if (dc_isar_feature(aa32_mve, s)) { 731 mve_update_and_store_eci(s); 732 } 733 return true; 734} 735 736static bool trans_VDUP(DisasContext *s, arg_VDUP *a) 737{ 738 /* VDUP (general purpose register) */ 739 TCGv_i32 tmp; 740 int size, vec_size; 741 742 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 743 return false; 744 } 745 746 /* UNDEF accesses to D16-D31 if they don't exist */ 747 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) { 748 return false; 749 } 750 751 if (a->b && a->e) { 752 return false; 753 } 754 755 if (a->q && (a->vn & 1)) { 756 return false; 757 } 758 759 vec_size = a->q ? 16 : 8; 760 if (a->b) { 761 size = 0; 762 } else if (a->e) { 763 size = 1; 764 } else { 765 size = 2; 766 } 767 768 if (!vfp_access_check(s)) { 769 return true; 770 } 771 772 tmp = load_reg(s, a->rt); 773 tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(a->vn), 774 vec_size, vec_size, tmp); 775 tcg_temp_free_i32(tmp); 776 777 return true; 778} 779 780static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a) 781{ 782 TCGv_i32 tmp; 783 bool ignore_vfp_enabled = false; 784 785 if (arm_dc_feature(s, ARM_FEATURE_M)) { 786 /* M profile version was already handled in m-nocp.decode */ 787 return false; 788 } 789 790 if (!dc_isar_feature(aa32_fpsp_v2, s)) { 791 return false; 792 } 793 794 switch (a->reg) { 795 case ARM_VFP_FPSID: 796 /* 797 * VFPv2 allows access to FPSID from userspace; VFPv3 restricts 798 * all ID registers to privileged access only. 799 */ 800 if (IS_USER(s) && dc_isar_feature(aa32_fpsp_v3, s)) { 801 return false; 802 } 803 ignore_vfp_enabled = true; 804 break; 805 case ARM_VFP_MVFR0: 806 case ARM_VFP_MVFR1: 807 if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_MVFR)) { 808 return false; 809 } 810 ignore_vfp_enabled = true; 811 break; 812 case ARM_VFP_MVFR2: 813 if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_V8)) { 814 return false; 815 } 816 ignore_vfp_enabled = true; 817 break; 818 case ARM_VFP_FPSCR: 819 break; 820 case ARM_VFP_FPEXC: 821 if (IS_USER(s)) { 822 return false; 823 } 824 ignore_vfp_enabled = true; 825 break; 826 case ARM_VFP_FPINST: 827 case ARM_VFP_FPINST2: 828 /* Not present in VFPv3 */ 829 if (IS_USER(s) || dc_isar_feature(aa32_fpsp_v3, s)) { 830 return false; 831 } 832 break; 833 default: 834 return false; 835 } 836 837 /* 838 * Call vfp_access_check_a() directly, because we need to tell 839 * it to ignore FPEXC.EN for some register accesses. 
840 */ 841 if (!vfp_access_check_a(s, ignore_vfp_enabled)) { 842 return true; 843 } 844 845 if (a->l) { 846 /* VMRS, move VFP special register to gp register */ 847 switch (a->reg) { 848 case ARM_VFP_MVFR0: 849 case ARM_VFP_MVFR1: 850 case ARM_VFP_MVFR2: 851 case ARM_VFP_FPSID: 852 if (s->current_el == 1) { 853 TCGv_i32 tcg_reg, tcg_rt; 854 855 gen_set_condexec(s); 856 gen_set_pc_im(s, s->pc_curr); 857 tcg_reg = tcg_const_i32(a->reg); 858 tcg_rt = tcg_const_i32(a->rt); 859 gen_helper_check_hcr_el2_trap(cpu_env, tcg_rt, tcg_reg); 860 tcg_temp_free_i32(tcg_reg); 861 tcg_temp_free_i32(tcg_rt); 862 } 863 /* fall through */ 864 case ARM_VFP_FPEXC: 865 case ARM_VFP_FPINST: 866 case ARM_VFP_FPINST2: 867 tmp = load_cpu_field(vfp.xregs[a->reg]); 868 break; 869 case ARM_VFP_FPSCR: 870 if (a->rt == 15) { 871 tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]); 872 tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK); 873 } else { 874 tmp = tcg_temp_new_i32(); 875 gen_helper_vfp_get_fpscr(tmp, cpu_env); 876 } 877 break; 878 default: 879 g_assert_not_reached(); 880 } 881 882 if (a->rt == 15) { 883 /* Set the 4 flag bits in the CPSR. */ 884 gen_set_nzcv(tmp); 885 tcg_temp_free_i32(tmp); 886 } else { 887 store_reg(s, a->rt, tmp); 888 } 889 } else { 890 /* VMSR, move gp register to VFP special register */ 891 switch (a->reg) { 892 case ARM_VFP_FPSID: 893 case ARM_VFP_MVFR0: 894 case ARM_VFP_MVFR1: 895 case ARM_VFP_MVFR2: 896 /* Writes are ignored. */ 897 break; 898 case ARM_VFP_FPSCR: 899 tmp = load_reg(s, a->rt); 900 gen_helper_vfp_set_fpscr(cpu_env, tmp); 901 tcg_temp_free_i32(tmp); 902 gen_lookup_tb(s); 903 break; 904 case ARM_VFP_FPEXC: 905 /* 906 * TODO: VFP subarchitecture support. 907 * For now, keep the EN bit only 908 */ 909 tmp = load_reg(s, a->rt); 910 tcg_gen_andi_i32(tmp, tmp, 1 << 30); 911 store_cpu_field(tmp, vfp.xregs[a->reg]); 912 gen_lookup_tb(s); 913 break; 914 case ARM_VFP_FPINST: 915 case ARM_VFP_FPINST2: 916 tmp = load_reg(s, a->rt); 917 store_cpu_field(tmp, vfp.xregs[a->reg]); 918 break; 919 default: 920 g_assert_not_reached(); 921 } 922 } 923 924 return true; 925} 926 927 928static bool trans_VMOV_half(DisasContext *s, arg_VMOV_single *a) 929{ 930 TCGv_i32 tmp; 931 932 if (!dc_isar_feature(aa32_fp16_arith, s)) { 933 return false; 934 } 935 936 if (a->rt == 15) { 937 /* UNPREDICTABLE; we choose to UNDEF */ 938 return false; 939 } 940 941 if (!vfp_access_check(s)) { 942 return true; 943 } 944 945 if (a->l) { 946 /* VFP to general purpose register */ 947 tmp = tcg_temp_new_i32(); 948 vfp_load_reg32(tmp, a->vn); 949 tcg_gen_andi_i32(tmp, tmp, 0xffff); 950 store_reg(s, a->rt, tmp); 951 } else { 952 /* general purpose register to VFP */ 953 tmp = load_reg(s, a->rt); 954 tcg_gen_andi_i32(tmp, tmp, 0xffff); 955 vfp_store_reg32(tmp, a->vn); 956 tcg_temp_free_i32(tmp); 957 } 958 959 return true; 960} 961 962static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a) 963{ 964 TCGv_i32 tmp; 965 966 if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) { 967 return false; 968 } 969 970 if (!vfp_access_check(s)) { 971 return true; 972 } 973 974 if (a->l) { 975 /* VFP to general purpose register */ 976 tmp = tcg_temp_new_i32(); 977 vfp_load_reg32(tmp, a->vn); 978 if (a->rt == 15) { 979 /* Set the 4 flag bits in the CPSR. 
*/ 980 gen_set_nzcv(tmp); 981 tcg_temp_free_i32(tmp); 982 } else { 983 store_reg(s, a->rt, tmp); 984 } 985 } else { 986 /* general purpose register to VFP */ 987 tmp = load_reg(s, a->rt); 988 vfp_store_reg32(tmp, a->vn); 989 tcg_temp_free_i32(tmp); 990 } 991 992 return true; 993} 994 995static bool trans_VMOV_64_sp(DisasContext *s, arg_VMOV_64_sp *a) 996{ 997 TCGv_i32 tmp; 998 999 if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) { 1000 return false; 1001 } 1002 1003 /* 1004 * VMOV between two general-purpose registers and two single precision 1005 * floating point registers 1006 */ 1007 if (!vfp_access_check(s)) { 1008 return true; 1009 } 1010 1011 if (a->op) { 1012 /* fpreg to gpreg */ 1013 tmp = tcg_temp_new_i32(); 1014 vfp_load_reg32(tmp, a->vm); 1015 store_reg(s, a->rt, tmp); 1016 tmp = tcg_temp_new_i32(); 1017 vfp_load_reg32(tmp, a->vm + 1); 1018 store_reg(s, a->rt2, tmp); 1019 } else { 1020 /* gpreg to fpreg */ 1021 tmp = load_reg(s, a->rt); 1022 vfp_store_reg32(tmp, a->vm); 1023 tcg_temp_free_i32(tmp); 1024 tmp = load_reg(s, a->rt2); 1025 vfp_store_reg32(tmp, a->vm + 1); 1026 tcg_temp_free_i32(tmp); 1027 } 1028 1029 return true; 1030} 1031 1032static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_dp *a) 1033{ 1034 TCGv_i32 tmp; 1035 1036 /* 1037 * VMOV between two general-purpose registers and one double precision 1038 * floating point register. Note that this does not require support 1039 * for double precision arithmetic. 1040 */ 1041 if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) { 1042 return false; 1043 } 1044 1045 /* UNDEF accesses to D16-D31 if they don't exist */ 1046 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) { 1047 return false; 1048 } 1049 1050 if (!vfp_access_check(s)) { 1051 return true; 1052 } 1053 1054 if (a->op) { 1055 /* fpreg to gpreg */ 1056 tmp = tcg_temp_new_i32(); 1057 vfp_load_reg32(tmp, a->vm * 2); 1058 store_reg(s, a->rt, tmp); 1059 tmp = tcg_temp_new_i32(); 1060 vfp_load_reg32(tmp, a->vm * 2 + 1); 1061 store_reg(s, a->rt2, tmp); 1062 } else { 1063 /* gpreg to fpreg */ 1064 tmp = load_reg(s, a->rt); 1065 vfp_store_reg32(tmp, a->vm * 2); 1066 tcg_temp_free_i32(tmp); 1067 tmp = load_reg(s, a->rt2); 1068 vfp_store_reg32(tmp, a->vm * 2 + 1); 1069 tcg_temp_free_i32(tmp); 1070 } 1071 1072 return true; 1073} 1074 1075static bool trans_VLDR_VSTR_hp(DisasContext *s, arg_VLDR_VSTR_sp *a) 1076{ 1077 uint32_t offset; 1078 TCGv_i32 addr, tmp; 1079 1080 if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) { 1081 return false; 1082 } 1083 1084 if (!vfp_access_check(s)) { 1085 return true; 1086 } 1087 1088 /* imm8 field is offset/2 for fp16, unlike fp32 and fp64 */ 1089 offset = a->imm << 1; 1090 if (!a->u) { 1091 offset = -offset; 1092 } 1093 1094 /* For thumb, use of PC is UNPREDICTABLE. 
*/ 1095 addr = add_reg_for_lit(s, a->rn, offset); 1096 tmp = tcg_temp_new_i32(); 1097 if (a->l) { 1098 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UW | MO_ALIGN); 1099 vfp_store_reg32(tmp, a->vd); 1100 } else { 1101 vfp_load_reg32(tmp, a->vd); 1102 gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UW | MO_ALIGN); 1103 } 1104 tcg_temp_free_i32(tmp); 1105 tcg_temp_free_i32(addr); 1106 1107 return true; 1108} 1109 1110static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a) 1111{ 1112 uint32_t offset; 1113 TCGv_i32 addr, tmp; 1114 1115 if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) { 1116 return false; 1117 } 1118 1119 if (!vfp_access_check(s)) { 1120 return true; 1121 } 1122 1123 offset = a->imm << 2; 1124 if (!a->u) { 1125 offset = -offset; 1126 } 1127 1128 /* For thumb, use of PC is UNPREDICTABLE. */ 1129 addr = add_reg_for_lit(s, a->rn, offset); 1130 tmp = tcg_temp_new_i32(); 1131 if (a->l) { 1132 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN); 1133 vfp_store_reg32(tmp, a->vd); 1134 } else { 1135 vfp_load_reg32(tmp, a->vd); 1136 gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN); 1137 } 1138 tcg_temp_free_i32(tmp); 1139 tcg_temp_free_i32(addr); 1140 1141 return true; 1142} 1143 1144static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_dp *a) 1145{ 1146 uint32_t offset; 1147 TCGv_i32 addr; 1148 TCGv_i64 tmp; 1149 1150 /* Note that this does not require support for double arithmetic. */ 1151 if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) { 1152 return false; 1153 } 1154 1155 /* UNDEF accesses to D16-D31 if they don't exist */ 1156 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { 1157 return false; 1158 } 1159 1160 if (!vfp_access_check(s)) { 1161 return true; 1162 } 1163 1164 offset = a->imm << 2; 1165 if (!a->u) { 1166 offset = -offset; 1167 } 1168 1169 /* For thumb, use of PC is UNPREDICTABLE. */ 1170 addr = add_reg_for_lit(s, a->rn, offset); 1171 tmp = tcg_temp_new_i64(); 1172 if (a->l) { 1173 gen_aa32_ld_i64(s, tmp, addr, get_mem_index(s), MO_Q | MO_ALIGN_4); 1174 vfp_store_reg64(tmp, a->vd); 1175 } else { 1176 vfp_load_reg64(tmp, a->vd); 1177 gen_aa32_st_i64(s, tmp, addr, get_mem_index(s), MO_Q | MO_ALIGN_4); 1178 } 1179 tcg_temp_free_i64(tmp); 1180 tcg_temp_free_i32(addr); 1181 1182 return true; 1183} 1184 1185static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a) 1186{ 1187 uint32_t offset; 1188 TCGv_i32 addr, tmp; 1189 int i, n; 1190 1191 if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) { 1192 return false; 1193 } 1194 1195 n = a->imm; 1196 1197 if (n == 0 || (a->vd + n) > 32) { 1198 /* 1199 * UNPREDICTABLE cases for bad immediates: we choose to 1200 * UNDEF to avoid generating huge numbers of TCG ops 1201 */ 1202 return false; 1203 } 1204 if (a->rn == 15 && a->w) { 1205 /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */ 1206 return false; 1207 } 1208 1209 s->eci_handled = true; 1210 1211 if (!vfp_access_check(s)) { 1212 return true; 1213 } 1214 1215 /* For thumb, use of PC is UNPREDICTABLE. */ 1216 addr = add_reg_for_lit(s, a->rn, 0); 1217 if (a->p) { 1218 /* pre-decrement */ 1219 tcg_gen_addi_i32(addr, addr, -(a->imm << 2)); 1220 } 1221 1222 if (s->v8m_stackcheck && a->rn == 13 && a->w) { 1223 /* 1224 * Here 'addr' is the lowest address we will store to, 1225 * and is either the old SP (if post-increment) or 1226 * the new SP (if pre-decrement). 
For post-increment 1227 * where the old value is below the limit and the new 1228 * value is above, it is UNKNOWN whether the limit check 1229 * triggers; we choose to trigger. 1230 */ 1231 gen_helper_v8m_stackcheck(cpu_env, addr); 1232 } 1233 1234 offset = 4; 1235 tmp = tcg_temp_new_i32(); 1236 for (i = 0; i < n; i++) { 1237 if (a->l) { 1238 /* load */ 1239 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN); 1240 vfp_store_reg32(tmp, a->vd + i); 1241 } else { 1242 /* store */ 1243 vfp_load_reg32(tmp, a->vd + i); 1244 gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN); 1245 } 1246 tcg_gen_addi_i32(addr, addr, offset); 1247 } 1248 tcg_temp_free_i32(tmp); 1249 if (a->w) { 1250 /* writeback */ 1251 if (a->p) { 1252 offset = -offset * n; 1253 tcg_gen_addi_i32(addr, addr, offset); 1254 } 1255 store_reg(s, a->rn, addr); 1256 } else { 1257 tcg_temp_free_i32(addr); 1258 } 1259 1260 clear_eci_state(s); 1261 return true; 1262} 1263 1264static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a) 1265{ 1266 uint32_t offset; 1267 TCGv_i32 addr; 1268 TCGv_i64 tmp; 1269 int i, n; 1270 1271 /* Note that this does not require support for double arithmetic. */ 1272 if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) { 1273 return false; 1274 } 1275 1276 n = a->imm >> 1; 1277 1278 if (n == 0 || (a->vd + n) > 32 || n > 16) { 1279 /* 1280 * UNPREDICTABLE cases for bad immediates: we choose to 1281 * UNDEF to avoid generating huge numbers of TCG ops 1282 */ 1283 return false; 1284 } 1285 if (a->rn == 15 && a->w) { 1286 /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */ 1287 return false; 1288 } 1289 1290 /* UNDEF accesses to D16-D31 if they don't exist */ 1291 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd + n) > 16) { 1292 return false; 1293 } 1294 1295 s->eci_handled = true; 1296 1297 if (!vfp_access_check(s)) { 1298 return true; 1299 } 1300 1301 /* For thumb, use of PC is UNPREDICTABLE. */ 1302 addr = add_reg_for_lit(s, a->rn, 0); 1303 if (a->p) { 1304 /* pre-decrement */ 1305 tcg_gen_addi_i32(addr, addr, -(a->imm << 2)); 1306 } 1307 1308 if (s->v8m_stackcheck && a->rn == 13 && a->w) { 1309 /* 1310 * Here 'addr' is the lowest address we will store to, 1311 * and is either the old SP (if post-increment) or 1312 * the new SP (if pre-decrement). For post-increment 1313 * where the old value is below the limit and the new 1314 * value is above, it is UNKNOWN whether the limit check 1315 * triggers; we choose to trigger. 1316 */ 1317 gen_helper_v8m_stackcheck(cpu_env, addr); 1318 } 1319 1320 offset = 8; 1321 tmp = tcg_temp_new_i64(); 1322 for (i = 0; i < n; i++) { 1323 if (a->l) { 1324 /* load */ 1325 gen_aa32_ld_i64(s, tmp, addr, get_mem_index(s), MO_Q | MO_ALIGN_4); 1326 vfp_store_reg64(tmp, a->vd + i); 1327 } else { 1328 /* store */ 1329 vfp_load_reg64(tmp, a->vd + i); 1330 gen_aa32_st_i64(s, tmp, addr, get_mem_index(s), MO_Q | MO_ALIGN_4); 1331 } 1332 tcg_gen_addi_i32(addr, addr, offset); 1333 } 1334 tcg_temp_free_i64(tmp); 1335 if (a->w) { 1336 /* writeback */ 1337 if (a->p) { 1338 offset = -offset * n; 1339 } else if (a->imm & 1) { 1340 offset = 4; 1341 } else { 1342 offset = 0; 1343 } 1344 1345 if (offset != 0) { 1346 tcg_gen_addi_i32(addr, addr, offset); 1347 } 1348 store_reg(s, a->rn, addr); 1349 } else { 1350 tcg_temp_free_i32(addr); 1351 } 1352 1353 clear_eci_state(s); 1354 return true; 1355} 1356 1357/* 1358 * Types for callbacks for do_vfp_3op_sp() and do_vfp_3op_dp(). 1359 * The callback should emit code to write a value to vd. 
If 1360 * do_vfp_3op_{sp,dp}() was passed reads_vd then the TCGv vd 1361 * will contain the old value of the relevant VFP register; 1362 * otherwise it must be written to only. 1363 */ 1364typedef void VFPGen3OpSPFn(TCGv_i32 vd, 1365 TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst); 1366typedef void VFPGen3OpDPFn(TCGv_i64 vd, 1367 TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst); 1368 1369/* 1370 * Types for callbacks for do_vfp_2op_sp() and do_vfp_2op_dp(). 1371 * The callback should emit code to write a value to vd (which 1372 * should be written to only). 1373 */ 1374typedef void VFPGen2OpSPFn(TCGv_i32 vd, TCGv_i32 vm); 1375typedef void VFPGen2OpDPFn(TCGv_i64 vd, TCGv_i64 vm); 1376 1377/* 1378 * Return true if the specified S reg is in a scalar bank 1379 * (ie if it is s0..s7) 1380 */ 1381static inline bool vfp_sreg_is_scalar(int reg) 1382{ 1383 return (reg & 0x18) == 0; 1384} 1385 1386/* 1387 * Return true if the specified D reg is in a scalar bank 1388 * (ie if it is d0..d3 or d16..d19) 1389 */ 1390static inline bool vfp_dreg_is_scalar(int reg) 1391{ 1392 return (reg & 0xc) == 0; 1393} 1394 1395/* 1396 * Advance the S reg number forwards by delta within its bank 1397 * (ie increment the low 3 bits but leave the rest the same) 1398 */ 1399static inline int vfp_advance_sreg(int reg, int delta) 1400{ 1401 return ((reg + delta) & 0x7) | (reg & ~0x7); 1402} 1403 1404/* 1405 * Advance the D reg number forwards by delta within its bank 1406 * (ie increment the low 2 bits but leave the rest the same) 1407 */ 1408static inline int vfp_advance_dreg(int reg, int delta) 1409{ 1410 return ((reg + delta) & 0x3) | (reg & ~0x3); 1411} 1412 1413/* 1414 * Perform a 3-operand VFP data processing instruction. fn is the 1415 * callback to do the actual operation; this function deals with the 1416 * code to handle looping around for VFP vector processing. 1417 */ 1418static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn, 1419 int vd, int vn, int vm, bool reads_vd) 1420{ 1421 uint32_t delta_m = 0; 1422 uint32_t delta_d = 0; 1423 int veclen = s->vec_len; 1424 TCGv_i32 f0, f1, fd; 1425 TCGv_ptr fpst; 1426 1427 if (!dc_isar_feature(aa32_fpsp_v2, s)) { 1428 return false; 1429 } 1430 1431 if (!dc_isar_feature(aa32_fpshvec, s) && 1432 (veclen != 0 || s->vec_stride != 0)) { 1433 return false; 1434 } 1435 1436 if (!vfp_access_check(s)) { 1437 return true; 1438 } 1439 1440 if (veclen > 0) { 1441 /* Figure out what type of vector operation this is. 
*/ 1442 if (vfp_sreg_is_scalar(vd)) { 1443 /* scalar */ 1444 veclen = 0; 1445 } else { 1446 delta_d = s->vec_stride + 1; 1447 1448 if (vfp_sreg_is_scalar(vm)) { 1449 /* mixed scalar/vector */ 1450 delta_m = 0; 1451 } else { 1452 /* vector */ 1453 delta_m = delta_d; 1454 } 1455 } 1456 } 1457 1458 f0 = tcg_temp_new_i32(); 1459 f1 = tcg_temp_new_i32(); 1460 fd = tcg_temp_new_i32(); 1461 fpst = fpstatus_ptr(FPST_FPCR); 1462 1463 vfp_load_reg32(f0, vn); 1464 vfp_load_reg32(f1, vm); 1465 1466 for (;;) { 1467 if (reads_vd) { 1468 vfp_load_reg32(fd, vd); 1469 } 1470 fn(fd, f0, f1, fpst); 1471 vfp_store_reg32(fd, vd); 1472 1473 if (veclen == 0) { 1474 break; 1475 } 1476 1477 /* Set up the operands for the next iteration */ 1478 veclen--; 1479 vd = vfp_advance_sreg(vd, delta_d); 1480 vn = vfp_advance_sreg(vn, delta_d); 1481 vfp_load_reg32(f0, vn); 1482 if (delta_m) { 1483 vm = vfp_advance_sreg(vm, delta_m); 1484 vfp_load_reg32(f1, vm); 1485 } 1486 } 1487 1488 tcg_temp_free_i32(f0); 1489 tcg_temp_free_i32(f1); 1490 tcg_temp_free_i32(fd); 1491 tcg_temp_free_ptr(fpst); 1492 1493 return true; 1494} 1495 1496static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn, 1497 int vd, int vn, int vm, bool reads_vd) 1498{ 1499 /* 1500 * Do a half-precision operation. Functionally this is 1501 * the same as do_vfp_3op_sp(), except: 1502 * - it uses the FPST_FPCR_F16 1503 * - it doesn't need the VFP vector handling (fp16 is a 1504 * v8 feature, and in v8 VFP vectors don't exist) 1505 * - it does the aa32_fp16_arith feature test 1506 */ 1507 TCGv_i32 f0, f1, fd; 1508 TCGv_ptr fpst; 1509 1510 if (!dc_isar_feature(aa32_fp16_arith, s)) { 1511 return false; 1512 } 1513 1514 if (s->vec_len != 0 || s->vec_stride != 0) { 1515 return false; 1516 } 1517 1518 if (!vfp_access_check(s)) { 1519 return true; 1520 } 1521 1522 f0 = tcg_temp_new_i32(); 1523 f1 = tcg_temp_new_i32(); 1524 fd = tcg_temp_new_i32(); 1525 fpst = fpstatus_ptr(FPST_FPCR_F16); 1526 1527 vfp_load_reg32(f0, vn); 1528 vfp_load_reg32(f1, vm); 1529 1530 if (reads_vd) { 1531 vfp_load_reg32(fd, vd); 1532 } 1533 fn(fd, f0, f1, fpst); 1534 vfp_store_reg32(fd, vd); 1535 1536 tcg_temp_free_i32(f0); 1537 tcg_temp_free_i32(f1); 1538 tcg_temp_free_i32(fd); 1539 tcg_temp_free_ptr(fpst); 1540 1541 return true; 1542} 1543 1544static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn, 1545 int vd, int vn, int vm, bool reads_vd) 1546{ 1547 uint32_t delta_m = 0; 1548 uint32_t delta_d = 0; 1549 int veclen = s->vec_len; 1550 TCGv_i64 f0, f1, fd; 1551 TCGv_ptr fpst; 1552 1553 if (!dc_isar_feature(aa32_fpdp_v2, s)) { 1554 return false; 1555 } 1556 1557 /* UNDEF accesses to D16-D31 if they don't exist */ 1558 if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vn | vm) & 0x10)) { 1559 return false; 1560 } 1561 1562 if (!dc_isar_feature(aa32_fpshvec, s) && 1563 (veclen != 0 || s->vec_stride != 0)) { 1564 return false; 1565 } 1566 1567 if (!vfp_access_check(s)) { 1568 return true; 1569 } 1570 1571 if (veclen > 0) { 1572 /* Figure out what type of vector operation this is. 
*/ 1573 if (vfp_dreg_is_scalar(vd)) { 1574 /* scalar */ 1575 veclen = 0; 1576 } else { 1577 delta_d = (s->vec_stride >> 1) + 1; 1578 1579 if (vfp_dreg_is_scalar(vm)) { 1580 /* mixed scalar/vector */ 1581 delta_m = 0; 1582 } else { 1583 /* vector */ 1584 delta_m = delta_d; 1585 } 1586 } 1587 } 1588 1589 f0 = tcg_temp_new_i64(); 1590 f1 = tcg_temp_new_i64(); 1591 fd = tcg_temp_new_i64(); 1592 fpst = fpstatus_ptr(FPST_FPCR); 1593 1594 vfp_load_reg64(f0, vn); 1595 vfp_load_reg64(f1, vm); 1596 1597 for (;;) { 1598 if (reads_vd) { 1599 vfp_load_reg64(fd, vd); 1600 } 1601 fn(fd, f0, f1, fpst); 1602 vfp_store_reg64(fd, vd); 1603 1604 if (veclen == 0) { 1605 break; 1606 } 1607 /* Set up the operands for the next iteration */ 1608 veclen--; 1609 vd = vfp_advance_dreg(vd, delta_d); 1610 vn = vfp_advance_dreg(vn, delta_d); 1611 vfp_load_reg64(f0, vn); 1612 if (delta_m) { 1613 vm = vfp_advance_dreg(vm, delta_m); 1614 vfp_load_reg64(f1, vm); 1615 } 1616 } 1617 1618 tcg_temp_free_i64(f0); 1619 tcg_temp_free_i64(f1); 1620 tcg_temp_free_i64(fd); 1621 tcg_temp_free_ptr(fpst); 1622 1623 return true; 1624} 1625 1626static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm) 1627{ 1628 uint32_t delta_m = 0; 1629 uint32_t delta_d = 0; 1630 int veclen = s->vec_len; 1631 TCGv_i32 f0, fd; 1632 1633 /* Note that the caller must check the aa32_fpsp_v2 feature. */ 1634 1635 if (!dc_isar_feature(aa32_fpshvec, s) && 1636 (veclen != 0 || s->vec_stride != 0)) { 1637 return false; 1638 } 1639 1640 if (!vfp_access_check(s)) { 1641 return true; 1642 } 1643 1644 if (veclen > 0) { 1645 /* Figure out what type of vector operation this is. */ 1646 if (vfp_sreg_is_scalar(vd)) { 1647 /* scalar */ 1648 veclen = 0; 1649 } else { 1650 delta_d = s->vec_stride + 1; 1651 1652 if (vfp_sreg_is_scalar(vm)) { 1653 /* mixed scalar/vector */ 1654 delta_m = 0; 1655 } else { 1656 /* vector */ 1657 delta_m = delta_d; 1658 } 1659 } 1660 } 1661 1662 f0 = tcg_temp_new_i32(); 1663 fd = tcg_temp_new_i32(); 1664 1665 vfp_load_reg32(f0, vm); 1666 1667 for (;;) { 1668 fn(fd, f0); 1669 vfp_store_reg32(fd, vd); 1670 1671 if (veclen == 0) { 1672 break; 1673 } 1674 1675 if (delta_m == 0) { 1676 /* single source one-many */ 1677 while (veclen--) { 1678 vd = vfp_advance_sreg(vd, delta_d); 1679 vfp_store_reg32(fd, vd); 1680 } 1681 break; 1682 } 1683 1684 /* Set up the operands for the next iteration */ 1685 veclen--; 1686 vd = vfp_advance_sreg(vd, delta_d); 1687 vm = vfp_advance_sreg(vm, delta_m); 1688 vfp_load_reg32(f0, vm); 1689 } 1690 1691 tcg_temp_free_i32(f0); 1692 tcg_temp_free_i32(fd); 1693 1694 return true; 1695} 1696 1697static bool do_vfp_2op_hp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm) 1698{ 1699 /* 1700 * Do a half-precision operation. 
Functionally this is
     * the same as do_vfp_2op_sp(), except:
     *  - it doesn't need the VFP vector handling (fp16 is a
     *    v8 feature, and in v8 VFP vectors don't exist)
     *  - it does the aa32_fp16_arith feature test
     */
    TCGv_i32 f0;

    /* Note that the caller must check the aa32_fp16_arith feature */

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    f0 = tcg_temp_new_i32();
    vfp_load_reg32(f0, vm);
    fn(f0, f0);
    vfp_store_reg32(f0, vd);
    tcg_temp_free_i32(f0);

    return true;
}

static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
{
    uint32_t delta_m = 0;
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i64 f0, fd;

    /* Note that the caller must check the aa32_fpdp_v2 feature. */

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vm) & 0x10)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is. */
        if (vfp_dreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = (s->vec_stride >> 1) + 1;

            if (vfp_dreg_is_scalar(vm)) {
                /* mixed scalar/vector */
                delta_m = 0;
            } else {
                /* vector */
                delta_m = delta_d;
            }
        }
    }

    f0 = tcg_temp_new_i64();
    fd = tcg_temp_new_i64();

    vfp_load_reg64(f0, vm);

    for (;;) {
        fn(fd, f0);
        vfp_store_reg64(fd, vd);

        if (veclen == 0) {
            break;
        }

        if (delta_m == 0) {
            /* single source one-many */
            while (veclen--) {
                vd = vfp_advance_dreg(vd, delta_d);
                vfp_store_reg64(fd, vd);
            }
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_dreg(vd, delta_d);
        vm = vfp_advance_dreg(vm, delta_m);
        vfp_load_reg64(f0, vm);
    }

    tcg_temp_free_i64(f0);
    tcg_temp_free_i64(fd);

    return true;
}

static void gen_VMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* Note that order of inputs to the add matters for NaNs */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VMLA_hp(DisasContext *s, arg_VMLA_sp *a)
{
    return do_vfp_3op_hp(s, gen_VMLA_hp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* Note that order of inputs to the add matters for NaNs */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VMLA_sp(DisasContext *s, arg_VMLA_sp *a)
{
    return do_vfp_3op_sp(s, gen_VMLA_sp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /* Note that order of inputs to the add matters for NaNs */
    TCGv_i64 tmp = tcg_temp_new_i64();
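    /*
     * As with the fp16 and single-precision variants above, VMLA is not a
     * fused multiply-add: the product is rounded before the accumulate,
     * so a separate multiply and add are used rather than a muladd helper.
     */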

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
    tcg_temp_free_i64(tmp);
}

static bool trans_VMLA_dp(DisasContext *s, arg_VMLA_dp *a)
{
    return do_vfp_3op_dp(s, gen_VMLA_dp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * VMLS: vd = vd + -(vn * vm)
     * Note that order of inputs to the add matters for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_negh(tmp, tmp);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VMLS_hp(DisasContext *s, arg_VMLS_sp *a)
{
    return do_vfp_3op_hp(s, gen_VMLS_hp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * VMLS: vd = vd + -(vn * vm)
     * Note that order of inputs to the add matters for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_negs(tmp, tmp);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VMLS_sp(DisasContext *s, arg_VMLS_sp *a)
{
    return do_vfp_3op_sp(s, gen_VMLS_sp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /*
     * VMLS: vd = vd + -(vn * vm)
     * Note that order of inputs to the add matters for NaNs.
     */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_negd(tmp, tmp);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
    tcg_temp_free_i64(tmp);
}

static bool trans_VMLS_dp(DisasContext *s, arg_VMLS_dp *a)
{
    return do_vfp_3op_dp(s, gen_VMLS_dp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * VNMLS: -fd + (fn * fm)
     * Note that it isn't valid to replace (-A + B) with (B - A) or similar
     * plausible looking simplifications because this will give wrong results
     * for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_negh(vd, vd);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VNMLS_hp(DisasContext *s, arg_VNMLS_sp *a)
{
    return do_vfp_3op_hp(s, gen_VNMLS_hp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * VNMLS: -fd + (fn * fm)
     * Note that it isn't valid to replace (-A + B) with (B - A) or similar
     * plausible looking simplifications because this will give wrong results
     * for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_negs(vd, vd);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VNMLS_sp(DisasContext *s, arg_VNMLS_sp *a)
{
    return do_vfp_3op_sp(s, gen_VNMLS_sp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /*
     * VNMLS: -fd + (fn * fm)
     * Note that it isn't valid to replace (-A + B) with (B - A) or similar
     * plausible looking simplifications because this will give wrong results
     * for NaNs.
     */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_negd(vd, vd);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
    tcg_temp_free_i64(tmp);
}

static bool trans_VNMLS_dp(DisasContext *s, arg_VNMLS_dp *a)
{
    return do_vfp_3op_dp(s, gen_VNMLS_dp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* VNMLA: -fd + -(fn * fm) */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_negh(tmp, tmp);
    gen_helper_vfp_negh(vd, vd);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VNMLA_hp(DisasContext *s, arg_VNMLA_sp *a)
{
    return do_vfp_3op_hp(s, gen_VNMLA_hp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* VNMLA: -fd + -(fn * fm) */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_negs(tmp, tmp);
    gen_helper_vfp_negs(vd, vd);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VNMLA_sp(DisasContext *s, arg_VNMLA_sp *a)
{
    return do_vfp_3op_sp(s, gen_VNMLA_sp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /* VNMLA: -fd + -(fn * fm) */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_negd(tmp, tmp);
    gen_helper_vfp_negd(vd, vd);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
    tcg_temp_free_i64(tmp);
}

static bool trans_VNMLA_dp(DisasContext *s, arg_VNMLA_dp *a)
{
    return do_vfp_3op_dp(s, gen_VNMLA_dp, a->vd, a->vn, a->vm, true);
}

static bool trans_VMUL_hp(DisasContext *s, arg_VMUL_sp *a)
{
    return do_vfp_3op_hp(s, gen_helper_vfp_mulh, a->vd, a->vn, a->vm, false);
}

static bool trans_VMUL_sp(DisasContext *s, arg_VMUL_sp *a)
{
    return do_vfp_3op_sp(s, gen_helper_vfp_muls, a->vd, a->vn, a->vm, false);
}

static bool trans_VMUL_dp(DisasContext *s, arg_VMUL_dp *a)
{
    return do_vfp_3op_dp(s, gen_helper_vfp_muld, a->vd, a->vn, a->vm, false);
}

static void gen_VNMUL_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* VNMUL: -(fn * fm) */
    gen_helper_vfp_mulh(vd, vn, vm, fpst);
    gen_helper_vfp_negh(vd, vd);
}

static bool trans_VNMUL_hp(DisasContext *s, arg_VNMUL_sp *a)
{
    return do_vfp_3op_hp(s, gen_VNMUL_hp, a->vd, a->vn, a->vm, false);
}

static void gen_VNMUL_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* VNMUL: -(fn *
fm) */ 2053 gen_helper_vfp_muls(vd, vn, vm, fpst); 2054 gen_helper_vfp_negs(vd, vd); 2055} 2056 2057static bool trans_VNMUL_sp(DisasContext *s, arg_VNMUL_sp *a) 2058{ 2059 return do_vfp_3op_sp(s, gen_VNMUL_sp, a->vd, a->vn, a->vm, false); 2060} 2061 2062static void gen_VNMUL_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst) 2063{ 2064 /* VNMUL: -(fn * fm) */ 2065 gen_helper_vfp_muld(vd, vn, vm, fpst); 2066 gen_helper_vfp_negd(vd, vd); 2067} 2068 2069static bool trans_VNMUL_dp(DisasContext *s, arg_VNMUL_dp *a) 2070{ 2071 return do_vfp_3op_dp(s, gen_VNMUL_dp, a->vd, a->vn, a->vm, false); 2072} 2073 2074static bool trans_VADD_hp(DisasContext *s, arg_VADD_sp *a) 2075{ 2076 return do_vfp_3op_hp(s, gen_helper_vfp_addh, a->vd, a->vn, a->vm, false); 2077} 2078 2079static bool trans_VADD_sp(DisasContext *s, arg_VADD_sp *a) 2080{ 2081 return do_vfp_3op_sp(s, gen_helper_vfp_adds, a->vd, a->vn, a->vm, false); 2082} 2083 2084static bool trans_VADD_dp(DisasContext *s, arg_VADD_dp *a) 2085{ 2086 return do_vfp_3op_dp(s, gen_helper_vfp_addd, a->vd, a->vn, a->vm, false); 2087} 2088 2089static bool trans_VSUB_hp(DisasContext *s, arg_VSUB_sp *a) 2090{ 2091 return do_vfp_3op_hp(s, gen_helper_vfp_subh, a->vd, a->vn, a->vm, false); 2092} 2093 2094static bool trans_VSUB_sp(DisasContext *s, arg_VSUB_sp *a) 2095{ 2096 return do_vfp_3op_sp(s, gen_helper_vfp_subs, a->vd, a->vn, a->vm, false); 2097} 2098 2099static bool trans_VSUB_dp(DisasContext *s, arg_VSUB_dp *a) 2100{ 2101 return do_vfp_3op_dp(s, gen_helper_vfp_subd, a->vd, a->vn, a->vm, false); 2102} 2103 2104static bool trans_VDIV_hp(DisasContext *s, arg_VDIV_sp *a) 2105{ 2106 return do_vfp_3op_hp(s, gen_helper_vfp_divh, a->vd, a->vn, a->vm, false); 2107} 2108 2109static bool trans_VDIV_sp(DisasContext *s, arg_VDIV_sp *a) 2110{ 2111 return do_vfp_3op_sp(s, gen_helper_vfp_divs, a->vd, a->vn, a->vm, false); 2112} 2113 2114static bool trans_VDIV_dp(DisasContext *s, arg_VDIV_dp *a) 2115{ 2116 return do_vfp_3op_dp(s, gen_helper_vfp_divd, a->vd, a->vn, a->vm, false); 2117} 2118 2119static bool trans_VMINNM_hp(DisasContext *s, arg_VMINNM_sp *a) 2120{ 2121 if (!dc_isar_feature(aa32_vminmaxnm, s)) { 2122 return false; 2123 } 2124 return do_vfp_3op_hp(s, gen_helper_vfp_minnumh, 2125 a->vd, a->vn, a->vm, false); 2126} 2127 2128static bool trans_VMAXNM_hp(DisasContext *s, arg_VMAXNM_sp *a) 2129{ 2130 if (!dc_isar_feature(aa32_vminmaxnm, s)) { 2131 return false; 2132 } 2133 return do_vfp_3op_hp(s, gen_helper_vfp_maxnumh, 2134 a->vd, a->vn, a->vm, false); 2135} 2136 2137static bool trans_VMINNM_sp(DisasContext *s, arg_VMINNM_sp *a) 2138{ 2139 if (!dc_isar_feature(aa32_vminmaxnm, s)) { 2140 return false; 2141 } 2142 return do_vfp_3op_sp(s, gen_helper_vfp_minnums, 2143 a->vd, a->vn, a->vm, false); 2144} 2145 2146static bool trans_VMAXNM_sp(DisasContext *s, arg_VMAXNM_sp *a) 2147{ 2148 if (!dc_isar_feature(aa32_vminmaxnm, s)) { 2149 return false; 2150 } 2151 return do_vfp_3op_sp(s, gen_helper_vfp_maxnums, 2152 a->vd, a->vn, a->vm, false); 2153} 2154 2155static bool trans_VMINNM_dp(DisasContext *s, arg_VMINNM_dp *a) 2156{ 2157 if (!dc_isar_feature(aa32_vminmaxnm, s)) { 2158 return false; 2159 } 2160 return do_vfp_3op_dp(s, gen_helper_vfp_minnumd, 2161 a->vd, a->vn, a->vm, false); 2162} 2163 2164static bool trans_VMAXNM_dp(DisasContext *s, arg_VMAXNM_dp *a) 2165{ 2166 if (!dc_isar_feature(aa32_vminmaxnm, s)) { 2167 return false; 2168 } 2169 return do_vfp_3op_dp(s, gen_helper_vfp_maxnumd, 2170 a->vd, a->vn, a->vm, false); 2171} 2172 2173static bool do_vfm_hp(DisasContext *s, 
arg_VFMA_sp *a, bool neg_n, bool neg_d) 2174{ 2175 /* 2176 * VFNMA : fd = muladd(-fd, fn, fm) 2177 * VFNMS : fd = muladd(-fd, -fn, fm) 2178 * VFMA : fd = muladd( fd, fn, fm) 2179 * VFMS : fd = muladd( fd, -fn, fm) 2180 * 2181 * These are fused multiply-add, and must be done as one floating 2182 * point operation with no rounding between the multiplication and 2183 * addition steps. NB that doing the negations here as separate 2184 * steps is correct : an input NaN should come out with its sign 2185 * bit flipped if it is a negated-input. 2186 */ 2187 TCGv_ptr fpst; 2188 TCGv_i32 vn, vm, vd; 2189 2190 /* 2191 * Present in VFPv4 only, and only with the FP16 extension. 2192 * Note that we can't rely on the SIMDFMAC check alone, because 2193 * in a Neon-no-VFP core that ID register field will be non-zero. 2194 */ 2195 if (!dc_isar_feature(aa32_fp16_arith, s) || 2196 !dc_isar_feature(aa32_simdfmac, s) || 2197 !dc_isar_feature(aa32_fpsp_v2, s)) { 2198 return false; 2199 } 2200 2201 if (s->vec_len != 0 || s->vec_stride != 0) { 2202 return false; 2203 } 2204 2205 if (!vfp_access_check(s)) { 2206 return true; 2207 } 2208 2209 vn = tcg_temp_new_i32(); 2210 vm = tcg_temp_new_i32(); 2211 vd = tcg_temp_new_i32(); 2212 2213 vfp_load_reg32(vn, a->vn); 2214 vfp_load_reg32(vm, a->vm); 2215 if (neg_n) { 2216 /* VFNMS, VFMS */ 2217 gen_helper_vfp_negh(vn, vn); 2218 } 2219 vfp_load_reg32(vd, a->vd); 2220 if (neg_d) { 2221 /* VFNMA, VFNMS */ 2222 gen_helper_vfp_negh(vd, vd); 2223 } 2224 fpst = fpstatus_ptr(FPST_FPCR_F16); 2225 gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst); 2226 vfp_store_reg32(vd, a->vd); 2227 2228 tcg_temp_free_ptr(fpst); 2229 tcg_temp_free_i32(vn); 2230 tcg_temp_free_i32(vm); 2231 tcg_temp_free_i32(vd); 2232 2233 return true; 2234} 2235 2236static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d) 2237{ 2238 /* 2239 * VFNMA : fd = muladd(-fd, fn, fm) 2240 * VFNMS : fd = muladd(-fd, -fn, fm) 2241 * VFMA : fd = muladd( fd, fn, fm) 2242 * VFMS : fd = muladd( fd, -fn, fm) 2243 * 2244 * These are fused multiply-add, and must be done as one floating 2245 * point operation with no rounding between the multiplication and 2246 * addition steps. NB that doing the negations here as separate 2247 * steps is correct : an input NaN should come out with its sign 2248 * bit flipped if it is a negated-input. 2249 */ 2250 TCGv_ptr fpst; 2251 TCGv_i32 vn, vm, vd; 2252 2253 /* 2254 * Present in VFPv4 only. 2255 * Note that we can't rely on the SIMDFMAC check alone, because 2256 * in a Neon-no-VFP core that ID register field will be non-zero. 2257 */ 2258 if (!dc_isar_feature(aa32_simdfmac, s) || 2259 !dc_isar_feature(aa32_fpsp_v2, s)) { 2260 return false; 2261 } 2262 /* 2263 * In v7A, UNPREDICTABLE with non-zero vector length/stride; from 2264 * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A. 
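     * (Returning false from this function makes the generated decoder treat
     * the encoding as unallocated, so the UNDEF exception is raised for us.)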
2265 */ 2266 if (s->vec_len != 0 || s->vec_stride != 0) { 2267 return false; 2268 } 2269 2270 if (!vfp_access_check(s)) { 2271 return true; 2272 } 2273 2274 vn = tcg_temp_new_i32(); 2275 vm = tcg_temp_new_i32(); 2276 vd = tcg_temp_new_i32(); 2277 2278 vfp_load_reg32(vn, a->vn); 2279 vfp_load_reg32(vm, a->vm); 2280 if (neg_n) { 2281 /* VFNMS, VFMS */ 2282 gen_helper_vfp_negs(vn, vn); 2283 } 2284 vfp_load_reg32(vd, a->vd); 2285 if (neg_d) { 2286 /* VFNMA, VFNMS */ 2287 gen_helper_vfp_negs(vd, vd); 2288 } 2289 fpst = fpstatus_ptr(FPST_FPCR); 2290 gen_helper_vfp_muladds(vd, vn, vm, vd, fpst); 2291 vfp_store_reg32(vd, a->vd); 2292 2293 tcg_temp_free_ptr(fpst); 2294 tcg_temp_free_i32(vn); 2295 tcg_temp_free_i32(vm); 2296 tcg_temp_free_i32(vd); 2297 2298 return true; 2299} 2300 2301static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d) 2302{ 2303 /* 2304 * VFNMA : fd = muladd(-fd, fn, fm) 2305 * VFNMS : fd = muladd(-fd, -fn, fm) 2306 * VFMA : fd = muladd( fd, fn, fm) 2307 * VFMS : fd = muladd( fd, -fn, fm) 2308 * 2309 * These are fused multiply-add, and must be done as one floating 2310 * point operation with no rounding between the multiplication and 2311 * addition steps. NB that doing the negations here as separate 2312 * steps is correct : an input NaN should come out with its sign 2313 * bit flipped if it is a negated-input. 2314 */ 2315 TCGv_ptr fpst; 2316 TCGv_i64 vn, vm, vd; 2317 2318 /* 2319 * Present in VFPv4 only. 2320 * Note that we can't rely on the SIMDFMAC check alone, because 2321 * in a Neon-no-VFP core that ID register field will be non-zero. 2322 */ 2323 if (!dc_isar_feature(aa32_simdfmac, s) || 2324 !dc_isar_feature(aa32_fpdp_v2, s)) { 2325 return false; 2326 } 2327 /* 2328 * In v7A, UNPREDICTABLE with non-zero vector length/stride; from 2329 * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A. 2330 */ 2331 if (s->vec_len != 0 || s->vec_stride != 0) { 2332 return false; 2333 } 2334 2335 /* UNDEF accesses to D16-D31 if they don't exist. 
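     * (D16-D31 are exactly the register numbers with bit 4 set, so OR-ing
     * vd, vn and vm and testing 0x10 below catches any operand in that range;
     * aa32_simd_r32 is true only when all 32 D registers are implemented.)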
*/ 2336 if (!dc_isar_feature(aa32_simd_r32, s) && 2337 ((a->vd | a->vn | a->vm) & 0x10)) { 2338 return false; 2339 } 2340 2341 if (!vfp_access_check(s)) { 2342 return true; 2343 } 2344 2345 vn = tcg_temp_new_i64(); 2346 vm = tcg_temp_new_i64(); 2347 vd = tcg_temp_new_i64(); 2348 2349 vfp_load_reg64(vn, a->vn); 2350 vfp_load_reg64(vm, a->vm); 2351 if (neg_n) { 2352 /* VFNMS, VFMS */ 2353 gen_helper_vfp_negd(vn, vn); 2354 } 2355 vfp_load_reg64(vd, a->vd); 2356 if (neg_d) { 2357 /* VFNMA, VFNMS */ 2358 gen_helper_vfp_negd(vd, vd); 2359 } 2360 fpst = fpstatus_ptr(FPST_FPCR); 2361 gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst); 2362 vfp_store_reg64(vd, a->vd); 2363 2364 tcg_temp_free_ptr(fpst); 2365 tcg_temp_free_i64(vn); 2366 tcg_temp_free_i64(vm); 2367 tcg_temp_free_i64(vd); 2368 2369 return true; 2370} 2371 2372#define MAKE_ONE_VFM_TRANS_FN(INSN, PREC, NEGN, NEGD) \ 2373 static bool trans_##INSN##_##PREC(DisasContext *s, \ 2374 arg_##INSN##_##PREC *a) \ 2375 { \ 2376 return do_vfm_##PREC(s, a, NEGN, NEGD); \ 2377 } 2378 2379#define MAKE_VFM_TRANS_FNS(PREC) \ 2380 MAKE_ONE_VFM_TRANS_FN(VFMA, PREC, false, false) \ 2381 MAKE_ONE_VFM_TRANS_FN(VFMS, PREC, true, false) \ 2382 MAKE_ONE_VFM_TRANS_FN(VFNMA, PREC, false, true) \ 2383 MAKE_ONE_VFM_TRANS_FN(VFNMS, PREC, true, true) 2384 2385MAKE_VFM_TRANS_FNS(hp) 2386MAKE_VFM_TRANS_FNS(sp) 2387MAKE_VFM_TRANS_FNS(dp) 2388 2389static bool trans_VMOV_imm_hp(DisasContext *s, arg_VMOV_imm_sp *a) 2390{ 2391 TCGv_i32 fd; 2392 2393 if (!dc_isar_feature(aa32_fp16_arith, s)) { 2394 return false; 2395 } 2396 2397 if (s->vec_len != 0 || s->vec_stride != 0) { 2398 return false; 2399 } 2400 2401 if (!vfp_access_check(s)) { 2402 return true; 2403 } 2404 2405 fd = tcg_const_i32(vfp_expand_imm(MO_16, a->imm)); 2406 vfp_store_reg32(fd, a->vd); 2407 tcg_temp_free_i32(fd); 2408 return true; 2409} 2410 2411static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a) 2412{ 2413 uint32_t delta_d = 0; 2414 int veclen = s->vec_len; 2415 TCGv_i32 fd; 2416 uint32_t vd; 2417 2418 vd = a->vd; 2419 2420 if (!dc_isar_feature(aa32_fpsp_v3, s)) { 2421 return false; 2422 } 2423 2424 if (!dc_isar_feature(aa32_fpshvec, s) && 2425 (veclen != 0 || s->vec_stride != 0)) { 2426 return false; 2427 } 2428 2429 if (!vfp_access_check(s)) { 2430 return true; 2431 } 2432 2433 if (veclen > 0) { 2434 /* Figure out what type of vector operation this is. */ 2435 if (vfp_sreg_is_scalar(vd)) { 2436 /* scalar */ 2437 veclen = 0; 2438 } else { 2439 delta_d = s->vec_stride + 1; 2440 } 2441 } 2442 2443 fd = tcg_const_i32(vfp_expand_imm(MO_32, a->imm)); 2444 2445 for (;;) { 2446 vfp_store_reg32(fd, vd); 2447 2448 if (veclen == 0) { 2449 break; 2450 } 2451 2452 /* Set up the operands for the next iteration */ 2453 veclen--; 2454 vd = vfp_advance_sreg(vd, delta_d); 2455 } 2456 2457 tcg_temp_free_i32(fd); 2458 return true; 2459} 2460 2461static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a) 2462{ 2463 uint32_t delta_d = 0; 2464 int veclen = s->vec_len; 2465 TCGv_i64 fd; 2466 uint32_t vd; 2467 2468 vd = a->vd; 2469 2470 if (!dc_isar_feature(aa32_fpdp_v3, s)) { 2471 return false; 2472 } 2473 2474 /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 2475 if (!dc_isar_feature(aa32_simd_r32, s) && (vd & 0x10)) { 2476 return false; 2477 } 2478 2479 if (!dc_isar_feature(aa32_fpshvec, s) && 2480 (veclen != 0 || s->vec_stride != 0)) { 2481 return false; 2482 } 2483 2484 if (!vfp_access_check(s)) { 2485 return true; 2486 } 2487 2488 if (veclen > 0) { 2489 /* Figure out what type of vector operation this is. */ 2490 if (vfp_dreg_is_scalar(vd)) { 2491 /* scalar */ 2492 veclen = 0; 2493 } else { 2494 delta_d = (s->vec_stride >> 1) + 1; 2495 } 2496 } 2497 2498 fd = tcg_const_i64(vfp_expand_imm(MO_64, a->imm)); 2499 2500 for (;;) { 2501 vfp_store_reg64(fd, vd); 2502 2503 if (veclen == 0) { 2504 break; 2505 } 2506 2507 /* Set up the operands for the next iteration */ 2508 veclen--; 2509 vd = vfp_advance_dreg(vd, delta_d); 2510 } 2511 2512 tcg_temp_free_i64(fd); 2513 return true; 2514} 2515 2516#define DO_VFP_2OP(INSN, PREC, FN, CHECK) \ 2517 static bool trans_##INSN##_##PREC(DisasContext *s, \ 2518 arg_##INSN##_##PREC *a) \ 2519 { \ 2520 if (!dc_isar_feature(CHECK, s)) { \ 2521 return false; \ 2522 } \ 2523 return do_vfp_2op_##PREC(s, FN, a->vd, a->vm); \ 2524 } 2525 2526#define DO_VFP_VMOV(INSN, PREC, FN) \ 2527 static bool trans_##INSN##_##PREC(DisasContext *s, \ 2528 arg_##INSN##_##PREC *a) \ 2529 { \ 2530 if (!dc_isar_feature(aa32_fp##PREC##_v2, s) && \ 2531 !dc_isar_feature(aa32_mve, s)) { \ 2532 return false; \ 2533 } \ 2534 return do_vfp_2op_##PREC(s, FN, a->vd, a->vm); \ 2535 } 2536 2537DO_VFP_VMOV(VMOV_reg, sp, tcg_gen_mov_i32) 2538DO_VFP_VMOV(VMOV_reg, dp, tcg_gen_mov_i64) 2539 2540DO_VFP_2OP(VABS, hp, gen_helper_vfp_absh, aa32_fp16_arith) 2541DO_VFP_2OP(VABS, sp, gen_helper_vfp_abss, aa32_fpsp_v2) 2542DO_VFP_2OP(VABS, dp, gen_helper_vfp_absd, aa32_fpdp_v2) 2543 2544DO_VFP_2OP(VNEG, hp, gen_helper_vfp_negh, aa32_fp16_arith) 2545DO_VFP_2OP(VNEG, sp, gen_helper_vfp_negs, aa32_fpsp_v2) 2546DO_VFP_2OP(VNEG, dp, gen_helper_vfp_negd, aa32_fpdp_v2) 2547 2548static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm) 2549{ 2550 gen_helper_vfp_sqrth(vd, vm, cpu_env); 2551} 2552 2553static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm) 2554{ 2555 gen_helper_vfp_sqrts(vd, vm, cpu_env); 2556} 2557 2558static void gen_VSQRT_dp(TCGv_i64 vd, TCGv_i64 vm) 2559{ 2560 gen_helper_vfp_sqrtd(vd, vm, cpu_env); 2561} 2562 2563DO_VFP_2OP(VSQRT, hp, gen_VSQRT_hp, aa32_fp16_arith) 2564DO_VFP_2OP(VSQRT, sp, gen_VSQRT_sp, aa32_fpsp_v2) 2565DO_VFP_2OP(VSQRT, dp, gen_VSQRT_dp, aa32_fpdp_v2) 2566 2567static bool trans_VCMP_hp(DisasContext *s, arg_VCMP_sp *a) 2568{ 2569 TCGv_i32 vd, vm; 2570 2571 if (!dc_isar_feature(aa32_fp16_arith, s)) { 2572 return false; 2573 } 2574 2575 /* Vm/M bits must be zero for the Z variant */ 2576 if (a->z && a->vm != 0) { 2577 return false; 2578 } 2579 2580 if (!vfp_access_check(s)) { 2581 return true; 2582 } 2583 2584 vd = tcg_temp_new_i32(); 2585 vm = tcg_temp_new_i32(); 2586 2587 vfp_load_reg32(vd, a->vd); 2588 if (a->z) { 2589 tcg_gen_movi_i32(vm, 0); 2590 } else { 2591 vfp_load_reg32(vm, a->vm); 2592 } 2593 2594 if (a->e) { 2595 gen_helper_vfp_cmpeh(vd, vm, cpu_env); 2596 } else { 2597 gen_helper_vfp_cmph(vd, vm, cpu_env); 2598 } 2599 2600 tcg_temp_free_i32(vd); 2601 tcg_temp_free_i32(vm); 2602 2603 return true; 2604} 2605 2606static bool trans_VCMP_sp(DisasContext *s, arg_VCMP_sp *a) 2607{ 2608 TCGv_i32 vd, vm; 2609 2610 if (!dc_isar_feature(aa32_fpsp_v2, s)) { 2611 return false; 2612 } 2613 2614 /* Vm/M bits must be zero for the Z variant */ 2615 if (a->z && a->vm != 0) { 2616 return false; 2617 } 2618 2619 if (!vfp_access_check(s)) { 2620 
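        /*
         * vfp_access_check() has already generated the code that raises the
         * FP-access exception, so return "handled" (true) even though we
         * emit nothing for the comparison itself.
         */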
return true; 2621 } 2622 2623 vd = tcg_temp_new_i32(); 2624 vm = tcg_temp_new_i32(); 2625 2626 vfp_load_reg32(vd, a->vd); 2627 if (a->z) { 2628 tcg_gen_movi_i32(vm, 0); 2629 } else { 2630 vfp_load_reg32(vm, a->vm); 2631 } 2632 2633 if (a->e) { 2634 gen_helper_vfp_cmpes(vd, vm, cpu_env); 2635 } else { 2636 gen_helper_vfp_cmps(vd, vm, cpu_env); 2637 } 2638 2639 tcg_temp_free_i32(vd); 2640 tcg_temp_free_i32(vm); 2641 2642 return true; 2643} 2644 2645static bool trans_VCMP_dp(DisasContext *s, arg_VCMP_dp *a) 2646{ 2647 TCGv_i64 vd, vm; 2648 2649 if (!dc_isar_feature(aa32_fpdp_v2, s)) { 2650 return false; 2651 } 2652 2653 /* Vm/M bits must be zero for the Z variant */ 2654 if (a->z && a->vm != 0) { 2655 return false; 2656 } 2657 2658 /* UNDEF accesses to D16-D31 if they don't exist. */ 2659 if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) { 2660 return false; 2661 } 2662 2663 if (!vfp_access_check(s)) { 2664 return true; 2665 } 2666 2667 vd = tcg_temp_new_i64(); 2668 vm = tcg_temp_new_i64(); 2669 2670 vfp_load_reg64(vd, a->vd); 2671 if (a->z) { 2672 tcg_gen_movi_i64(vm, 0); 2673 } else { 2674 vfp_load_reg64(vm, a->vm); 2675 } 2676 2677 if (a->e) { 2678 gen_helper_vfp_cmped(vd, vm, cpu_env); 2679 } else { 2680 gen_helper_vfp_cmpd(vd, vm, cpu_env); 2681 } 2682 2683 tcg_temp_free_i64(vd); 2684 tcg_temp_free_i64(vm); 2685 2686 return true; 2687} 2688 2689static bool trans_VCVT_f32_f16(DisasContext *s, arg_VCVT_f32_f16 *a) 2690{ 2691 TCGv_ptr fpst; 2692 TCGv_i32 ahp_mode; 2693 TCGv_i32 tmp; 2694 2695 if (!dc_isar_feature(aa32_fp16_spconv, s)) { 2696 return false; 2697 } 2698 2699 if (!vfp_access_check(s)) { 2700 return true; 2701 } 2702 2703 fpst = fpstatus_ptr(FPST_FPCR); 2704 ahp_mode = get_ahp_flag(); 2705 tmp = tcg_temp_new_i32(); 2706 /* The T bit tells us if we want the low or high 16 bits of Vm */ 2707 tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t)); 2708 gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp_mode); 2709 vfp_store_reg32(tmp, a->vd); 2710 tcg_temp_free_i32(ahp_mode); 2711 tcg_temp_free_ptr(fpst); 2712 tcg_temp_free_i32(tmp); 2713 return true; 2714} 2715 2716static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a) 2717{ 2718 TCGv_ptr fpst; 2719 TCGv_i32 ahp_mode; 2720 TCGv_i32 tmp; 2721 TCGv_i64 vd; 2722 2723 if (!dc_isar_feature(aa32_fpdp_v2, s)) { 2724 return false; 2725 } 2726 2727 if (!dc_isar_feature(aa32_fp16_dpconv, s)) { 2728 return false; 2729 } 2730 2731 /* UNDEF accesses to D16-D31 if they don't exist. 
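     * (Only Vd needs the check here: the half-precision source is read from
     * an S register, so Vm can never name D16-D31.)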
*/ 2732 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { 2733 return false; 2734 } 2735 2736 if (!vfp_access_check(s)) { 2737 return true; 2738 } 2739 2740 fpst = fpstatus_ptr(FPST_FPCR); 2741 ahp_mode = get_ahp_flag(); 2742 tmp = tcg_temp_new_i32(); 2743 /* The T bit tells us if we want the low or high 16 bits of Vm */ 2744 tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t)); 2745 vd = tcg_temp_new_i64(); 2746 gen_helper_vfp_fcvt_f16_to_f64(vd, tmp, fpst, ahp_mode); 2747 vfp_store_reg64(vd, a->vd); 2748 tcg_temp_free_i32(ahp_mode); 2749 tcg_temp_free_ptr(fpst); 2750 tcg_temp_free_i32(tmp); 2751 tcg_temp_free_i64(vd); 2752 return true; 2753} 2754 2755static bool trans_VCVT_b16_f32(DisasContext *s, arg_VCVT_b16_f32 *a) 2756{ 2757 TCGv_ptr fpst; 2758 TCGv_i32 tmp; 2759 2760 if (!dc_isar_feature(aa32_bf16, s)) { 2761 return false; 2762 } 2763 2764 if (!vfp_access_check(s)) { 2765 return true; 2766 } 2767 2768 fpst = fpstatus_ptr(FPST_FPCR); 2769 tmp = tcg_temp_new_i32(); 2770 2771 vfp_load_reg32(tmp, a->vm); 2772 gen_helper_bfcvt(tmp, tmp, fpst); 2773 tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t)); 2774 tcg_temp_free_ptr(fpst); 2775 tcg_temp_free_i32(tmp); 2776 return true; 2777} 2778 2779static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a) 2780{ 2781 TCGv_ptr fpst; 2782 TCGv_i32 ahp_mode; 2783 TCGv_i32 tmp; 2784 2785 if (!dc_isar_feature(aa32_fp16_spconv, s)) { 2786 return false; 2787 } 2788 2789 if (!vfp_access_check(s)) { 2790 return true; 2791 } 2792 2793 fpst = fpstatus_ptr(FPST_FPCR); 2794 ahp_mode = get_ahp_flag(); 2795 tmp = tcg_temp_new_i32(); 2796 2797 vfp_load_reg32(tmp, a->vm); 2798 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp_mode); 2799 tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t)); 2800 tcg_temp_free_i32(ahp_mode); 2801 tcg_temp_free_ptr(fpst); 2802 tcg_temp_free_i32(tmp); 2803 return true; 2804} 2805 2806static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a) 2807{ 2808 TCGv_ptr fpst; 2809 TCGv_i32 ahp_mode; 2810 TCGv_i32 tmp; 2811 TCGv_i64 vm; 2812 2813 if (!dc_isar_feature(aa32_fpdp_v2, s)) { 2814 return false; 2815 } 2816 2817 if (!dc_isar_feature(aa32_fp16_dpconv, s)) { 2818 return false; 2819 } 2820 2821 /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 2822 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) { 2823 return false; 2824 } 2825 2826 if (!vfp_access_check(s)) { 2827 return true; 2828 } 2829 2830 fpst = fpstatus_ptr(FPST_FPCR); 2831 ahp_mode = get_ahp_flag(); 2832 tmp = tcg_temp_new_i32(); 2833 vm = tcg_temp_new_i64(); 2834 2835 vfp_load_reg64(vm, a->vm); 2836 gen_helper_vfp_fcvt_f64_to_f16(tmp, vm, fpst, ahp_mode); 2837 tcg_temp_free_i64(vm); 2838 tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t)); 2839 tcg_temp_free_i32(ahp_mode); 2840 tcg_temp_free_ptr(fpst); 2841 tcg_temp_free_i32(tmp); 2842 return true; 2843} 2844 2845static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a) 2846{ 2847 TCGv_ptr fpst; 2848 TCGv_i32 tmp; 2849 2850 if (!dc_isar_feature(aa32_fp16_arith, s)) { 2851 return false; 2852 } 2853 2854 if (!vfp_access_check(s)) { 2855 return true; 2856 } 2857 2858 tmp = tcg_temp_new_i32(); 2859 vfp_load_reg32(tmp, a->vm); 2860 fpst = fpstatus_ptr(FPST_FPCR_F16); 2861 gen_helper_rinth(tmp, tmp, fpst); 2862 vfp_store_reg32(tmp, a->vd); 2863 tcg_temp_free_ptr(fpst); 2864 tcg_temp_free_i32(tmp); 2865 return true; 2866} 2867 2868static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a) 2869{ 2870 TCGv_ptr fpst; 2871 TCGv_i32 tmp; 2872 2873 if (!dc_isar_feature(aa32_vrint, s)) { 2874 return false; 2875 } 2876 2877 if (!vfp_access_check(s)) { 2878 return true; 2879 } 2880 2881 tmp = tcg_temp_new_i32(); 2882 vfp_load_reg32(tmp, a->vm); 2883 fpst = fpstatus_ptr(FPST_FPCR); 2884 gen_helper_rints(tmp, tmp, fpst); 2885 vfp_store_reg32(tmp, a->vd); 2886 tcg_temp_free_ptr(fpst); 2887 tcg_temp_free_i32(tmp); 2888 return true; 2889} 2890 2891static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a) 2892{ 2893 TCGv_ptr fpst; 2894 TCGv_i64 tmp; 2895 2896 if (!dc_isar_feature(aa32_fpdp_v2, s)) { 2897 return false; 2898 } 2899 2900 if (!dc_isar_feature(aa32_vrint, s)) { 2901 return false; 2902 } 2903 2904 /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 2905 if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) { 2906 return false; 2907 } 2908 2909 if (!vfp_access_check(s)) { 2910 return true; 2911 } 2912 2913 tmp = tcg_temp_new_i64(); 2914 vfp_load_reg64(tmp, a->vm); 2915 fpst = fpstatus_ptr(FPST_FPCR); 2916 gen_helper_rintd(tmp, tmp, fpst); 2917 vfp_store_reg64(tmp, a->vd); 2918 tcg_temp_free_ptr(fpst); 2919 tcg_temp_free_i64(tmp); 2920 return true; 2921} 2922 2923static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a) 2924{ 2925 TCGv_ptr fpst; 2926 TCGv_i32 tmp; 2927 TCGv_i32 tcg_rmode; 2928 2929 if (!dc_isar_feature(aa32_fp16_arith, s)) { 2930 return false; 2931 } 2932 2933 if (!vfp_access_check(s)) { 2934 return true; 2935 } 2936 2937 tmp = tcg_temp_new_i32(); 2938 vfp_load_reg32(tmp, a->vm); 2939 fpst = fpstatus_ptr(FPST_FPCR_F16); 2940 tcg_rmode = tcg_const_i32(float_round_to_zero); 2941 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); 2942 gen_helper_rinth(tmp, tmp, fpst); 2943 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); 2944 vfp_store_reg32(tmp, a->vd); 2945 tcg_temp_free_ptr(fpst); 2946 tcg_temp_free_i32(tcg_rmode); 2947 tcg_temp_free_i32(tmp); 2948 return true; 2949} 2950 2951static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a) 2952{ 2953 TCGv_ptr fpst; 2954 TCGv_i32 tmp; 2955 TCGv_i32 tcg_rmode; 2956 2957 if (!dc_isar_feature(aa32_vrint, s)) { 2958 return false; 2959 } 2960 2961 if (!vfp_access_check(s)) { 2962 return true; 2963 } 2964 2965 tmp = tcg_temp_new_i32(); 2966 vfp_load_reg32(tmp, a->vm); 2967 fpst = fpstatus_ptr(FPST_FPCR); 2968 tcg_rmode = tcg_const_i32(float_round_to_zero); 2969 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); 2970 gen_helper_rints(tmp, tmp, fpst); 2971 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); 2972 vfp_store_reg32(tmp, a->vd); 2973 tcg_temp_free_ptr(fpst); 2974 tcg_temp_free_i32(tcg_rmode); 2975 tcg_temp_free_i32(tmp); 2976 return true; 2977} 2978 2979static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a) 2980{ 2981 TCGv_ptr fpst; 2982 TCGv_i64 tmp; 2983 TCGv_i32 tcg_rmode; 2984 2985 if (!dc_isar_feature(aa32_fpdp_v2, s)) { 2986 return false; 2987 } 2988 2989 if (!dc_isar_feature(aa32_vrint, s)) { 2990 return false; 2991 } 2992 2993 /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 2994 if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) { 2995 return false; 2996 } 2997 2998 if (!vfp_access_check(s)) { 2999 return true; 3000 } 3001 3002 tmp = tcg_temp_new_i64(); 3003 vfp_load_reg64(tmp, a->vm); 3004 fpst = fpstatus_ptr(FPST_FPCR); 3005 tcg_rmode = tcg_const_i32(float_round_to_zero); 3006 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); 3007 gen_helper_rintd(tmp, tmp, fpst); 3008 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); 3009 vfp_store_reg64(tmp, a->vd); 3010 tcg_temp_free_ptr(fpst); 3011 tcg_temp_free_i64(tmp); 3012 tcg_temp_free_i32(tcg_rmode); 3013 return true; 3014} 3015 3016static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a) 3017{ 3018 TCGv_ptr fpst; 3019 TCGv_i32 tmp; 3020 3021 if (!dc_isar_feature(aa32_fp16_arith, s)) { 3022 return false; 3023 } 3024 3025 if (!vfp_access_check(s)) { 3026 return true; 3027 } 3028 3029 tmp = tcg_temp_new_i32(); 3030 vfp_load_reg32(tmp, a->vm); 3031 fpst = fpstatus_ptr(FPST_FPCR_F16); 3032 gen_helper_rinth_exact(tmp, tmp, fpst); 3033 vfp_store_reg32(tmp, a->vd); 3034 tcg_temp_free_ptr(fpst); 3035 tcg_temp_free_i32(tmp); 3036 return true; 3037} 3038 3039static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a) 3040{ 3041 TCGv_ptr fpst; 3042 TCGv_i32 tmp; 3043 3044 if (!dc_isar_feature(aa32_vrint, s)) { 3045 return false; 3046 } 3047 3048 if (!vfp_access_check(s)) { 3049 return true; 3050 } 3051 3052 tmp = tcg_temp_new_i32(); 3053 vfp_load_reg32(tmp, a->vm); 3054 fpst = fpstatus_ptr(FPST_FPCR); 3055 gen_helper_rints_exact(tmp, tmp, fpst); 3056 vfp_store_reg32(tmp, a->vd); 3057 tcg_temp_free_ptr(fpst); 3058 tcg_temp_free_i32(tmp); 3059 return true; 3060} 3061 3062static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a) 3063{ 3064 TCGv_ptr fpst; 3065 TCGv_i64 tmp; 3066 3067 if (!dc_isar_feature(aa32_fpdp_v2, s)) { 3068 return false; 3069 } 3070 3071 if (!dc_isar_feature(aa32_vrint, s)) { 3072 return false; 3073 } 3074 3075 /* UNDEF accesses to D16-D31 if they don't exist. */ 3076 if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) { 3077 return false; 3078 } 3079 3080 if (!vfp_access_check(s)) { 3081 return true; 3082 } 3083 3084 tmp = tcg_temp_new_i64(); 3085 vfp_load_reg64(tmp, a->vm); 3086 fpst = fpstatus_ptr(FPST_FPCR); 3087 gen_helper_rintd_exact(tmp, tmp, fpst); 3088 vfp_store_reg64(tmp, a->vd); 3089 tcg_temp_free_ptr(fpst); 3090 tcg_temp_free_i64(tmp); 3091 return true; 3092} 3093 3094static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a) 3095{ 3096 TCGv_i64 vd; 3097 TCGv_i32 vm; 3098 3099 if (!dc_isar_feature(aa32_fpdp_v2, s)) { 3100 return false; 3101 } 3102 3103 /* UNDEF accesses to D16-D31 if they don't exist. */ 3104 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { 3105 return false; 3106 } 3107 3108 if (!vfp_access_check(s)) { 3109 return true; 3110 } 3111 3112 vm = tcg_temp_new_i32(); 3113 vd = tcg_temp_new_i64(); 3114 vfp_load_reg32(vm, a->vm); 3115 gen_helper_vfp_fcvtds(vd, vm, cpu_env); 3116 vfp_store_reg64(vd, a->vd); 3117 tcg_temp_free_i32(vm); 3118 tcg_temp_free_i64(vd); 3119 return true; 3120} 3121 3122static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a) 3123{ 3124 TCGv_i64 vm; 3125 TCGv_i32 vd; 3126 3127 if (!dc_isar_feature(aa32_fpdp_v2, s)) { 3128 return false; 3129 } 3130 3131 /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 3132 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) { 3133 return false; 3134 } 3135 3136 if (!vfp_access_check(s)) { 3137 return true; 3138 } 3139 3140 vd = tcg_temp_new_i32(); 3141 vm = tcg_temp_new_i64(); 3142 vfp_load_reg64(vm, a->vm); 3143 gen_helper_vfp_fcvtsd(vd, vm, cpu_env); 3144 vfp_store_reg32(vd, a->vd); 3145 tcg_temp_free_i32(vd); 3146 tcg_temp_free_i64(vm); 3147 return true; 3148} 3149 3150static bool trans_VCVT_int_hp(DisasContext *s, arg_VCVT_int_sp *a) 3151{ 3152 TCGv_i32 vm; 3153 TCGv_ptr fpst; 3154 3155 if (!dc_isar_feature(aa32_fp16_arith, s)) { 3156 return false; 3157 } 3158 3159 if (!vfp_access_check(s)) { 3160 return true; 3161 } 3162 3163 vm = tcg_temp_new_i32(); 3164 vfp_load_reg32(vm, a->vm); 3165 fpst = fpstatus_ptr(FPST_FPCR_F16); 3166 if (a->s) { 3167 /* i32 -> f16 */ 3168 gen_helper_vfp_sitoh(vm, vm, fpst); 3169 } else { 3170 /* u32 -> f16 */ 3171 gen_helper_vfp_uitoh(vm, vm, fpst); 3172 } 3173 vfp_store_reg32(vm, a->vd); 3174 tcg_temp_free_i32(vm); 3175 tcg_temp_free_ptr(fpst); 3176 return true; 3177} 3178 3179static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a) 3180{ 3181 TCGv_i32 vm; 3182 TCGv_ptr fpst; 3183 3184 if (!dc_isar_feature(aa32_fpsp_v2, s)) { 3185 return false; 3186 } 3187 3188 if (!vfp_access_check(s)) { 3189 return true; 3190 } 3191 3192 vm = tcg_temp_new_i32(); 3193 vfp_load_reg32(vm, a->vm); 3194 fpst = fpstatus_ptr(FPST_FPCR); 3195 if (a->s) { 3196 /* i32 -> f32 */ 3197 gen_helper_vfp_sitos(vm, vm, fpst); 3198 } else { 3199 /* u32 -> f32 */ 3200 gen_helper_vfp_uitos(vm, vm, fpst); 3201 } 3202 vfp_store_reg32(vm, a->vd); 3203 tcg_temp_free_i32(vm); 3204 tcg_temp_free_ptr(fpst); 3205 return true; 3206} 3207 3208static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a) 3209{ 3210 TCGv_i32 vm; 3211 TCGv_i64 vd; 3212 TCGv_ptr fpst; 3213 3214 if (!dc_isar_feature(aa32_fpdp_v2, s)) { 3215 return false; 3216 } 3217 3218 /* UNDEF accesses to D16-D31 if they don't exist. */ 3219 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { 3220 return false; 3221 } 3222 3223 if (!vfp_access_check(s)) { 3224 return true; 3225 } 3226 3227 vm = tcg_temp_new_i32(); 3228 vd = tcg_temp_new_i64(); 3229 vfp_load_reg32(vm, a->vm); 3230 fpst = fpstatus_ptr(FPST_FPCR); 3231 if (a->s) { 3232 /* i32 -> f64 */ 3233 gen_helper_vfp_sitod(vd, vm, fpst); 3234 } else { 3235 /* u32 -> f64 */ 3236 gen_helper_vfp_uitod(vd, vm, fpst); 3237 } 3238 vfp_store_reg64(vd, a->vd); 3239 tcg_temp_free_i32(vm); 3240 tcg_temp_free_i64(vd); 3241 tcg_temp_free_ptr(fpst); 3242 return true; 3243} 3244 3245static bool trans_VJCVT(DisasContext *s, arg_VJCVT *a) 3246{ 3247 TCGv_i32 vd; 3248 TCGv_i64 vm; 3249 3250 if (!dc_isar_feature(aa32_fpdp_v2, s)) { 3251 return false; 3252 } 3253 3254 if (!dc_isar_feature(aa32_jscvt, s)) { 3255 return false; 3256 } 3257 3258 /* UNDEF accesses to D16-D31 if they don't exist. 
*/ 3259 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) { 3260 return false; 3261 } 3262 3263 if (!vfp_access_check(s)) { 3264 return true; 3265 } 3266 3267 vm = tcg_temp_new_i64(); 3268 vd = tcg_temp_new_i32(); 3269 vfp_load_reg64(vm, a->vm); 3270 gen_helper_vjcvt(vd, vm, cpu_env); 3271 vfp_store_reg32(vd, a->vd); 3272 tcg_temp_free_i64(vm); 3273 tcg_temp_free_i32(vd); 3274 return true; 3275} 3276 3277static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a) 3278{ 3279 TCGv_i32 vd, shift; 3280 TCGv_ptr fpst; 3281 int frac_bits; 3282 3283 if (!dc_isar_feature(aa32_fp16_arith, s)) { 3284 return false; 3285 } 3286 3287 if (!vfp_access_check(s)) { 3288 return true; 3289 } 3290 3291 frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm); 3292 3293 vd = tcg_temp_new_i32(); 3294 vfp_load_reg32(vd, a->vd); 3295 3296 fpst = fpstatus_ptr(FPST_FPCR_F16); 3297 shift = tcg_const_i32(frac_bits); 3298 3299 /* Switch on op:U:sx bits */ 3300 switch (a->opc) { 3301 case 0: 3302 gen_helper_vfp_shtoh_round_to_nearest(vd, vd, shift, fpst); 3303 break; 3304 case 1: 3305 gen_helper_vfp_sltoh_round_to_nearest(vd, vd, shift, fpst); 3306 break; 3307 case 2: 3308 gen_helper_vfp_uhtoh_round_to_nearest(vd, vd, shift, fpst); 3309 break; 3310 case 3: 3311 gen_helper_vfp_ultoh_round_to_nearest(vd, vd, shift, fpst); 3312 break; 3313 case 4: 3314 gen_helper_vfp_toshh_round_to_zero(vd, vd, shift, fpst); 3315 break; 3316 case 5: 3317 gen_helper_vfp_toslh_round_to_zero(vd, vd, shift, fpst); 3318 break; 3319 case 6: 3320 gen_helper_vfp_touhh_round_to_zero(vd, vd, shift, fpst); 3321 break; 3322 case 7: 3323 gen_helper_vfp_toulh_round_to_zero(vd, vd, shift, fpst); 3324 break; 3325 default: 3326 g_assert_not_reached(); 3327 } 3328 3329 vfp_store_reg32(vd, a->vd); 3330 tcg_temp_free_i32(vd); 3331 tcg_temp_free_i32(shift); 3332 tcg_temp_free_ptr(fpst); 3333 return true; 3334} 3335 3336static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a) 3337{ 3338 TCGv_i32 vd, shift; 3339 TCGv_ptr fpst; 3340 int frac_bits; 3341 3342 if (!dc_isar_feature(aa32_fpsp_v3, s)) { 3343 return false; 3344 } 3345 3346 if (!vfp_access_check(s)) { 3347 return true; 3348 } 3349 3350 frac_bits = (a->opc & 1) ? 
(32 - a->imm) : (16 - a->imm); 3351 3352 vd = tcg_temp_new_i32(); 3353 vfp_load_reg32(vd, a->vd); 3354 3355 fpst = fpstatus_ptr(FPST_FPCR); 3356 shift = tcg_const_i32(frac_bits); 3357 3358 /* Switch on op:U:sx bits */ 3359 switch (a->opc) { 3360 case 0: 3361 gen_helper_vfp_shtos_round_to_nearest(vd, vd, shift, fpst); 3362 break; 3363 case 1: 3364 gen_helper_vfp_sltos_round_to_nearest(vd, vd, shift, fpst); 3365 break; 3366 case 2: 3367 gen_helper_vfp_uhtos_round_to_nearest(vd, vd, shift, fpst); 3368 break; 3369 case 3: 3370 gen_helper_vfp_ultos_round_to_nearest(vd, vd, shift, fpst); 3371 break; 3372 case 4: 3373 gen_helper_vfp_toshs_round_to_zero(vd, vd, shift, fpst); 3374 break; 3375 case 5: 3376 gen_helper_vfp_tosls_round_to_zero(vd, vd, shift, fpst); 3377 break; 3378 case 6: 3379 gen_helper_vfp_touhs_round_to_zero(vd, vd, shift, fpst); 3380 break; 3381 case 7: 3382 gen_helper_vfp_touls_round_to_zero(vd, vd, shift, fpst); 3383 break; 3384 default: 3385 g_assert_not_reached(); 3386 } 3387 3388 vfp_store_reg32(vd, a->vd); 3389 tcg_temp_free_i32(vd); 3390 tcg_temp_free_i32(shift); 3391 tcg_temp_free_ptr(fpst); 3392 return true; 3393} 3394 3395static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a) 3396{ 3397 TCGv_i64 vd; 3398 TCGv_i32 shift; 3399 TCGv_ptr fpst; 3400 int frac_bits; 3401 3402 if (!dc_isar_feature(aa32_fpdp_v3, s)) { 3403 return false; 3404 } 3405 3406 /* UNDEF accesses to D16-D31 if they don't exist. */ 3407 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { 3408 return false; 3409 } 3410 3411 if (!vfp_access_check(s)) { 3412 return true; 3413 } 3414 3415 frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm); 3416 3417 vd = tcg_temp_new_i64(); 3418 vfp_load_reg64(vd, a->vd); 3419 3420 fpst = fpstatus_ptr(FPST_FPCR); 3421 shift = tcg_const_i32(frac_bits); 3422 3423 /* Switch on op:U:sx bits */ 3424 switch (a->opc) { 3425 case 0: 3426 gen_helper_vfp_shtod_round_to_nearest(vd, vd, shift, fpst); 3427 break; 3428 case 1: 3429 gen_helper_vfp_sltod_round_to_nearest(vd, vd, shift, fpst); 3430 break; 3431 case 2: 3432 gen_helper_vfp_uhtod_round_to_nearest(vd, vd, shift, fpst); 3433 break; 3434 case 3: 3435 gen_helper_vfp_ultod_round_to_nearest(vd, vd, shift, fpst); 3436 break; 3437 case 4: 3438 gen_helper_vfp_toshd_round_to_zero(vd, vd, shift, fpst); 3439 break; 3440 case 5: 3441 gen_helper_vfp_tosld_round_to_zero(vd, vd, shift, fpst); 3442 break; 3443 case 6: 3444 gen_helper_vfp_touhd_round_to_zero(vd, vd, shift, fpst); 3445 break; 3446 case 7: 3447 gen_helper_vfp_tould_round_to_zero(vd, vd, shift, fpst); 3448 break; 3449 default: 3450 g_assert_not_reached(); 3451 } 3452 3453 vfp_store_reg64(vd, a->vd); 3454 tcg_temp_free_i64(vd); 3455 tcg_temp_free_i32(shift); 3456 tcg_temp_free_ptr(fpst); 3457 return true; 3458} 3459 3460static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a) 3461{ 3462 TCGv_i32 vm; 3463 TCGv_ptr fpst; 3464 3465 if (!dc_isar_feature(aa32_fp16_arith, s)) { 3466 return false; 3467 } 3468 3469 if (!vfp_access_check(s)) { 3470 return true; 3471 } 3472 3473 fpst = fpstatus_ptr(FPST_FPCR_F16); 3474 vm = tcg_temp_new_i32(); 3475 vfp_load_reg32(vm, a->vm); 3476 3477 if (a->s) { 3478 if (a->rz) { 3479 gen_helper_vfp_tosizh(vm, vm, fpst); 3480 } else { 3481 gen_helper_vfp_tosih(vm, vm, fpst); 3482 } 3483 } else { 3484 if (a->rz) { 3485 gen_helper_vfp_touizh(vm, vm, fpst); 3486 } else { 3487 gen_helper_vfp_touih(vm, vm, fpst); 3488 } 3489 } 3490 vfp_store_reg32(vm, a->vd); 3491 tcg_temp_free_i32(vm); 3492 tcg_temp_free_ptr(fpst); 3493 return 
true; 3494} 3495 3496static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a) 3497{ 3498 TCGv_i32 vm; 3499 TCGv_ptr fpst; 3500 3501 if (!dc_isar_feature(aa32_fpsp_v2, s)) { 3502 return false; 3503 } 3504 3505 if (!vfp_access_check(s)) { 3506 return true; 3507 } 3508 3509 fpst = fpstatus_ptr(FPST_FPCR); 3510 vm = tcg_temp_new_i32(); 3511 vfp_load_reg32(vm, a->vm); 3512 3513 if (a->s) { 3514 if (a->rz) { 3515 gen_helper_vfp_tosizs(vm, vm, fpst); 3516 } else { 3517 gen_helper_vfp_tosis(vm, vm, fpst); 3518 } 3519 } else { 3520 if (a->rz) { 3521 gen_helper_vfp_touizs(vm, vm, fpst); 3522 } else { 3523 gen_helper_vfp_touis(vm, vm, fpst); 3524 } 3525 } 3526 vfp_store_reg32(vm, a->vd); 3527 tcg_temp_free_i32(vm); 3528 tcg_temp_free_ptr(fpst); 3529 return true; 3530} 3531 3532static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a) 3533{ 3534 TCGv_i32 vd; 3535 TCGv_i64 vm; 3536 TCGv_ptr fpst; 3537 3538 if (!dc_isar_feature(aa32_fpdp_v2, s)) { 3539 return false; 3540 } 3541 3542 /* UNDEF accesses to D16-D31 if they don't exist. */ 3543 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) { 3544 return false; 3545 } 3546 3547 if (!vfp_access_check(s)) { 3548 return true; 3549 } 3550 3551 fpst = fpstatus_ptr(FPST_FPCR); 3552 vm = tcg_temp_new_i64(); 3553 vd = tcg_temp_new_i32(); 3554 vfp_load_reg64(vm, a->vm); 3555 3556 if (a->s) { 3557 if (a->rz) { 3558 gen_helper_vfp_tosizd(vd, vm, fpst); 3559 } else { 3560 gen_helper_vfp_tosid(vd, vm, fpst); 3561 } 3562 } else { 3563 if (a->rz) { 3564 gen_helper_vfp_touizd(vd, vm, fpst); 3565 } else { 3566 gen_helper_vfp_touid(vd, vm, fpst); 3567 } 3568 } 3569 vfp_store_reg32(vd, a->vd); 3570 tcg_temp_free_i32(vd); 3571 tcg_temp_free_i64(vm); 3572 tcg_temp_free_ptr(fpst); 3573 return true; 3574} 3575 3576static bool trans_VINS(DisasContext *s, arg_VINS *a) 3577{ 3578 TCGv_i32 rd, rm; 3579 3580 if (!dc_isar_feature(aa32_fp16_arith, s)) { 3581 return false; 3582 } 3583 3584 if (s->vec_len != 0 || s->vec_stride != 0) { 3585 return false; 3586 } 3587 3588 if (!vfp_access_check(s)) { 3589 return true; 3590 } 3591 3592 /* Insert low half of Vm into high half of Vd */ 3593 rm = tcg_temp_new_i32(); 3594 rd = tcg_temp_new_i32(); 3595 vfp_load_reg32(rm, a->vm); 3596 vfp_load_reg32(rd, a->vd); 3597 tcg_gen_deposit_i32(rd, rd, rm, 16, 16); 3598 vfp_store_reg32(rd, a->vd); 3599 tcg_temp_free_i32(rm); 3600 tcg_temp_free_i32(rd); 3601 return true; 3602} 3603 3604static bool trans_VMOVX(DisasContext *s, arg_VINS *a) 3605{ 3606 TCGv_i32 rm; 3607 3608 if (!dc_isar_feature(aa32_fp16_arith, s)) { 3609 return false; 3610 } 3611 3612 if (s->vec_len != 0 || s->vec_stride != 0) { 3613 return false; 3614 } 3615 3616 if (!vfp_access_check(s)) { 3617 return true; 3618 } 3619 3620 /* Set Vd to high half of Vm */ 3621 rm = tcg_temp_new_i32(); 3622 vfp_load_reg32(rm, a->vm); 3623 tcg_gen_shri_i32(rm, rm, 16); 3624 vfp_store_reg32(rm, a->vd); 3625 tcg_temp_free_i32(rm); 3626 return true; 3627}
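
/*
 * For reference: the VFMA/VFMS/VFNMA/VFNMS trans functions earlier in this
 * file are produced by MAKE_VFM_TRANS_FNS(); as an illustration,
 * MAKE_ONE_VFM_TRANS_FN(VFMA, sp, false, false) expands to
 *
 *     static bool trans_VFMA_sp(DisasContext *s, arg_VFMA_sp *a)
 *     {
 *         return do_vfm_sp(s, a, false, false);
 *     }
 *
 * i.e. each generated wrapper simply forwards to do_vfm_<prec>() with the
 * negate-Vn/negate-Vd flags for that mnemonic.
 */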