helper-a64.c (33771B)
/*
 * AArch64 specific helpers
 *
 * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/units.h"
#include "cpu.h"
#include "exec/gdbstub.h"
#include "exec/helper-proto.h"
#include "qemu/host-utils.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/bitops.h"
#include "internals.h"
#include "qemu/crc32c.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
#include "qemu/int128.h"
#include "qemu/atomic128.h"
#include "tcg/tcg.h"
#include "fpu/softfloat.h"
#include <zlib.h> /* For crc32 */

/* C2.4.7 Multiply and divide */
/* special cases for 0 and LLONG_MIN are mandated by the standard */
uint64_t HELPER(udiv64)(uint64_t num, uint64_t den)
{
    if (den == 0) {
        return 0;
    }
    return num / den;
}

int64_t HELPER(sdiv64)(int64_t num, int64_t den)
{
    if (den == 0) {
        return 0;
    }
    if (num == LLONG_MIN && den == -1) {
        return LLONG_MIN;
    }
    return num / den;
}

uint64_t HELPER(rbit64)(uint64_t x)
{
    return revbit64(x);
}

void HELPER(msr_i_spsel)(CPUARMState *env, uint32_t imm)
{
    update_spsel(env, imm);
}

static void daif_check(CPUARMState *env, uint32_t op,
                       uint32_t imm, uintptr_t ra)
{
    /* DAIF update to PSTATE. This is OK from EL0 only if UMA is set. */
    if (arm_current_el(env) == 0 && !(arm_sctlr(env, 0) & SCTLR_UMA)) {
        raise_exception_ra(env, EXCP_UDEF,
                           syn_aa64_sysregtrap(0, extract32(op, 0, 3),
                                               extract32(op, 3, 3), 4,
                                               imm, 0x1f, 0),
                           exception_target_el(env), ra);
    }
}

void HELPER(msr_i_daifset)(CPUARMState *env, uint32_t imm)
{
    daif_check(env, 0x1e, imm, GETPC());
    env->daif |= (imm << 6) & PSTATE_DAIF;
}

void HELPER(msr_i_daifclear)(CPUARMState *env, uint32_t imm)
{
    daif_check(env, 0x1f, imm, GETPC());
    env->daif &= ~((imm << 6) & PSTATE_DAIF);
}

/* Convert a softfloat float_relation_ (as returned by
 * the float*_compare functions) to the correct ARM
 * NZCV flag state.
 */
static inline uint32_t float_rel_to_flags(int res)
{
    uint64_t flags;
    switch (res) {
    case float_relation_equal:
        flags = PSTATE_Z | PSTATE_C;
        break;
    case float_relation_less:
        flags = PSTATE_N;
        break;
    case float_relation_greater:
        flags = PSTATE_C;
        break;
    case float_relation_unordered:
    default:
        flags = PSTATE_C | PSTATE_V;
        break;
    }
    return flags;
}

uint64_t HELPER(vfp_cmph_a64)(uint32_t x, uint32_t y, void *fp_status)
{
    return float_rel_to_flags(float16_compare_quiet(x, y, fp_status));
}

uint64_t HELPER(vfp_cmpeh_a64)(uint32_t x, uint32_t y, void *fp_status)
{
    return float_rel_to_flags(float16_compare(x, y, fp_status));
}

uint64_t HELPER(vfp_cmps_a64)(float32 x, float32 y, void *fp_status)
{
    return float_rel_to_flags(float32_compare_quiet(x, y, fp_status));
}

uint64_t HELPER(vfp_cmpes_a64)(float32 x, float32 y, void *fp_status)
{
    return float_rel_to_flags(float32_compare(x, y, fp_status));
}

uint64_t HELPER(vfp_cmpd_a64)(float64 x, float64 y, void *fp_status)
{
    return float_rel_to_flags(float64_compare_quiet(x, y, fp_status));
}

uint64_t HELPER(vfp_cmped_a64)(float64 x, float64 y, void *fp_status)
{
    return float_rel_to_flags(float64_compare(x, y, fp_status));
}

float32 HELPER(vfp_mulxs)(float32 a, float32 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float32_squash_input_denormal(a, fpst);
    b = float32_squash_input_denormal(b, fpst);

    if ((float32_is_zero(a) && float32_is_infinity(b)) ||
        (float32_is_infinity(a) && float32_is_zero(b))) {
        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
        return make_float32((1U << 30) |
                            ((float32_val(a) ^ float32_val(b)) & (1U << 31)));
    }
    return float32_mul(a, b, fpst);
}

float64 HELPER(vfp_mulxd)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float64_squash_input_denormal(a, fpst);
    b = float64_squash_input_denormal(b, fpst);

    if ((float64_is_zero(a) && float64_is_infinity(b)) ||
        (float64_is_infinity(a) && float64_is_zero(b))) {
        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
        return make_float64((1ULL << 62) |
                            ((float64_val(a) ^ float64_val(b)) & (1ULL << 63)));
    }
    return float64_mul(a, b, fpst);
}

/* 64bit/double versions of the neon float compare functions */
uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;
    return -float64_eq_quiet(a, b, fpst);
}

uint64_t HELPER(neon_cge_f64)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;
    return -float64_le(b, a, fpst);
}

uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;
    return -float64_lt(b, a, fpst);
}

/* Reciprocal step and sqrt step. Note that unlike the A32/T32
 * versions, these do a fully fused multiply-add or
 * multiply-add-and-halve.
 */
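
/*
 * Put concretely, the step helpers below evaluate 2.0 - a * b (FRECPS)
 * and (3.0 - a * b) / 2.0 (FRSQRTS) with the product and sum fused,
 * i.e. with no intermediate rounding; negating 'a' first is what turns
 * the float*_muladd() call into that subtraction.
 */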

uint32_t HELPER(recpsf_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float16_squash_input_denormal(a, fpst);
    b = float16_squash_input_denormal(b, fpst);

    a = float16_chs(a);
    if ((float16_is_infinity(a) && float16_is_zero(b)) ||
        (float16_is_infinity(b) && float16_is_zero(a))) {
        return float16_two;
    }
    return float16_muladd(a, b, float16_two, 0, fpst);
}

float32 HELPER(recpsf_f32)(float32 a, float32 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float32_squash_input_denormal(a, fpst);
    b = float32_squash_input_denormal(b, fpst);

    a = float32_chs(a);
    if ((float32_is_infinity(a) && float32_is_zero(b)) ||
        (float32_is_infinity(b) && float32_is_zero(a))) {
        return float32_two;
    }
    return float32_muladd(a, b, float32_two, 0, fpst);
}

float64 HELPER(recpsf_f64)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float64_squash_input_denormal(a, fpst);
    b = float64_squash_input_denormal(b, fpst);

    a = float64_chs(a);
    if ((float64_is_infinity(a) && float64_is_zero(b)) ||
        (float64_is_infinity(b) && float64_is_zero(a))) {
        return float64_two;
    }
    return float64_muladd(a, b, float64_two, 0, fpst);
}

uint32_t HELPER(rsqrtsf_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float16_squash_input_denormal(a, fpst);
    b = float16_squash_input_denormal(b, fpst);

    a = float16_chs(a);
    if ((float16_is_infinity(a) && float16_is_zero(b)) ||
        (float16_is_infinity(b) && float16_is_zero(a))) {
        return float16_one_point_five;
    }
    return float16_muladd(a, b, float16_three, float_muladd_halve_result, fpst);
}

float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float32_squash_input_denormal(a, fpst);
    b = float32_squash_input_denormal(b, fpst);

    a = float32_chs(a);
    if ((float32_is_infinity(a) && float32_is_zero(b)) ||
        (float32_is_infinity(b) && float32_is_zero(a))) {
        return float32_one_point_five;
    }
    return float32_muladd(a, b, float32_three, float_muladd_halve_result, fpst);
}

float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float64_squash_input_denormal(a, fpst);
    b = float64_squash_input_denormal(b, fpst);

    a = float64_chs(a);
    if ((float64_is_infinity(a) && float64_is_zero(b)) ||
        (float64_is_infinity(b) && float64_is_zero(a))) {
        return float64_one_point_five;
    }
    return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst);
}

/* Pairwise long add: add pairs of adjacent elements into
 * double-width elements in the result (eg _s8 is an 8x8->16 op)
 */
uint64_t HELPER(neon_addlp_s8)(uint64_t a)
{
    uint64_t nsignmask = 0x0080008000800080ULL;
    uint64_t wsignmask = 0x8000800080008000ULL;
    uint64_t elementmask = 0x00ff00ff00ff00ffULL;
    uint64_t tmp1, tmp2;
    uint64_t res, signres;

    /* Extract odd elements, sign extend each to a 16 bit field */
    tmp1 = a & elementmask;
    tmp1 ^= nsignmask;
    tmp1 |= wsignmask;
    tmp1 = (tmp1 - nsignmask) ^ wsignmask;
    /* Ditto for the even elements */
    tmp2 = (a >> 8) & elementmask;
    tmp2 ^= nsignmask;
    tmp2 |= wsignmask;
    tmp2 = (tmp2 - nsignmask) ^ wsignmask;

    /* calculate the result by summing bits 0..14, 16..22, etc,
     * and then adjusting the sign bits 15, 23, etc manually.
     * This ensures the addition can't overflow the 16 bit field.
     */
    signres = (tmp1 ^ tmp2) & wsignmask;
    res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask);
    res ^= signres;

    return res;
}

uint64_t HELPER(neon_addlp_u8)(uint64_t a)
{
    uint64_t tmp;

    tmp = a & 0x00ff00ff00ff00ffULL;
    tmp += (a >> 8) & 0x00ff00ff00ff00ffULL;
    return tmp;
}

uint64_t HELPER(neon_addlp_s16)(uint64_t a)
{
    int32_t reslo, reshi;

    reslo = (int32_t)(int16_t)a + (int32_t)(int16_t)(a >> 16);
    reshi = (int32_t)(int16_t)(a >> 32) + (int32_t)(int16_t)(a >> 48);

    return (uint32_t)reslo | (((uint64_t)reshi) << 32);
}

uint64_t HELPER(neon_addlp_u16)(uint64_t a)
{
    uint64_t tmp;

    tmp = a & 0x0000ffff0000ffffULL;
    tmp += (a >> 16) & 0x0000ffff0000ffffULL;
    return tmp;
}

/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
uint32_t HELPER(frecpx_f16)(uint32_t a, void *fpstp)
{
    float_status *fpst = fpstp;
    uint16_t val16, sbit;
    int16_t exp;

    if (float16_is_any_nan(a)) {
        float16 nan = a;
        if (float16_is_signaling_nan(a, fpst)) {
            float_raise(float_flag_invalid, fpst);
            if (!fpst->default_nan_mode) {
                nan = float16_silence_nan(a, fpst);
            }
        }
        if (fpst->default_nan_mode) {
            nan = float16_default_nan(fpst);
        }
        return nan;
    }

    a = float16_squash_input_denormal(a, fpst);

    val16 = float16_val(a);
    sbit = 0x8000 & val16;
    exp = extract32(val16, 10, 5);

    if (exp == 0) {
        return make_float16(deposit32(sbit, 10, 5, 0x1e));
    } else {
        return make_float16(deposit32(sbit, 10, 5, ~exp));
    }
}

float32 HELPER(frecpx_f32)(float32 a, void *fpstp)
{
    float_status *fpst = fpstp;
    uint32_t val32, sbit;
    int32_t exp;

    if (float32_is_any_nan(a)) {
        float32 nan = a;
        if (float32_is_signaling_nan(a, fpst)) {
            float_raise(float_flag_invalid, fpst);
            if (!fpst->default_nan_mode) {
                nan = float32_silence_nan(a, fpst);
            }
        }
        if (fpst->default_nan_mode) {
            nan = float32_default_nan(fpst);
        }
        return nan;
    }

    a = float32_squash_input_denormal(a, fpst);

    val32 = float32_val(a);
    sbit = 0x80000000ULL & val32;
    exp = extract32(val32, 23, 8);

    if (exp == 0) {
        return make_float32(sbit | (0xfe << 23));
    } else {
        return make_float32(sbit | (~exp & 0xff) << 23);
    }
}

float64 HELPER(frecpx_f64)(float64 a, void *fpstp)
{
    float_status *fpst = fpstp;
    uint64_t val64, sbit;
    int64_t exp;

    if (float64_is_any_nan(a)) {
        float64 nan = a;
        if (float64_is_signaling_nan(a, fpst)) {
            float_raise(float_flag_invalid, fpst);
            if (!fpst->default_nan_mode) {
                nan = float64_silence_nan(a, fpst);
            }
        }
        if (fpst->default_nan_mode) {
            nan = float64_default_nan(fpst);
        }
        return nan;
    }

    a = float64_squash_input_denormal(a, fpst);

    val64 = float64_val(a);
    sbit = 0x8000000000000000ULL & val64;
    exp = extract64(float64_val(a), 52, 11);

    if (exp == 0) {
        return make_float64(sbit | (0x7feULL << 52));
    } else {
        return make_float64(sbit | (~exp & 0x7ffULL) << 52);
    }
}
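
/*
 * In other words, the FRECPX helpers above keep the operand's sign, zero
 * the fraction and replace the exponent field with its bitwise inverse
 * (using the largest finite exponent when the squashed input has a zero
 * exponent), so the result is a power of two whose exponent is roughly
 * the negation of the input's.
 */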

float32 HELPER(fcvtx_f64_to_f32)(float64 a, CPUARMState *env)
{
    /* Von Neumann rounding is implemented by using round-to-zero
     * and then setting the LSB of the result if Inexact was raised.
     */
    float32 r;
    float_status *fpst = &env->vfp.fp_status;
    float_status tstat = *fpst;
    int exflags;

    set_float_rounding_mode(float_round_to_zero, &tstat);
    set_float_exception_flags(0, &tstat);
    r = float64_to_float32(a, &tstat);
    exflags = get_float_exception_flags(&tstat);
    if (exflags & float_flag_inexact) {
        r = make_float32(float32_val(r) | 1);
    }
    exflags |= get_float_exception_flags(fpst);
    set_float_exception_flags(exflags, fpst);
    return r;
}

/* 64-bit versions of the CRC helpers. Note that although the operation
 * (and the prototypes of crc32c() and crc32()) means that only the bottom
 * 32 bits of the accumulator and result are used, we pass and return
 * uint64_t for convenience of the generated code. Unlike the 32-bit
 * instruction set versions, val may genuinely have 64 bits of data in it.
 * The upper bytes of val (above the number specified by 'bytes') must have
 * been zeroed out by the caller.
 */
uint64_t HELPER(crc32_64)(uint64_t acc, uint64_t val, uint32_t bytes)
{
    uint8_t buf[8];

    stq_le_p(buf, val);

    /* zlib crc32 converts the accumulator and output to one's complement. */
    return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff;
}

uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes)
{
    uint8_t buf[8];

    stq_le_p(buf, val);

    /* Linux crc32c converts the output to one's complement. */
    return crc32c(acc, buf, bytes) ^ 0xffffffff;
}

uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr,
                                     uint64_t new_lo, uint64_t new_hi)
{
    Int128 cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
    Int128 newv = int128_make128(new_lo, new_hi);
    Int128 oldv;
    uintptr_t ra = GETPC();
    uint64_t o0, o1;
    bool success;

#ifdef CONFIG_USER_ONLY
    /* ??? Enforce alignment.  */
    uint64_t *haddr = g2h(env_cpu(env), addr);

    set_helper_retaddr(ra);
    o0 = ldq_le_p(haddr + 0);
    o1 = ldq_le_p(haddr + 1);
    oldv = int128_make128(o0, o1);

    success = int128_eq(oldv, cmpv);
    if (success) {
        stq_le_p(haddr + 0, int128_getlo(newv));
        stq_le_p(haddr + 1, int128_gethi(newv));
    }
    clear_helper_retaddr();
#else
    int mem_idx = cpu_mmu_index(env, false);
    MemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
    MemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx);

    o0 = helper_le_ldq_mmu(env, addr + 0, oi0, ra);
    o1 = helper_le_ldq_mmu(env, addr + 8, oi1, ra);
    oldv = int128_make128(o0, o1);

    success = int128_eq(oldv, cmpv);
    if (success) {
        helper_le_stq_mmu(env, addr + 0, int128_getlo(newv), oi1, ra);
        helper_le_stq_mmu(env, addr + 8, int128_gethi(newv), oi1, ra);
    }
#endif

    return !success;
}

uint64_t HELPER(paired_cmpxchg64_le_parallel)(CPUARMState *env, uint64_t addr,
                                              uint64_t new_lo, uint64_t new_hi)
{
    Int128 oldv, cmpv, newv;
    uintptr_t ra = GETPC();
    bool success;
    int mem_idx;
    MemOpIdx oi;

    assert(HAVE_CMPXCHG128);

    mem_idx = cpu_mmu_index(env, false);
    oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);

    cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
    newv = int128_make128(new_lo, new_hi);
    oldv = cpu_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);

    success = int128_eq(oldv, cmpv);
    return !success;
}

uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr,
                                     uint64_t new_lo, uint64_t new_hi)
{
    /*
     * High and low need to be switched here because this is not actually a
     * 128bit store but two doublewords stored consecutively
     */
    Int128 cmpv = int128_make128(env->exclusive_high, env->exclusive_val);
    Int128 newv = int128_make128(new_hi, new_lo);
    Int128 oldv;
    uintptr_t ra = GETPC();
    uint64_t o0, o1;
    bool success;

#ifdef CONFIG_USER_ONLY
    /* ??? Enforce alignment.  */
    uint64_t *haddr = g2h(env_cpu(env), addr);

    set_helper_retaddr(ra);
    o1 = ldq_be_p(haddr + 0);
    o0 = ldq_be_p(haddr + 1);
    oldv = int128_make128(o0, o1);

    success = int128_eq(oldv, cmpv);
    if (success) {
        stq_be_p(haddr + 0, int128_gethi(newv));
        stq_be_p(haddr + 1, int128_getlo(newv));
    }
    clear_helper_retaddr();
#else
    int mem_idx = cpu_mmu_index(env, false);
    MemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
    MemOpIdx oi1 = make_memop_idx(MO_BEQ, mem_idx);

    o1 = helper_be_ldq_mmu(env, addr + 0, oi0, ra);
    o0 = helper_be_ldq_mmu(env, addr + 8, oi1, ra);
    oldv = int128_make128(o0, o1);

    success = int128_eq(oldv, cmpv);
    if (success) {
        helper_be_stq_mmu(env, addr + 0, int128_gethi(newv), oi1, ra);
        helper_be_stq_mmu(env, addr + 8, int128_getlo(newv), oi1, ra);
    }
#endif

    return !success;
}

uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr,
                                              uint64_t new_lo, uint64_t new_hi)
{
    Int128 oldv, cmpv, newv;
    uintptr_t ra = GETPC();
    bool success;
    int mem_idx;
    MemOpIdx oi;

    assert(HAVE_CMPXCHG128);

    mem_idx = cpu_mmu_index(env, false);
    oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);

    /*
     * High and low need to be switched here because this is not actually a
     * 128bit store but two doublewords stored consecutively
     */
    cmpv = int128_make128(env->exclusive_high, env->exclusive_val);
    newv = int128_make128(new_hi, new_lo);
    oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);

    success = int128_eq(oldv, cmpv);
    return !success;
}

/* Writes back the old data into Rs. */
void HELPER(casp_le_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
                              uint64_t new_lo, uint64_t new_hi)
{
    Int128 oldv, cmpv, newv;
    uintptr_t ra = GETPC();
    int mem_idx;
    MemOpIdx oi;

    assert(HAVE_CMPXCHG128);

    mem_idx = cpu_mmu_index(env, false);
    oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);

    cmpv = int128_make128(env->xregs[rs], env->xregs[rs + 1]);
    newv = int128_make128(new_lo, new_hi);
    oldv = cpu_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);

    env->xregs[rs] = int128_getlo(oldv);
    env->xregs[rs + 1] = int128_gethi(oldv);
}

void HELPER(casp_be_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
                              uint64_t new_hi, uint64_t new_lo)
{
    Int128 oldv, cmpv, newv;
    uintptr_t ra = GETPC();
    int mem_idx;
    MemOpIdx oi;

    assert(HAVE_CMPXCHG128);

    mem_idx = cpu_mmu_index(env, false);
    oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);

    cmpv = int128_make128(env->xregs[rs + 1], env->xregs[rs]);
    newv = int128_make128(new_lo, new_hi);
    oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);

    env->xregs[rs + 1] = int128_getlo(oldv);
    env->xregs[rs] = int128_gethi(oldv);
}

/*
 * AdvSIMD half-precision
 */

#define ADVSIMD_HELPER(name, suffix) HELPER(glue(glue(advsimd_, name), suffix))

#define ADVSIMD_HALFOP(name) \
uint32_t ADVSIMD_HELPER(name, h)(uint32_t a, uint32_t b, void *fpstp) \
{ \
    float_status *fpst = fpstp; \
    return float16_ ## name(a, b, fpst); \
}

ADVSIMD_HALFOP(add)
ADVSIMD_HALFOP(sub)
ADVSIMD_HALFOP(mul)
ADVSIMD_HALFOP(div)
ADVSIMD_HALFOP(min)
ADVSIMD_HALFOP(max)
ADVSIMD_HALFOP(minnum)
ADVSIMD_HALFOP(maxnum)

#define ADVSIMD_TWOHALFOP(name) \
uint32_t ADVSIMD_HELPER(name, 2h)(uint32_t two_a, uint32_t two_b, void *fpstp) \
{ \
    float16 a1, a2, b1, b2; \
    uint32_t r1, r2; \
    float_status *fpst = fpstp; \
    a1 = extract32(two_a, 0, 16); \
    a2 = extract32(two_a, 16, 16); \
    b1 = extract32(two_b, 0, 16); \
    b2 = extract32(two_b, 16, 16); \
    r1 = float16_ ## name(a1, b1, fpst); \
    r2 = float16_ ## name(a2, b2, fpst); \
    return deposit32(r1, 16, 16, r2); \
}

ADVSIMD_TWOHALFOP(add)
ADVSIMD_TWOHALFOP(sub)
ADVSIMD_TWOHALFOP(mul)
ADVSIMD_TWOHALFOP(div)
ADVSIMD_TWOHALFOP(min)
ADVSIMD_TWOHALFOP(max)
ADVSIMD_TWOHALFOP(minnum)
ADVSIMD_TWOHALFOP(maxnum)

/* Data processing - scalar floating-point and advanced SIMD */
static float16 float16_mulx(float16 a, float16 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float16_squash_input_denormal(a, fpst);
    b = float16_squash_input_denormal(b, fpst);

    if ((float16_is_zero(a) && float16_is_infinity(b)) ||
        (float16_is_infinity(a) && float16_is_zero(b))) {
        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
        return make_float16((1U << 14) |
                            ((float16_val(a) ^ float16_val(b)) & (1U << 15)));
    }
    return float16_mul(a, b, fpst);
}

ADVSIMD_HALFOP(mulx)
ADVSIMD_TWOHALFOP(mulx)

/* fused multiply-accumulate */
uint32_t HELPER(advsimd_muladdh)(uint32_t a, uint32_t b, uint32_t c,
                                 void *fpstp)
{
    float_status *fpst = fpstp;
    return float16_muladd(a, b, c, 0, fpst);
}

uint32_t HELPER(advsimd_muladd2h)(uint32_t two_a, uint32_t two_b,
                                  uint32_t two_c, void *fpstp)
{
    float_status *fpst = fpstp;
    float16 a1, a2, b1, b2, c1, c2;
    uint32_t r1, r2;
    a1 = extract32(two_a, 0, 16);
    a2 = extract32(two_a, 16, 16);
    b1 = extract32(two_b, 0, 16);
    b2 = extract32(two_b, 16, 16);
    c1 = extract32(two_c, 0, 16);
    c2 = extract32(two_c, 16, 16);
    r1 = float16_muladd(a1, b1, c1, 0, fpst);
    r2 = float16_muladd(a2, b2, c2, 0, fpst);
    return deposit32(r1, 16, 16, r2);
}

/*
 * Floating point comparisons produce an integer result. Softfloat
 * routines return float_relation types which we convert to the 0/-1
 * Neon requires.
 */

#define ADVSIMD_CMPRES(test) (test) ? 0xffff : 0
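
/*
 * 0xffff is the all-ones pattern for a 16-bit lane, i.e. the -1 that the
 * comment above refers to, so a true comparison produces an all-ones
 * half-precision element and a false one produces zero.
 */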

uint32_t HELPER(advsimd_ceq_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;
    int compare = float16_compare_quiet(a, b, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_equal);
}

uint32_t HELPER(advsimd_cge_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;
    int compare = float16_compare(a, b, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater ||
                          compare == float_relation_equal);
}

uint32_t HELPER(advsimd_cgt_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;
    int compare = float16_compare(a, b, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater);
}

uint32_t HELPER(advsimd_acge_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;
    float16 f0 = float16_abs(a);
    float16 f1 = float16_abs(b);
    int compare = float16_compare(f0, f1, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater ||
                          compare == float_relation_equal);
}

uint32_t HELPER(advsimd_acgt_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;
    float16 f0 = float16_abs(a);
    float16 f1 = float16_abs(b);
    int compare = float16_compare(f0, f1, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater);
}

/* round to integral */
uint32_t HELPER(advsimd_rinth_exact)(uint32_t x, void *fp_status)
{
    return float16_round_to_int(x, fp_status);
}

uint32_t HELPER(advsimd_rinth)(uint32_t x, void *fp_status)
{
    int old_flags = get_float_exception_flags(fp_status), new_flags;
    float16 ret;

    ret = float16_round_to_int(x, fp_status);

    /* Suppress any inexact exceptions the conversion produced */
    if (!(old_flags & float_flag_inexact)) {
        new_flags = get_float_exception_flags(fp_status);
        set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
    }

    return ret;
}

/*
 * Half-precision floating point conversion functions
 *
 * There are a multitude of conversion functions with various
 * different rounding modes. This is dealt with by the calling code
 * setting the mode appropriately before calling the helper.
 */

uint32_t HELPER(advsimd_f16tosinth)(uint32_t a, void *fpstp)
{
    float_status *fpst = fpstp;

    /* Invalid if we are passed a NaN */
    if (float16_is_any_nan(a)) {
        float_raise(float_flag_invalid, fpst);
        return 0;
    }
    return float16_to_int16(a, fpst);
}

uint32_t HELPER(advsimd_f16touinth)(uint32_t a, void *fpstp)
{
    float_status *fpst = fpstp;

    /* Invalid if we are passed a NaN */
    if (float16_is_any_nan(a)) {
        float_raise(float_flag_invalid, fpst);
        return 0;
    }
    return float16_to_uint16(a, fpst);
}

static int el_from_spsr(uint32_t spsr)
{
    /* Return the exception level that this SPSR is requesting a return to,
     * or -1 if it is invalid (an illegal return)
     */
    if (spsr & PSTATE_nRW) {
        switch (spsr & CPSR_M) {
        case ARM_CPU_MODE_USR:
            return 0;
        case ARM_CPU_MODE_HYP:
            return 2;
        case ARM_CPU_MODE_FIQ:
        case ARM_CPU_MODE_IRQ:
        case ARM_CPU_MODE_SVC:
        case ARM_CPU_MODE_ABT:
        case ARM_CPU_MODE_UND:
        case ARM_CPU_MODE_SYS:
            return 1;
        case ARM_CPU_MODE_MON:
            /* Returning to Mon from AArch64 is never possible,
             * so this is an illegal return.
             */
        default:
            return -1;
        }
    } else {
        if (extract32(spsr, 1, 1)) {
            /* Return with reserved M[1] bit set */
            return -1;
        }
        if (extract32(spsr, 0, 4) == 1) {
            /* return to EL0 with M[0] bit set */
            return -1;
        }
        return extract32(spsr, 2, 2);
    }
}

static void cpsr_write_from_spsr_elx(CPUARMState *env,
                                     uint32_t val)
{
    uint32_t mask;

    /* Save SPSR_ELx.SS into PSTATE. */
    env->pstate = (env->pstate & ~PSTATE_SS) | (val & PSTATE_SS);
    val &= ~PSTATE_SS;

    /* Move DIT to the correct location for CPSR */
    if (val & PSTATE_DIT) {
        val &= ~PSTATE_DIT;
        val |= CPSR_DIT;
    }

    mask = aarch32_cpsr_valid_mask(env->features,
                                   &env_archcpu(env)->isar);
    cpsr_write(env, val, mask, CPSRWriteRaw);
}

void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc)
{
    int cur_el = arm_current_el(env);
    unsigned int spsr_idx = aarch64_banked_spsr_index(cur_el);
    uint32_t spsr = env->banked_spsr[spsr_idx];
    int new_el;
    bool return_to_aa64 = (spsr & PSTATE_nRW) == 0;

    aarch64_save_sp(env, cur_el);

    arm_clear_exclusive(env);

    /* We must squash the PSTATE.SS bit to zero unless both of the
     * following hold:
     * 1. debug exceptions are currently disabled
     * 2. singlestep will be active in the EL we return to
     * We check 1 here and 2 after we've done the pstate/cpsr write() to
     * transition to the EL we're going to.
     */
    if (arm_generate_debug_exceptions(env)) {
        spsr &= ~PSTATE_SS;
    }

    new_el = el_from_spsr(spsr);
    if (new_el == -1) {
        goto illegal_return;
    }
    if (new_el > cur_el || (new_el == 2 && !arm_is_el2_enabled(env))) {
        /* Disallow return to an EL which is unimplemented or higher
         * than the current one.
         */
        goto illegal_return;
    }

    if (new_el != 0 && arm_el_is_aa64(env, new_el) != return_to_aa64) {
        /* Return to an EL which is configured for a different register width */
        goto illegal_return;
    }

    if (new_el == 1 && (arm_hcr_el2_eff(env) & HCR_TGE)) {
        goto illegal_return;
    }

    qemu_mutex_lock_iothread();
    arm_call_pre_el_change_hook(env_archcpu(env));
    qemu_mutex_unlock_iothread();

    if (!return_to_aa64) {
        env->aarch64 = 0;
        /* We do a raw CPSR write because aarch64_sync_64_to_32()
         * will sort the register banks out for us, and we've already
         * caught all the bad-mode cases in el_from_spsr().
         */
        cpsr_write_from_spsr_elx(env, spsr);
        if (!arm_singlestep_active(env)) {
            env->pstate &= ~PSTATE_SS;
        }
        aarch64_sync_64_to_32(env);

        if (spsr & CPSR_T) {
            env->regs[15] = new_pc & ~0x1;
        } else {
            env->regs[15] = new_pc & ~0x3;
        }
        helper_rebuild_hflags_a32(env, new_el);
        qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to "
                      "AArch32 EL%d PC 0x%" PRIx32 "\n",
                      cur_el, new_el, env->regs[15]);
    } else {
        int tbii;

        env->aarch64 = 1;
        spsr &= aarch64_pstate_valid_mask(&env_archcpu(env)->isar);
        pstate_write(env, spsr);
        if (!arm_singlestep_active(env)) {
            env->pstate &= ~PSTATE_SS;
        }
        aarch64_restore_sp(env, new_el);
        helper_rebuild_hflags_a64(env, new_el);

        /*
         * Apply TBI to the exception return address. We had to delay this
         * until after we selected the new EL, so that we could select the
         * correct TBI+TBID bits. This is made easier by waiting until after
         * the hflags rebuild, since we can pull the composite TBII field
         * from there.
         */
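        /*
         * TBII holds one enable bit per translation range; bit 55 of the
         * return address selects which of the two bits applies, since it
         * distinguishes the low and high halves of the address space.
         */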
        tbii = EX_TBFLAG_A64(env->hflags, TBII);
        if ((tbii >> extract64(new_pc, 55, 1)) & 1) {
            /* TBI is enabled. */
            int core_mmu_idx = cpu_mmu_index(env, false);
            if (regime_has_2_ranges(core_to_aa64_mmu_idx(core_mmu_idx))) {
                new_pc = sextract64(new_pc, 0, 56);
            } else {
                new_pc = extract64(new_pc, 0, 56);
            }
        }
        env->pc = new_pc;

        qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to "
                      "AArch64 EL%d PC 0x%" PRIx64 "\n",
                      cur_el, new_el, env->pc);
    }

    /*
     * Note that cur_el can never be 0. If new_el is 0, then
     * el0_a64 is return_to_aa64, else el0_a64 is ignored.
     */
    aarch64_sve_change_el(env, cur_el, new_el, return_to_aa64);

    qemu_mutex_lock_iothread();
    arm_call_el_change_hook(env_archcpu(env));
    qemu_mutex_unlock_iothread();

    return;

illegal_return:
    /* Illegal return events of various kinds have architecturally
     * mandated behaviour:
     * restore NZCV and DAIF from SPSR_ELx
     * set PSTATE.IL
     * restore PC from ELR_ELx
     * no change to exception level, execution state or stack pointer
     */
    env->pstate |= PSTATE_IL;
    env->pc = new_pc;
    spsr &= PSTATE_NZCV | PSTATE_DAIF;
    spsr |= pstate_read(env) & ~(PSTATE_NZCV | PSTATE_DAIF);
    pstate_write(env, spsr);
    if (!arm_singlestep_active(env)) {
        env->pstate &= ~PSTATE_SS;
    }
    helper_rebuild_hflags_a64(env, cur_el);
    qemu_log_mask(LOG_GUEST_ERROR, "Illegal exception return at EL%d: "
                  "resuming execution at 0x%" PRIx64 "\n", cur_el, env->pc);
}

/*
 * Square Root and Reciprocal square root
 */

uint32_t HELPER(sqrt_f16)(uint32_t a, void *fpstp)
{
    float_status *s = fpstp;

    return float16_sqrt(a, s);
}

void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
{
    /*
     * Implement DC ZVA, which zeroes a fixed-length block of memory.
     * Note that we do not implement the (architecturally mandated)
     * alignment fault for attempts to use this on Device memory
     * (which matches the usual QEMU behaviour of not implementing either
     * alignment faults or any memory attribute handling).
     */
    int blocklen = 4 << env_archcpu(env)->dcz_blocksize;
    uint64_t vaddr = vaddr_in & ~(blocklen - 1);
    int mmu_idx = cpu_mmu_index(env, false);
    void *mem;

    /*
     * Trapless lookup. In addition to actual invalid page, may
     * return NULL for I/O, watchpoints, clean pages, etc.
     */
    mem = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);

#ifndef CONFIG_USER_ONLY
    if (unlikely(!mem)) {
        uintptr_t ra = GETPC();

        /*
         * Trap if accessing an invalid page. DC_ZVA requires that we supply
         * the original pointer for an invalid page. But watchpoints require
         * that we probe the actual space. So do both.
         */
        (void) probe_write(env, vaddr_in, 1, mmu_idx, ra);
        mem = probe_write(env, vaddr, blocklen, mmu_idx, ra);

        if (unlikely(!mem)) {
            /*
             * The only remaining reason for mem == NULL is I/O.
             * Just do a series of byte writes as the architecture demands.
             */
            for (int i = 0; i < blocklen; i++) {
                cpu_stb_mmuidx_ra(env, vaddr + i, 0, mmu_idx, ra);
            }
            return;
        }
    }
#endif

    memset(mem, 0, blocklen);
}