mte_helper.c (30488B)
/*
 * ARM v8.5-MemTag Operations
 *
 * Copyright (c) 2020 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "internals.h"
#include "exec/exec-all.h"
#include "exec/ram_addr.h"
#include "exec/cpu_ldst.h"
#include "exec/helper-proto.h"
#include "qapi/error.h"
#include "qemu/guest-random.h"


static int choose_nonexcluded_tag(int tag, int offset, uint16_t exclude)
{
    if (exclude == 0xffff) {
        return 0;
    }
    if (offset == 0) {
        while (exclude & (1 << tag)) {
            tag = (tag + 1) & 15;
        }
    } else {
        do {
            do {
                tag = (tag + 1) & 15;
            } while (exclude & (1 << tag));
        } while (--offset > 0);
    }
    return tag;
}
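
/*
 * Illustrative sketch (hypothetical, not part of the original helpers):
 * a worked example of the deterministic tag selection above.  With tags
 * 1..3 excluded and a starting tag of 1, offset 0 skips forward to the
 * first allowed tag (4), while offset 2 advances through two allowed
 * tags (4, then 5).  The example_* name is invented for illustration.
 */
static __attribute__((unused)) void example_choose_nonexcluded_tag(void)
{
    uint16_t exclude = (1 << 1) | (1 << 2) | (1 << 3);

    g_assert(choose_nonexcluded_tag(1, 0, exclude) == 4);
    g_assert(choose_nonexcluded_tag(1, 2, exclude) == 5);
    /* When every tag is excluded, the result is forced to 0. */
    g_assert(choose_nonexcluded_tag(7, 3, 0xffff) == 0);
}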

/**
 * allocation_tag_mem:
 * @env: the cpu environment
 * @ptr_mmu_idx: the addressing regime to use for the virtual address
 * @ptr: the virtual address for which to look up tag memory
 * @ptr_access: the access to use for the virtual address
 * @ptr_size: the number of bytes in the normal memory access
 * @tag_access: the access to use for the tag memory
 * @tag_size: the number of bytes in the tag memory access
 * @ra: the return address for exception handling
 *
 * Our tag memory is formatted as a sequence of little-endian nibbles.
 * That is, the byte at (addr >> (LOG2_TAG_GRANULE + 1)) contains two
 * tags, with the tag at [3:0] for the lower addr and the tag at [7:4]
 * for the higher addr.
 *
 * Here, resolve the physical address from the virtual address, and return
 * a pointer to the corresponding tag byte.  Exit with exception if the
 * virtual address is not accessible for @ptr_access.
 *
 * The @ptr_size and @tag_size values may not have an obvious relation
 * due to the alignment of @ptr, and the number of tag checks required.
 *
 * If there is no tag storage corresponding to @ptr, return NULL.
 */
static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
                                   uint64_t ptr, MMUAccessType ptr_access,
                                   int ptr_size, MMUAccessType tag_access,
                                   int tag_size, uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    uint64_t clean_ptr = useronly_clean_ptr(ptr);
    int flags = page_get_flags(clean_ptr);
    uint8_t *tags;
    uintptr_t index;

    if (!(flags & (ptr_access == MMU_DATA_STORE ? PAGE_WRITE_ORG : PAGE_READ))) {
        /* SIGSEGV */
        arm_cpu_tlb_fill(env_cpu(env), ptr, ptr_size, ptr_access,
                         ptr_mmu_idx, false, ra);
        g_assert_not_reached();
    }

    /* Require both MAP_ANON and PROT_MTE for the page. */
    if (!(flags & PAGE_ANON) || !(flags & PAGE_MTE)) {
        return NULL;
    }

    tags = page_get_target_data(clean_ptr);
    if (tags == NULL) {
        size_t alloc_size = TARGET_PAGE_SIZE >> (LOG2_TAG_GRANULE + 1);
        tags = page_alloc_target_data(clean_ptr, alloc_size);
        assert(tags != NULL);
    }

    index = extract32(ptr, LOG2_TAG_GRANULE + 1,
                      TARGET_PAGE_BITS - LOG2_TAG_GRANULE - 1);
    return tags + index;
#else
    uintptr_t index;
    CPUIOTLBEntry *iotlbentry;
    int in_page, flags;
    ram_addr_t ptr_ra;
    hwaddr ptr_paddr, tag_paddr, xlat;
    MemoryRegion *mr;
    ARMASIdx tag_asi;
    AddressSpace *tag_as;
    void *host;

    /*
     * Probe the first byte of the virtual address.  This raises an
     * exception for inaccessible pages, and resolves the virtual address
     * into the softmmu tlb.
     *
     * When RA == 0, this is for mte_probe.  The page is expected to be
     * valid.  Indicate to probe_access_flags no-fault, then assert that
     * we received a valid page.
     */
    flags = probe_access_flags(env, ptr, ptr_access, ptr_mmu_idx,
                               ra == 0, &host, ra);
    assert(!(flags & TLB_INVALID_MASK));

    /*
     * Find the iotlbentry for ptr.  This *must* be present in the TLB
     * because we just found the mapping.
     * TODO: Perhaps there should be a cputlb helper that returns a
     * matching tlb entry + iotlb entry.
     */
    index = tlb_index(env, ptr_mmu_idx, ptr);
# ifdef CONFIG_DEBUG_TCG
    {
        CPUTLBEntry *entry = tlb_entry(env, ptr_mmu_idx, ptr);
        target_ulong comparator = (ptr_access == MMU_DATA_LOAD
                                   ? entry->addr_read
                                   : tlb_addr_write(entry));
        g_assert(tlb_hit(comparator, ptr));
    }
# endif
    iotlbentry = &env_tlb(env)->d[ptr_mmu_idx].iotlb[index];

    /* If the virtual page MemAttr != Tagged, access unchecked. */
    if (!arm_tlb_mte_tagged(&iotlbentry->attrs)) {
        return NULL;
    }

    /*
     * If not backed by host ram, there is no tag storage: access unchecked.
     * This is probably a guest os bug though, so log it.
     */
    if (unlikely(flags & TLB_MMIO)) {
        qemu_log_mask(LOG_GUEST_ERROR,
                      "Page @ 0x%" PRIx64 " indicates Tagged Normal memory "
                      "but is not backed by host ram\n", ptr);
        return NULL;
    }

    /*
     * The Normal memory access can extend to the next page.  E.g. a single
     * 8-byte access to the last byte of a page will check only the last
     * tag on the first page.
     * Any page access exception has priority over tag check exception.
     */
    in_page = -(ptr | TARGET_PAGE_MASK);
    if (unlikely(ptr_size > in_page)) {
        void *ignore;
        flags |= probe_access_flags(env, ptr + in_page, ptr_access,
                                    ptr_mmu_idx, ra == 0, &ignore, ra);
        assert(!(flags & TLB_INVALID_MASK));
    }

    /* Any debug exception has priority over a tag check exception. */
    if (unlikely(flags & TLB_WATCHPOINT)) {
        int wp = ptr_access == MMU_DATA_LOAD ? BP_MEM_READ : BP_MEM_WRITE;
        assert(ra != 0);
        cpu_check_watchpoint(env_cpu(env), ptr, ptr_size,
                             iotlbentry->attrs, wp, ra);
    }

    /*
     * Find the physical address within the normal mem space.
     * The memory region lookup must succeed because TLB_MMIO was
     * not set in the cputlb lookup above.
     */
    mr = memory_region_from_host(host, &ptr_ra);
    tcg_debug_assert(mr != NULL);
    tcg_debug_assert(memory_region_is_ram(mr));
    ptr_paddr = ptr_ra;
    do {
        ptr_paddr += mr->addr;
        mr = mr->container;
    } while (mr);

    /* Convert to the physical address in tag space. */
    tag_paddr = ptr_paddr >> (LOG2_TAG_GRANULE + 1);

    /* Look up the address in tag space. */
    tag_asi = iotlbentry->attrs.secure ? ARMASIdx_TagS : ARMASIdx_TagNS;
    tag_as = cpu_get_address_space(env_cpu(env), tag_asi);
    mr = address_space_translate(tag_as, tag_paddr, &xlat, NULL,
                                 tag_access == MMU_DATA_STORE,
                                 iotlbentry->attrs);

    /*
     * Note that @mr will never be NULL.  If there is nothing in the address
     * space at @tag_paddr, the translation will return the unallocated memory
     * region.  For our purposes, the result must be ram.
     */
    if (unlikely(!memory_region_is_ram(mr))) {
        /* ??? Failure is a board configuration error. */
        qemu_log_mask(LOG_UNIMP,
                      "Tag Memory @ 0x%" HWADDR_PRIx " not found for "
                      "Normal Memory @ 0x%" HWADDR_PRIx "\n",
                      tag_paddr, ptr_paddr);
        return NULL;
    }

    /*
     * Ensure the tag memory is dirty on write, for migration.
     * Tag memory can never contain code or display memory (vga).
     */
    if (tag_access == MMU_DATA_STORE) {
        ram_addr_t tag_ra = memory_region_get_ram_addr(mr) + xlat;
        cpu_physical_memory_set_dirty_flag(tag_ra, DIRTY_MEMORY_MIGRATION);
    }

    return memory_region_get_ram_ptr(mr) + xlat;
#endif
}
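
/*
 * Illustrative sketch (hypothetical, not part of the original helpers):
 * how an address maps onto the packed tag storage described in the
 * comment above.  With TAG_GRANULE == 16, one tag byte covers a 32-byte
 * pair of granules; the low granule uses bits [3:0] and the high granule
 * bits [7:4].  The address 0x1030 and the example_* name are arbitrary.
 */
static __attribute__((unused)) void example_tag_byte_layout(void)
{
    uint64_t addr = 0x1030;
    uint64_t byte_index = addr >> (LOG2_TAG_GRANULE + 1);      /* 0x1030 / 32 */
    bool upper_nibble = extract64(addr, LOG2_TAG_GRANULE, 1);  /* odd granule */

    g_assert(byte_index == 0x81);
    g_assert(upper_nibble);
}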

uint64_t HELPER(irg)(CPUARMState *env, uint64_t rn, uint64_t rm)
{
    uint16_t exclude = extract32(rm | env->cp15.gcr_el1, 0, 16);
    int rrnd = extract32(env->cp15.gcr_el1, 16, 1);
    int start = extract32(env->cp15.rgsr_el1, 0, 4);
    int seed = extract32(env->cp15.rgsr_el1, 8, 16);
    int offset, i, rtag;

    /*
     * Our IMPDEF choice for GCR_EL1.RRND==1 is to continue to use the
     * deterministic algorithm.  Except that with RRND==1 the kernel is
     * not required to have set RGSR_EL1.SEED != 0, which is required for
     * the deterministic algorithm to function.  So we force a non-zero
     * SEED for that case.
     */
    if (unlikely(seed == 0) && rrnd) {
        do {
            Error *err = NULL;
            uint16_t two;

            if (qemu_guest_getrandom(&two, sizeof(two), &err) < 0) {
                /*
                 * Failed, for unknown reasons in the crypto subsystem.
                 * Best we can do is log the reason and use a constant seed.
                 */
                qemu_log_mask(LOG_UNIMP, "IRG: Crypto failure: %s\n",
                              error_get_pretty(err));
                error_free(err);
                two = 1;
            }
            seed = two;
        } while (seed == 0);
    }

    /* RandomTag */
    for (i = offset = 0; i < 4; ++i) {
        /* NextRandomTagBit */
        int top = (extract32(seed, 5, 1) ^ extract32(seed, 3, 1) ^
                   extract32(seed, 2, 1) ^ extract32(seed, 0, 1));
        seed = (top << 15) | (seed >> 1);
        offset |= top << i;
    }
    rtag = choose_nonexcluded_tag(start, offset, exclude);
    env->cp15.rgsr_el1 = rtag | (seed << 8);

    return address_with_allocation_tag(rn, rtag);
}
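
/*
 * Illustrative sketch (hypothetical, not part of the original helpers):
 * one step of the RGSR_EL1.SEED LFSR used in the RandomTag loop above.
 * The feedback taps are bits 5, 3, 2 and 0 of the 16-bit seed.
 */
static __attribute__((unused)) uint16_t example_next_seed(uint16_t seed)
{
    int top = (extract32(seed, 5, 1) ^ extract32(seed, 3, 1) ^
               extract32(seed, 2, 1) ^ extract32(seed, 0, 1));

    return (top << 15) | (seed >> 1);
}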

uint64_t HELPER(addsubg)(CPUARMState *env, uint64_t ptr,
                         int32_t offset, uint32_t tag_offset)
{
    int start_tag = allocation_tag_from_addr(ptr);
    uint16_t exclude = extract32(env->cp15.gcr_el1, 0, 16);
    int rtag = choose_nonexcluded_tag(start_tag, tag_offset, exclude);

    return address_with_allocation_tag(ptr + offset, rtag);
}

static int load_tag1(uint64_t ptr, uint8_t *mem)
{
    int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4;
    return extract32(*mem, ofs, 4);
}

uint64_t HELPER(ldg)(CPUARMState *env, uint64_t ptr, uint64_t xt)
{
    int mmu_idx = cpu_mmu_index(env, false);
    uint8_t *mem;
    int rtag = 0;

    /* Trap if accessing an invalid page. */
    mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_LOAD, 1,
                             MMU_DATA_LOAD, 1, GETPC());

    /* Load if page supports tags. */
    if (mem) {
        rtag = load_tag1(ptr, mem);
    }

    return address_with_allocation_tag(xt, rtag);
}

static void check_tag_aligned(CPUARMState *env, uint64_t ptr, uintptr_t ra)
{
    if (unlikely(!QEMU_IS_ALIGNED(ptr, TAG_GRANULE))) {
        arm_cpu_do_unaligned_access(env_cpu(env), ptr, MMU_DATA_STORE,
                                    cpu_mmu_index(env, false), ra);
        g_assert_not_reached();
    }
}

/* For use in a non-parallel context, store to the given nibble. */
static void store_tag1(uint64_t ptr, uint8_t *mem, int tag)
{
    int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4;
    *mem = deposit32(*mem, ofs, 4, tag);
}

/* For use in a parallel context, atomically store to the given nibble. */
static void store_tag1_parallel(uint64_t ptr, uint8_t *mem, int tag)
{
    int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4;
    uint8_t old = qatomic_read(mem);

    while (1) {
        uint8_t new = deposit32(old, ofs, 4, tag);
        uint8_t cmp = qatomic_cmpxchg(mem, old, new);
        if (likely(cmp == old)) {
            return;
        }
        old = cmp;
    }
}

typedef void stg_store1(uint64_t, uint8_t *, int);

static inline void do_stg(CPUARMState *env, uint64_t ptr, uint64_t xt,
                          uintptr_t ra, stg_store1 store1)
{
    int mmu_idx = cpu_mmu_index(env, false);
    uint8_t *mem;

    check_tag_aligned(env, ptr, ra);

    /* Trap if accessing an invalid page. */
    mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, TAG_GRANULE,
                             MMU_DATA_STORE, 1, ra);

    /* Store if page supports tags. */
    if (mem) {
        store1(ptr, mem, allocation_tag_from_addr(xt));
    }
}
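
/*
 * Illustrative sketch (hypothetical, not part of the original helpers):
 * load_tag1 and store_tag1 above are inverses for a given granule parity.
 * An even granule address selects bits [3:0] of the tag byte, an odd
 * granule address bits [7:4].
 */
static __attribute__((unused)) void example_tag_nibble_roundtrip(void)
{
    uint8_t mem = 0;

    store_tag1(0x00, &mem, 0x5);    /* even granule -> low nibble */
    store_tag1(0x10, &mem, 0xa);    /* odd granule  -> high nibble */
    g_assert(mem == 0xa5);
    g_assert(load_tag1(0x00, &mem) == 0x5);
    g_assert(load_tag1(0x10, &mem) == 0xa);
}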

void HELPER(stg)(CPUARMState *env, uint64_t ptr, uint64_t xt)
{
    do_stg(env, ptr, xt, GETPC(), store_tag1);
}

void HELPER(stg_parallel)(CPUARMState *env, uint64_t ptr, uint64_t xt)
{
    do_stg(env, ptr, xt, GETPC(), store_tag1_parallel);
}

void HELPER(stg_stub)(CPUARMState *env, uint64_t ptr)
{
    int mmu_idx = cpu_mmu_index(env, false);
    uintptr_t ra = GETPC();

    check_tag_aligned(env, ptr, ra);
    probe_write(env, ptr, TAG_GRANULE, mmu_idx, ra);
}

static inline void do_st2g(CPUARMState *env, uint64_t ptr, uint64_t xt,
                           uintptr_t ra, stg_store1 store1)
{
    int mmu_idx = cpu_mmu_index(env, false);
    int tag = allocation_tag_from_addr(xt);
    uint8_t *mem1, *mem2;

    check_tag_aligned(env, ptr, ra);

    /*
     * Trap if accessing an invalid page(s).
     * This takes priority over !allocation_tag_access_enabled.
     */
    if (ptr & TAG_GRANULE) {
        /* Two stores unaligned mod TAG_GRANULE*2 -- modify two bytes. */
        mem1 = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE,
                                  TAG_GRANULE, MMU_DATA_STORE, 1, ra);
        mem2 = allocation_tag_mem(env, mmu_idx, ptr + TAG_GRANULE,
                                  MMU_DATA_STORE, TAG_GRANULE,
                                  MMU_DATA_STORE, 1, ra);

        /* Store if page(s) support tags. */
        if (mem1) {
            store1(TAG_GRANULE, mem1, tag);
        }
        if (mem2) {
            store1(0, mem2, tag);
        }
    } else {
        /* Two stores aligned mod TAG_GRANULE*2 -- modify one byte. */
        mem1 = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE,
                                  2 * TAG_GRANULE, MMU_DATA_STORE, 1, ra);
        if (mem1) {
            tag |= tag << 4;
            qatomic_set(mem1, tag);
        }
    }
}

void HELPER(st2g)(CPUARMState *env, uint64_t ptr, uint64_t xt)
{
    do_st2g(env, ptr, xt, GETPC(), store_tag1);
}

void HELPER(st2g_parallel)(CPUARMState *env, uint64_t ptr, uint64_t xt)
{
    do_st2g(env, ptr, xt, GETPC(), store_tag1_parallel);
}

void HELPER(st2g_stub)(CPUARMState *env, uint64_t ptr)
{
    int mmu_idx = cpu_mmu_index(env, false);
    uintptr_t ra = GETPC();
    int in_page = -(ptr | TARGET_PAGE_MASK);

    check_tag_aligned(env, ptr, ra);

    if (likely(in_page >= 2 * TAG_GRANULE)) {
        probe_write(env, ptr, 2 * TAG_GRANULE, mmu_idx, ra);
    } else {
        probe_write(env, ptr, TAG_GRANULE, mmu_idx, ra);
        probe_write(env, ptr + TAG_GRANULE, TAG_GRANULE, mmu_idx, ra);
    }
}
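
/*
 * Illustrative sketch (hypothetical, not part of the original helpers):
 * why do_st2g above distinguishes the two alignments.  A pair of granules
 * starting at an address aligned mod 2*TAG_GRANULE shares a single tag
 * byte, so both nibbles can be written with one byte store; an unaligned
 * pair straddles two tag bytes.
 */
static __attribute__((unused)) void example_st2g_alignment(void)
{
    /* Aligned pair: granules at 0x40 and 0x50 share tag byte 2. */
    g_assert((0x40 >> (LOG2_TAG_GRANULE + 1)) == (0x50 >> (LOG2_TAG_GRANULE + 1)));
    /* Unaligned pair: granules at 0x50 and 0x60 use tag bytes 2 and 3. */
    g_assert((0x50 >> (LOG2_TAG_GRANULE + 1)) != (0x60 >> (LOG2_TAG_GRANULE + 1)));
}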

#define LDGM_STGM_SIZE (4 << GMID_EL1_BS)

uint64_t HELPER(ldgm)(CPUARMState *env, uint64_t ptr)
{
    int mmu_idx = cpu_mmu_index(env, false);
    uintptr_t ra = GETPC();
    void *tag_mem;

    ptr = QEMU_ALIGN_DOWN(ptr, LDGM_STGM_SIZE);

    /* Trap if accessing an invalid page. */
    tag_mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_LOAD,
                                 LDGM_STGM_SIZE, MMU_DATA_LOAD,
                                 LDGM_STGM_SIZE / (2 * TAG_GRANULE), ra);

    /* The tag is squashed to zero if the page does not support tags. */
    if (!tag_mem) {
        return 0;
    }

    QEMU_BUILD_BUG_ON(GMID_EL1_BS != 6);
    /*
     * We are loading 64-bits worth of tags.  The ordering of elements
     * within the word corresponds to a 64-bit little-endian operation.
     */
    return ldq_le_p(tag_mem);
}

void HELPER(stgm)(CPUARMState *env, uint64_t ptr, uint64_t val)
{
    int mmu_idx = cpu_mmu_index(env, false);
    uintptr_t ra = GETPC();
    void *tag_mem;

    ptr = QEMU_ALIGN_DOWN(ptr, LDGM_STGM_SIZE);

    /* Trap if accessing an invalid page. */
    tag_mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE,
                                 LDGM_STGM_SIZE, MMU_DATA_LOAD,
                                 LDGM_STGM_SIZE / (2 * TAG_GRANULE), ra);

    /*
     * Tag store only happens if the page supports tags,
     * and if the OS has enabled access to the tags.
     */
    if (!tag_mem) {
        return;
    }

    QEMU_BUILD_BUG_ON(GMID_EL1_BS != 6);
    /*
     * We are storing 64-bits worth of tags.  The ordering of elements
     * within the word corresponds to a 64-bit little-endian operation.
     */
    stq_le_p(tag_mem, val);
}

void HELPER(stzgm_tags)(CPUARMState *env, uint64_t ptr, uint64_t val)
{
    uintptr_t ra = GETPC();
    int mmu_idx = cpu_mmu_index(env, false);
    int log2_dcz_bytes, log2_tag_bytes;
    intptr_t dcz_bytes, tag_bytes;
    uint8_t *mem;

    /*
     * In arm_cpu_realizefn, we assert that dcz > LOG2_TAG_GRANULE+1,
     * i.e. 32 bytes, which is an unreasonably small dcz anyway,
     * to make sure that we can access one complete tag byte here.
     */
    log2_dcz_bytes = env_archcpu(env)->dcz_blocksize + 2;
    log2_tag_bytes = log2_dcz_bytes - (LOG2_TAG_GRANULE + 1);
    dcz_bytes = (intptr_t)1 << log2_dcz_bytes;
    tag_bytes = (intptr_t)1 << log2_tag_bytes;
    ptr &= -dcz_bytes;

    mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, dcz_bytes,
                             MMU_DATA_STORE, tag_bytes, ra);
    if (mem) {
        int tag_pair = (val & 0xf) * 0x11;
        memset(mem, tag_pair, tag_bytes);
    }
}
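
/*
 * Illustrative sketch (hypothetical, not part of the original helpers):
 * with GMID_EL1.BS == 6, the LDGM/STGM footprint above is 4 << 6 == 256
 * bytes, i.e. 16 granules, whose 16 four-bit tags pack exactly into the
 * single 64-bit value moved by ldq_le_p/stq_le_p.
 */
QEMU_BUILD_BUG_ON(LDGM_STGM_SIZE / TAG_GRANULE != 16);
QEMU_BUILD_BUG_ON(LDGM_STGM_SIZE / (2 * TAG_GRANULE) != sizeof(uint64_t));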

static void mte_sync_check_fail(CPUARMState *env, uint32_t desc,
                                uint64_t dirty_ptr, uintptr_t ra)
{
    int is_write, syn;

    env->exception.vaddress = dirty_ptr;

    is_write = FIELD_EX32(desc, MTEDESC, WRITE);
    syn = syn_data_abort_no_iss(arm_current_el(env) != 0, 0, 0, 0, 0, is_write,
                                0x11);
    raise_exception_ra(env, EXCP_DATA_ABORT, syn, exception_target_el(env), ra);
    g_assert_not_reached();
}

static void mte_async_check_fail(CPUARMState *env, uint64_t dirty_ptr,
                                 uintptr_t ra, ARMMMUIdx arm_mmu_idx, int el)
{
    int select;

    if (regime_has_2_ranges(arm_mmu_idx)) {
        select = extract64(dirty_ptr, 55, 1);
    } else {
        select = 0;
    }
    env->cp15.tfsr_el[el] |= 1 << select;
#ifdef CONFIG_USER_ONLY
    /*
     * Stand in for a timer irq, setting _TIF_MTE_ASYNC_FAULT,
     * which then sends a SIGSEGV when the thread is next scheduled.
     * This cpu will return to the main loop at the end of the TB,
     * which is rather sooner than "normal".  But the alternative
     * is waiting until the next syscall.
     */
    qemu_cpu_kick(env_cpu(env));
#endif
}

/* Record a tag check failure. */
static void mte_check_fail(CPUARMState *env, uint32_t desc,
                           uint64_t dirty_ptr, uintptr_t ra)
{
    int mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
    ARMMMUIdx arm_mmu_idx = core_to_aa64_mmu_idx(mmu_idx);
    int el, reg_el, tcf;
    uint64_t sctlr;

    reg_el = regime_el(env, arm_mmu_idx);
    sctlr = env->cp15.sctlr_el[reg_el];

    switch (arm_mmu_idx) {
    case ARMMMUIdx_E10_0:
    case ARMMMUIdx_E20_0:
        el = 0;
        tcf = extract64(sctlr, 38, 2);
        break;
    default:
        el = reg_el;
        tcf = extract64(sctlr, 40, 2);
    }

    switch (tcf) {
    case 1:
        /* Tag check fail causes a synchronous exception. */
        mte_sync_check_fail(env, desc, dirty_ptr, ra);
        break;

    case 0:
        /*
         * Tag check fail does not affect the PE.
         * We eliminate this case by not setting MTE_ACTIVE
         * in tb_flags, so that we never make this runtime call.
         */
        g_assert_not_reached();

    case 2:
        /* Tag check fail causes asynchronous flag set. */
        mte_async_check_fail(env, dirty_ptr, ra, arm_mmu_idx, el);
        break;

    case 3:
        /*
         * Tag check fail causes asynchronous flag set for stores, or
         * a synchronous exception for loads.
         */
        if (FIELD_EX32(desc, MTEDESC, WRITE)) {
            mte_async_check_fail(env, dirty_ptr, ra, arm_mmu_idx, el);
        } else {
            mte_sync_check_fail(env, desc, dirty_ptr, ra);
        }
        break;
    }
}

/**
 * checkN:
 * @tag: tag memory to test
 * @odd: true to begin testing at tags at odd nibble
 * @cmp: the tag to compare against
 * @count: number of tags to test
 *
 * Return the number of successful tests.
 * Thus a return value < @count indicates a failure.
 *
 * A note about sizes: count is expected to be small.
 *
 * The most common use will be LDP/STP of two integer registers,
 * which means 16 bytes of memory touching at most 2 tags, but
 * often the access is aligned and thus just 1 tag.
 *
 * Using AdvSIMD LD/ST (multiple), one can access 64 bytes of memory,
 * touching at most 5 tags.  SVE LDR/STR (vector) with the default
 * vector length is also 64 bytes; the maximum architectural length
 * is 256 bytes touching at most 9 tags.
 *
 * The loop below uses 7 logical operations and 1 memory operation
 * per tag pair.  An implementation that loads an aligned word and
 * uses masking to ignore adjacent tags requires 18 logical operations
 * and thus does not begin to pay off until 6 tags.
 * Which, according to the survey above, is unlikely to be common.
 */
static int checkN(uint8_t *mem, int odd, int cmp, int count)
{
    int n = 0, diff;

    /* Replicate the test tag and compare. */
    cmp *= 0x11;
    diff = *mem++ ^ cmp;

    if (odd) {
        goto start_odd;
    }

    while (1) {
        /* Test even tag. */
        if (unlikely((diff) & 0x0f)) {
            break;
        }
        if (++n == count) {
            break;
        }

 start_odd:
        /* Test odd tag. */
        if (unlikely((diff) & 0xf0)) {
            break;
        }
        if (++n == count) {
            break;
        }

        diff = *mem++ ^ cmp;
    }
    return n;
}
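
/*
 * Illustrative sketch (hypothetical, not part of the original helpers):
 * how checkN above walks the packed tag nibbles and reports the number
 * of matches.
 */
static __attribute__((unused)) void example_checkN(void)
{
    /* Four tags, low nibble first: 3, 3, 3, 7. */
    uint8_t tags[2] = { 0x33, 0x73 };

    /* Checking four tags from an even granule: the fourth tag mismatches. */
    g_assert(checkN(tags, 0, 3, 4) == 3);
    /* Checking three tags starting from the odd granule of the first byte. */
    g_assert(checkN(tags, 1, 3, 3) == 2);
}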

/**
 * mte_probe_int() - helper for mte_probe and mte_check
 * @env: CPU environment
 * @desc: MTEDESC descriptor
 * @ptr: virtual address of the base of the access
 * @fault: return virtual address of the first check failure
 *
 * Internal routine for both mte_probe and mte_check.
 * Return zero on failure, filling in *fault.
 * Return negative on trivial success for tbi disabled.
 * Return positive on success with tbi enabled.
 */
static int mte_probe_int(CPUARMState *env, uint32_t desc, uint64_t ptr,
                         uintptr_t ra, uint64_t *fault)
{
    int mmu_idx, ptr_tag, bit55;
    uint64_t ptr_last, prev_page, next_page;
    uint64_t tag_first, tag_last;
    uint64_t tag_byte_first, tag_byte_last;
    uint32_t sizem1, tag_count, tag_size, n, c;
    uint8_t *mem1, *mem2;
    MMUAccessType type;

    bit55 = extract64(ptr, 55, 1);
    *fault = ptr;

    /* If TBI is disabled, the access is unchecked, and ptr is not dirty. */
    if (unlikely(!tbi_check(desc, bit55))) {
        return -1;
    }

    ptr_tag = allocation_tag_from_addr(ptr);

    if (tcma_check(desc, bit55, ptr_tag)) {
        return 1;
    }

    mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
    type = FIELD_EX32(desc, MTEDESC, WRITE) ? MMU_DATA_STORE : MMU_DATA_LOAD;
    sizem1 = FIELD_EX32(desc, MTEDESC, SIZEM1);

    /* Find the addr of the end of the access */
    ptr_last = ptr + sizem1;

    /* Round the bounds to the tag granule, and compute the number of tags. */
    tag_first = QEMU_ALIGN_DOWN(ptr, TAG_GRANULE);
    tag_last = QEMU_ALIGN_DOWN(ptr_last, TAG_GRANULE);
    tag_count = ((tag_last - tag_first) / TAG_GRANULE) + 1;

    /* Round the bounds to twice the tag granule, and compute the bytes. */
    tag_byte_first = QEMU_ALIGN_DOWN(ptr, 2 * TAG_GRANULE);
    tag_byte_last = QEMU_ALIGN_DOWN(ptr_last, 2 * TAG_GRANULE);

    /* Locate the page boundaries. */
    prev_page = ptr & TARGET_PAGE_MASK;
    next_page = prev_page + TARGET_PAGE_SIZE;

    if (likely(tag_last - prev_page < TARGET_PAGE_SIZE)) {
        /* Memory access stays on one page. */
        tag_size = ((tag_byte_last - tag_byte_first) / (2 * TAG_GRANULE)) + 1;
        mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, sizem1 + 1,
                                  MMU_DATA_LOAD, tag_size, ra);
        if (!mem1) {
            return 1;
        }
        /* Perform all of the comparisons. */
        n = checkN(mem1, ptr & TAG_GRANULE, ptr_tag, tag_count);
    } else {
        /* Memory access crosses to next page. */
        tag_size = (next_page - tag_byte_first) / (2 * TAG_GRANULE);
        mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, next_page - ptr,
                                  MMU_DATA_LOAD, tag_size, ra);

        tag_size = ((tag_byte_last - next_page) / (2 * TAG_GRANULE)) + 1;
        mem2 = allocation_tag_mem(env, mmu_idx, next_page, type,
                                  ptr_last - next_page + 1,
                                  MMU_DATA_LOAD, tag_size, ra);

        /*
         * Perform all of the comparisons.
         * Note the possible but unlikely case of the operation spanning
         * two pages that do not both have tagging enabled.
         */
        n = c = (next_page - tag_first) / TAG_GRANULE;
        if (mem1) {
            n = checkN(mem1, ptr & TAG_GRANULE, ptr_tag, c);
        }
        if (n == c) {
            if (!mem2) {
                return 1;
            }
            n += checkN(mem2, 0, ptr_tag, tag_count - c);
        }
    }

    if (likely(n == tag_count)) {
        return 1;
    }

    /*
     * If we failed, we know which granule.  For the first granule, the
     * failure address is @ptr, the first byte accessed.  Otherwise the
     * failure address is the first byte of the nth granule.
     */
    if (n > 0) {
        *fault = tag_first + n * TAG_GRANULE;
    }
    return 0;
}

uint64_t mte_check(CPUARMState *env, uint32_t desc, uint64_t ptr, uintptr_t ra)
{
    uint64_t fault;
    int ret = mte_probe_int(env, desc, ptr, ra, &fault);

    if (unlikely(ret == 0)) {
        mte_check_fail(env, desc, fault, ra);
    } else if (ret < 0) {
        return ptr;
    }
    return useronly_clean_ptr(ptr);
}

uint64_t HELPER(mte_check)(CPUARMState *env, uint32_t desc, uint64_t ptr)
{
    return mte_check(env, desc, ptr, GETPC());
}

/*
 * No-fault version of mte_check, to be used by SVE for MemSingleNF.
 * Returns false if the access is Checked and the check failed.  This
 * is only intended to probe the tag -- the validity of the page must
 * be checked beforehand.
 */
bool mte_probe(CPUARMState *env, uint32_t desc, uint64_t ptr)
{
    uint64_t fault;
    int ret = mte_probe_int(env, desc, ptr, 0, &fault);

    return ret != 0;
}
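
/*
 * Illustrative sketch (hypothetical, not part of the original helpers):
 * the granule arithmetic used by mte_probe_int above.  A 16-byte access
 * starting 8 bytes into a granule spans two granules and hence requires
 * two tag comparisons.
 */
static __attribute__((unused)) void example_tag_count(void)
{
    uint64_t ptr = 0x1008, sizem1 = 16 - 1;
    uint64_t tag_first = QEMU_ALIGN_DOWN(ptr, TAG_GRANULE);
    uint64_t tag_last = QEMU_ALIGN_DOWN(ptr + sizem1, TAG_GRANULE);

    g_assert(tag_first == 0x1000);
    g_assert(tag_last == 0x1010);
    g_assert((tag_last - tag_first) / TAG_GRANULE + 1 == 2);
}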

/*
 * Perform an MTE checked access for DC_ZVA.
 */
uint64_t HELPER(mte_check_zva)(CPUARMState *env, uint32_t desc, uint64_t ptr)
{
    uintptr_t ra = GETPC();
    int log2_dcz_bytes, log2_tag_bytes;
    int mmu_idx, bit55;
    intptr_t dcz_bytes, tag_bytes, i;
    void *mem;
    uint64_t ptr_tag, mem_tag, align_ptr;

    bit55 = extract64(ptr, 55, 1);

    /* If TBI is disabled, the access is unchecked, and ptr is not dirty. */
    if (unlikely(!tbi_check(desc, bit55))) {
        return ptr;
    }

    ptr_tag = allocation_tag_from_addr(ptr);

    if (tcma_check(desc, bit55, ptr_tag)) {
        goto done;
    }

    /*
     * In arm_cpu_realizefn, we asserted that dcz > LOG2_TAG_GRANULE+1,
     * i.e. 32 bytes, which is an unreasonably small dcz anyway, to make
     * sure that we can access one complete tag byte here.
     */
    log2_dcz_bytes = env_archcpu(env)->dcz_blocksize + 2;
    log2_tag_bytes = log2_dcz_bytes - (LOG2_TAG_GRANULE + 1);
    dcz_bytes = (intptr_t)1 << log2_dcz_bytes;
    tag_bytes = (intptr_t)1 << log2_tag_bytes;
    align_ptr = ptr & -dcz_bytes;

    /*
     * Trap if accessing an invalid page.  DC_ZVA requires that we supply
     * the original pointer for an invalid page.  But watchpoints require
     * that we probe the actual space.  So do both.
     */
    mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
    (void) probe_write(env, ptr, 1, mmu_idx, ra);
    mem = allocation_tag_mem(env, mmu_idx, align_ptr, MMU_DATA_STORE,
                             dcz_bytes, MMU_DATA_LOAD, tag_bytes, ra);
    if (!mem) {
        goto done;
    }

    /*
     * Unlike the reasoning for checkN, DC_ZVA is always aligned, and thus
     * it is quite easy to perform all of the comparisons at once without
     * any extra masking.
     *
     * The most common zva block size is 64; some of the thunderx cpus use
     * a block size of 128.  For user-only, aarch64_max_initfn will set the
     * block size to 512.  Fill out the other cases for future-proofing.
     *
     * In order to be able to find the first miscompare later, we want the
     * tag bytes to be in little-endian order.
     */
    switch (log2_tag_bytes) {
    case 0: /* zva_blocksize 32 */
        mem_tag = *(uint8_t *)mem;
        ptr_tag *= 0x11u;
        break;
    case 1: /* zva_blocksize 64 */
        mem_tag = cpu_to_le16(*(uint16_t *)mem);
        ptr_tag *= 0x1111u;
        break;
    case 2: /* zva_blocksize 128 */
        mem_tag = cpu_to_le32(*(uint32_t *)mem);
        ptr_tag *= 0x11111111u;
        break;
    case 3: /* zva_blocksize 256 */
        mem_tag = cpu_to_le64(*(uint64_t *)mem);
        ptr_tag *= 0x1111111111111111ull;
        break;

    default: /* zva_blocksize 512, 1024, 2048 */
        ptr_tag *= 0x1111111111111111ull;
        i = 0;
        do {
            mem_tag = cpu_to_le64(*(uint64_t *)(mem + i));
            if (unlikely(mem_tag != ptr_tag)) {
                goto fail;
            }
            i += 8;
            align_ptr += 16 * TAG_GRANULE;
        } while (i < tag_bytes);
        goto done;
    }

    if (likely(mem_tag == ptr_tag)) {
        goto done;
    }

 fail:
    /* Locate the first nibble that differs. */
    i = ctz64(mem_tag ^ ptr_tag) >> 4;
    mte_check_fail(env, desc, align_ptr + i * TAG_GRANULE, ra);

 done:
    return useronly_clean_ptr(ptr);
}
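
/*
 * Illustrative sketch (hypothetical, not part of the original helpers):
 * the multipliers in mte_check_zva above replicate a 4-bit tag across
 * every nibble of the comparison word.
 */
static __attribute__((unused)) void example_tag_replication(void)
{
    uint64_t tag = 0x5;

    g_assert(tag * 0x11u == 0x55);
    g_assert(tag * 0x1111111111111111ull == 0x5555555555555555ull);
}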