xen-mapcache.c (18192B)
1/* 2 * Copyright (C) 2011 Citrix Ltd. 3 * 4 * This work is licensed under the terms of the GNU GPL, version 2. See 5 * the COPYING file in the top-level directory. 6 * 7 * Contributions after 2012-01-13 are licensed under the terms of the 8 * GNU GPL, version 2 or (at your option) any later version. 9 */ 10 11#include "qemu/osdep.h" 12#include "qemu/units.h" 13#include "qemu/error-report.h" 14 15#include <sys/resource.h> 16 17#include "hw/xen/xen-legacy-backend.h" 18#include "qemu/bitmap.h" 19 20#include "sysemu/runstate.h" 21#include "sysemu/xen-mapcache.h" 22#include "trace.h" 23 24 25//#define MAPCACHE_DEBUG 26 27#ifdef MAPCACHE_DEBUG 28# define DPRINTF(fmt, ...) do { \ 29 fprintf(stderr, "xen_mapcache: " fmt, ## __VA_ARGS__); \ 30} while (0) 31#else 32# define DPRINTF(fmt, ...) do { } while (0) 33#endif 34 35#if HOST_LONG_BITS == 32 36# define MCACHE_BUCKET_SHIFT 16 37# define MCACHE_MAX_SIZE (1UL<<31) /* 2GB Cap */ 38#else 39# define MCACHE_BUCKET_SHIFT 20 40# define MCACHE_MAX_SIZE (1UL<<35) /* 32GB Cap */ 41#endif 42#define MCACHE_BUCKET_SIZE (1UL << MCACHE_BUCKET_SHIFT) 43 44/* This is the size of the virtual address space reserve to QEMU that will not 45 * be use by MapCache. 46 * From empirical tests I observed that qemu use 75MB more than the 47 * max_mcache_size. 48 */ 49#define NON_MCACHE_MEMORY_SIZE (80 * MiB) 50 51typedef struct MapCacheEntry { 52 hwaddr paddr_index; 53 uint8_t *vaddr_base; 54 unsigned long *valid_mapping; 55 uint8_t lock; 56#define XEN_MAPCACHE_ENTRY_DUMMY (1 << 0) 57 uint8_t flags; 58 hwaddr size; 59 struct MapCacheEntry *next; 60} MapCacheEntry; 61 62typedef struct MapCacheRev { 63 uint8_t *vaddr_req; 64 hwaddr paddr_index; 65 hwaddr size; 66 QTAILQ_ENTRY(MapCacheRev) next; 67 bool dma; 68} MapCacheRev; 69 70typedef struct MapCache { 71 MapCacheEntry *entry; 72 unsigned long nr_buckets; 73 QTAILQ_HEAD(, MapCacheRev) locked_entries; 74 75 /* For most cases (>99.9%), the page address is the same. */ 76 MapCacheEntry *last_entry; 77 unsigned long max_mcache_size; 78 unsigned int mcache_bucket_shift; 79 80 phys_offset_to_gaddr_t phys_offset_to_gaddr; 81 QemuMutex lock; 82 void *opaque; 83} MapCache; 84 85static MapCache *mapcache; 86 87static inline void mapcache_lock(void) 88{ 89 qemu_mutex_lock(&mapcache->lock); 90} 91 92static inline void mapcache_unlock(void) 93{ 94 qemu_mutex_unlock(&mapcache->lock); 95} 96 97static inline int test_bits(int nr, int size, const unsigned long *addr) 98{ 99 unsigned long res = find_next_zero_bit(addr, size + nr, nr); 100 if (res >= nr + size) 101 return 1; 102 else 103 return 0; 104} 105 106void xen_map_cache_init(phys_offset_to_gaddr_t f, void *opaque) 107{ 108 unsigned long size; 109 struct rlimit rlimit_as; 110 111 mapcache = g_malloc0(sizeof (MapCache)); 112 113 mapcache->phys_offset_to_gaddr = f; 114 mapcache->opaque = opaque; 115 qemu_mutex_init(&mapcache->lock); 116 117 QTAILQ_INIT(&mapcache->locked_entries); 118 119 if (geteuid() == 0) { 120 rlimit_as.rlim_cur = RLIM_INFINITY; 121 rlimit_as.rlim_max = RLIM_INFINITY; 122 mapcache->max_mcache_size = MCACHE_MAX_SIZE; 123 } else { 124 getrlimit(RLIMIT_AS, &rlimit_as); 125 rlimit_as.rlim_cur = rlimit_as.rlim_max; 126 127 if (rlimit_as.rlim_max != RLIM_INFINITY) { 128 warn_report("QEMU's maximum size of virtual" 129 " memory is not infinity"); 130 } 131 if (rlimit_as.rlim_max < MCACHE_MAX_SIZE + NON_MCACHE_MEMORY_SIZE) { 132 mapcache->max_mcache_size = rlimit_as.rlim_max - 133 NON_MCACHE_MEMORY_SIZE; 134 } else { 135 mapcache->max_mcache_size = MCACHE_MAX_SIZE; 136 } 137 } 138 139 setrlimit(RLIMIT_AS, &rlimit_as); 140 141 mapcache->nr_buckets = 142 (((mapcache->max_mcache_size >> XC_PAGE_SHIFT) + 143 (1UL << (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)) - 1) >> 144 (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)); 145 146 size = mapcache->nr_buckets * sizeof (MapCacheEntry); 147 size = (size + XC_PAGE_SIZE - 1) & ~(XC_PAGE_SIZE - 1); 148 DPRINTF("%s, nr_buckets = %lx size %lu\n", __func__, 149 mapcache->nr_buckets, size); 150 mapcache->entry = g_malloc0(size); 151} 152 153static void xen_remap_bucket(MapCacheEntry *entry, 154 void *vaddr, 155 hwaddr size, 156 hwaddr address_index, 157 bool dummy) 158{ 159 uint8_t *vaddr_base; 160 xen_pfn_t *pfns; 161 int *err; 162 unsigned int i; 163 hwaddr nb_pfn = size >> XC_PAGE_SHIFT; 164 165 trace_xen_remap_bucket(address_index); 166 167 pfns = g_malloc0(nb_pfn * sizeof (xen_pfn_t)); 168 err = g_malloc0(nb_pfn * sizeof (int)); 169 170 if (entry->vaddr_base != NULL) { 171 if (!(entry->flags & XEN_MAPCACHE_ENTRY_DUMMY)) { 172 ram_block_notify_remove(entry->vaddr_base, entry->size, 173 entry->size); 174 } 175 176 /* 177 * If an entry is being replaced by another mapping and we're using 178 * MAP_FIXED flag for it - there is possibility of a race for vaddr 179 * address with another thread doing an mmap call itself 180 * (see man 2 mmap). To avoid that we skip explicit unmapping here 181 * and allow the kernel to destroy the previous mappings by replacing 182 * them in mmap call later. 183 * 184 * Non-identical replacements are not allowed therefore. 185 */ 186 assert(!vaddr || (entry->vaddr_base == vaddr && entry->size == size)); 187 188 if (!vaddr && munmap(entry->vaddr_base, entry->size) != 0) { 189 perror("unmap fails"); 190 exit(-1); 191 } 192 } 193 g_free(entry->valid_mapping); 194 entry->valid_mapping = NULL; 195 196 for (i = 0; i < nb_pfn; i++) { 197 pfns[i] = (address_index << (MCACHE_BUCKET_SHIFT-XC_PAGE_SHIFT)) + i; 198 } 199 200 /* 201 * If the caller has requested the mapping at a specific address use 202 * MAP_FIXED to make sure it's honored. 203 */ 204 if (!dummy) { 205 vaddr_base = xenforeignmemory_map2(xen_fmem, xen_domid, vaddr, 206 PROT_READ | PROT_WRITE, 207 vaddr ? MAP_FIXED : 0, 208 nb_pfn, pfns, err); 209 if (vaddr_base == NULL) { 210 perror("xenforeignmemory_map2"); 211 exit(-1); 212 } 213 } else { 214 /* 215 * We create dummy mappings where we are unable to create a foreign 216 * mapping immediately due to certain circumstances (i.e. on resume now) 217 */ 218 vaddr_base = mmap(vaddr, size, PROT_READ | PROT_WRITE, 219 MAP_ANON | MAP_SHARED | (vaddr ? MAP_FIXED : 0), 220 -1, 0); 221 if (vaddr_base == MAP_FAILED) { 222 perror("mmap"); 223 exit(-1); 224 } 225 } 226 227 if (!(entry->flags & XEN_MAPCACHE_ENTRY_DUMMY)) { 228 ram_block_notify_add(vaddr_base, size, size); 229 } 230 231 entry->vaddr_base = vaddr_base; 232 entry->paddr_index = address_index; 233 entry->size = size; 234 entry->valid_mapping = (unsigned long *) g_malloc0(sizeof(unsigned long) * 235 BITS_TO_LONGS(size >> XC_PAGE_SHIFT)); 236 237 if (dummy) { 238 entry->flags |= XEN_MAPCACHE_ENTRY_DUMMY; 239 } else { 240 entry->flags &= ~(XEN_MAPCACHE_ENTRY_DUMMY); 241 } 242 243 bitmap_zero(entry->valid_mapping, nb_pfn); 244 for (i = 0; i < nb_pfn; i++) { 245 if (!err[i]) { 246 bitmap_set(entry->valid_mapping, i, 1); 247 } 248 } 249 250 g_free(pfns); 251 g_free(err); 252} 253 254static uint8_t *xen_map_cache_unlocked(hwaddr phys_addr, hwaddr size, 255 uint8_t lock, bool dma) 256{ 257 MapCacheEntry *entry, *pentry = NULL, 258 *free_entry = NULL, *free_pentry = NULL; 259 hwaddr address_index; 260 hwaddr address_offset; 261 hwaddr cache_size = size; 262 hwaddr test_bit_size; 263 bool translated G_GNUC_UNUSED = false; 264 bool dummy = false; 265 266tryagain: 267 address_index = phys_addr >> MCACHE_BUCKET_SHIFT; 268 address_offset = phys_addr & (MCACHE_BUCKET_SIZE - 1); 269 270 trace_xen_map_cache(phys_addr); 271 272 /* test_bit_size is always a multiple of XC_PAGE_SIZE */ 273 if (size) { 274 test_bit_size = size + (phys_addr & (XC_PAGE_SIZE - 1)); 275 276 if (test_bit_size % XC_PAGE_SIZE) { 277 test_bit_size += XC_PAGE_SIZE - (test_bit_size % XC_PAGE_SIZE); 278 } 279 } else { 280 test_bit_size = XC_PAGE_SIZE; 281 } 282 283 if (mapcache->last_entry != NULL && 284 mapcache->last_entry->paddr_index == address_index && 285 !lock && !size && 286 test_bits(address_offset >> XC_PAGE_SHIFT, 287 test_bit_size >> XC_PAGE_SHIFT, 288 mapcache->last_entry->valid_mapping)) { 289 trace_xen_map_cache_return(mapcache->last_entry->vaddr_base + address_offset); 290 return mapcache->last_entry->vaddr_base + address_offset; 291 } 292 293 /* size is always a multiple of MCACHE_BUCKET_SIZE */ 294 if (size) { 295 cache_size = size + address_offset; 296 if (cache_size % MCACHE_BUCKET_SIZE) { 297 cache_size += MCACHE_BUCKET_SIZE - (cache_size % MCACHE_BUCKET_SIZE); 298 } 299 } else { 300 cache_size = MCACHE_BUCKET_SIZE; 301 } 302 303 entry = &mapcache->entry[address_index % mapcache->nr_buckets]; 304 305 while (entry && (lock || entry->lock) && entry->vaddr_base && 306 (entry->paddr_index != address_index || entry->size != cache_size || 307 !test_bits(address_offset >> XC_PAGE_SHIFT, 308 test_bit_size >> XC_PAGE_SHIFT, 309 entry->valid_mapping))) { 310 if (!free_entry && !entry->lock) { 311 free_entry = entry; 312 free_pentry = pentry; 313 } 314 pentry = entry; 315 entry = entry->next; 316 } 317 if (!entry && free_entry) { 318 entry = free_entry; 319 pentry = free_pentry; 320 } 321 if (!entry) { 322 entry = g_malloc0(sizeof (MapCacheEntry)); 323 pentry->next = entry; 324 xen_remap_bucket(entry, NULL, cache_size, address_index, dummy); 325 } else if (!entry->lock) { 326 if (!entry->vaddr_base || entry->paddr_index != address_index || 327 entry->size != cache_size || 328 !test_bits(address_offset >> XC_PAGE_SHIFT, 329 test_bit_size >> XC_PAGE_SHIFT, 330 entry->valid_mapping)) { 331 xen_remap_bucket(entry, NULL, cache_size, address_index, dummy); 332 } 333 } 334 335 if(!test_bits(address_offset >> XC_PAGE_SHIFT, 336 test_bit_size >> XC_PAGE_SHIFT, 337 entry->valid_mapping)) { 338 mapcache->last_entry = NULL; 339#ifdef XEN_COMPAT_PHYSMAP 340 if (!translated && mapcache->phys_offset_to_gaddr) { 341 phys_addr = mapcache->phys_offset_to_gaddr(phys_addr, size); 342 translated = true; 343 goto tryagain; 344 } 345#endif 346 if (!dummy && runstate_check(RUN_STATE_INMIGRATE)) { 347 dummy = true; 348 goto tryagain; 349 } 350 trace_xen_map_cache_return(NULL); 351 return NULL; 352 } 353 354 mapcache->last_entry = entry; 355 if (lock) { 356 MapCacheRev *reventry = g_malloc0(sizeof(MapCacheRev)); 357 entry->lock++; 358 reventry->dma = dma; 359 reventry->vaddr_req = mapcache->last_entry->vaddr_base + address_offset; 360 reventry->paddr_index = mapcache->last_entry->paddr_index; 361 reventry->size = entry->size; 362 QTAILQ_INSERT_HEAD(&mapcache->locked_entries, reventry, next); 363 } 364 365 trace_xen_map_cache_return(mapcache->last_entry->vaddr_base + address_offset); 366 return mapcache->last_entry->vaddr_base + address_offset; 367} 368 369uint8_t *xen_map_cache(hwaddr phys_addr, hwaddr size, 370 uint8_t lock, bool dma) 371{ 372 uint8_t *p; 373 374 mapcache_lock(); 375 p = xen_map_cache_unlocked(phys_addr, size, lock, dma); 376 mapcache_unlock(); 377 return p; 378} 379 380ram_addr_t xen_ram_addr_from_mapcache(void *ptr) 381{ 382 MapCacheEntry *entry = NULL; 383 MapCacheRev *reventry; 384 hwaddr paddr_index; 385 hwaddr size; 386 ram_addr_t raddr; 387 int found = 0; 388 389 mapcache_lock(); 390 QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { 391 if (reventry->vaddr_req == ptr) { 392 paddr_index = reventry->paddr_index; 393 size = reventry->size; 394 found = 1; 395 break; 396 } 397 } 398 if (!found) { 399 fprintf(stderr, "%s, could not find %p\n", __func__, ptr); 400 QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { 401 DPRINTF(" "TARGET_FMT_plx" -> %p is present\n", reventry->paddr_index, 402 reventry->vaddr_req); 403 } 404 abort(); 405 return 0; 406 } 407 408 entry = &mapcache->entry[paddr_index % mapcache->nr_buckets]; 409 while (entry && (entry->paddr_index != paddr_index || entry->size != size)) { 410 entry = entry->next; 411 } 412 if (!entry) { 413 DPRINTF("Trying to find address %p that is not in the mapcache!\n", ptr); 414 raddr = 0; 415 } else { 416 raddr = (reventry->paddr_index << MCACHE_BUCKET_SHIFT) + 417 ((unsigned long) ptr - (unsigned long) entry->vaddr_base); 418 } 419 mapcache_unlock(); 420 return raddr; 421} 422 423static void xen_invalidate_map_cache_entry_unlocked(uint8_t *buffer) 424{ 425 MapCacheEntry *entry = NULL, *pentry = NULL; 426 MapCacheRev *reventry; 427 hwaddr paddr_index; 428 hwaddr size; 429 int found = 0; 430 431 QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { 432 if (reventry->vaddr_req == buffer) { 433 paddr_index = reventry->paddr_index; 434 size = reventry->size; 435 found = 1; 436 break; 437 } 438 } 439 if (!found) { 440 DPRINTF("%s, could not find %p\n", __func__, buffer); 441 QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { 442 DPRINTF(" "TARGET_FMT_plx" -> %p is present\n", reventry->paddr_index, reventry->vaddr_req); 443 } 444 return; 445 } 446 QTAILQ_REMOVE(&mapcache->locked_entries, reventry, next); 447 g_free(reventry); 448 449 if (mapcache->last_entry != NULL && 450 mapcache->last_entry->paddr_index == paddr_index) { 451 mapcache->last_entry = NULL; 452 } 453 454 entry = &mapcache->entry[paddr_index % mapcache->nr_buckets]; 455 while (entry && (entry->paddr_index != paddr_index || entry->size != size)) { 456 pentry = entry; 457 entry = entry->next; 458 } 459 if (!entry) { 460 DPRINTF("Trying to unmap address %p that is not in the mapcache!\n", buffer); 461 return; 462 } 463 entry->lock--; 464 if (entry->lock > 0 || pentry == NULL) { 465 return; 466 } 467 468 pentry->next = entry->next; 469 ram_block_notify_remove(entry->vaddr_base, entry->size, entry->size); 470 if (munmap(entry->vaddr_base, entry->size) != 0) { 471 perror("unmap fails"); 472 exit(-1); 473 } 474 g_free(entry->valid_mapping); 475 g_free(entry); 476} 477 478void xen_invalidate_map_cache_entry(uint8_t *buffer) 479{ 480 mapcache_lock(); 481 xen_invalidate_map_cache_entry_unlocked(buffer); 482 mapcache_unlock(); 483} 484 485void xen_invalidate_map_cache(void) 486{ 487 unsigned long i; 488 MapCacheRev *reventry; 489 490 /* Flush pending AIO before destroying the mapcache */ 491 bdrv_drain_all(); 492 493 mapcache_lock(); 494 495 QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { 496 if (!reventry->dma) { 497 continue; 498 } 499 fprintf(stderr, "Locked DMA mapping while invalidating mapcache!" 500 " "TARGET_FMT_plx" -> %p is present\n", 501 reventry->paddr_index, reventry->vaddr_req); 502 } 503 504 for (i = 0; i < mapcache->nr_buckets; i++) { 505 MapCacheEntry *entry = &mapcache->entry[i]; 506 507 if (entry->vaddr_base == NULL) { 508 continue; 509 } 510 if (entry->lock > 0) { 511 continue; 512 } 513 514 if (munmap(entry->vaddr_base, entry->size) != 0) { 515 perror("unmap fails"); 516 exit(-1); 517 } 518 519 entry->paddr_index = 0; 520 entry->vaddr_base = NULL; 521 entry->size = 0; 522 g_free(entry->valid_mapping); 523 entry->valid_mapping = NULL; 524 } 525 526 mapcache->last_entry = NULL; 527 528 mapcache_unlock(); 529} 530 531static uint8_t *xen_replace_cache_entry_unlocked(hwaddr old_phys_addr, 532 hwaddr new_phys_addr, 533 hwaddr size) 534{ 535 MapCacheEntry *entry; 536 hwaddr address_index, address_offset; 537 hwaddr test_bit_size, cache_size = size; 538 539 address_index = old_phys_addr >> MCACHE_BUCKET_SHIFT; 540 address_offset = old_phys_addr & (MCACHE_BUCKET_SIZE - 1); 541 542 assert(size); 543 /* test_bit_size is always a multiple of XC_PAGE_SIZE */ 544 test_bit_size = size + (old_phys_addr & (XC_PAGE_SIZE - 1)); 545 if (test_bit_size % XC_PAGE_SIZE) { 546 test_bit_size += XC_PAGE_SIZE - (test_bit_size % XC_PAGE_SIZE); 547 } 548 cache_size = size + address_offset; 549 if (cache_size % MCACHE_BUCKET_SIZE) { 550 cache_size += MCACHE_BUCKET_SIZE - (cache_size % MCACHE_BUCKET_SIZE); 551 } 552 553 entry = &mapcache->entry[address_index % mapcache->nr_buckets]; 554 while (entry && !(entry->paddr_index == address_index && 555 entry->size == cache_size)) { 556 entry = entry->next; 557 } 558 if (!entry) { 559 DPRINTF("Trying to update an entry for "TARGET_FMT_plx \ 560 "that is not in the mapcache!\n", old_phys_addr); 561 return NULL; 562 } 563 564 address_index = new_phys_addr >> MCACHE_BUCKET_SHIFT; 565 address_offset = new_phys_addr & (MCACHE_BUCKET_SIZE - 1); 566 567 fprintf(stderr, "Replacing a dummy mapcache entry for "TARGET_FMT_plx \ 568 " with "TARGET_FMT_plx"\n", old_phys_addr, new_phys_addr); 569 570 xen_remap_bucket(entry, entry->vaddr_base, 571 cache_size, address_index, false); 572 if (!test_bits(address_offset >> XC_PAGE_SHIFT, 573 test_bit_size >> XC_PAGE_SHIFT, 574 entry->valid_mapping)) { 575 DPRINTF("Unable to update a mapcache entry for "TARGET_FMT_plx"!\n", 576 old_phys_addr); 577 return NULL; 578 } 579 580 return entry->vaddr_base + address_offset; 581} 582 583uint8_t *xen_replace_cache_entry(hwaddr old_phys_addr, 584 hwaddr new_phys_addr, 585 hwaddr size) 586{ 587 uint8_t *p; 588 589 mapcache_lock(); 590 p = xen_replace_cache_entry_unlocked(old_phys_addr, new_phys_addr, size); 591 mapcache_unlock(); 592 return p; 593}