amd_iommu.c (53456B)
1/* 2 * QEMU emulation of AMD IOMMU (AMD-Vi) 3 * 4 * Copyright (C) 2011 Eduard - Gabriel Munteanu 5 * Copyright (C) 2015, 2016 David Kiarie Kahurani 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or 10 * (at your option) any later version. 11 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 17 * You should have received a copy of the GNU General Public License along 18 * with this program; if not, see <http://www.gnu.org/licenses/>. 19 * 20 * Cache implementation inspired by hw/i386/intel_iommu.c 21 */ 22 23#include "qemu/osdep.h" 24#include "hw/i386/pc.h" 25#include "hw/pci/msi.h" 26#include "hw/pci/pci_bus.h" 27#include "migration/vmstate.h" 28#include "amd_iommu.h" 29#include "qapi/error.h" 30#include "qemu/error-report.h" 31#include "hw/i386/apic_internal.h" 32#include "trace.h" 33#include "hw/i386/apic-msidef.h" 34 35/* used AMD-Vi MMIO registers */ 36const char *amdvi_mmio_low[] = { 37 "AMDVI_MMIO_DEVTAB_BASE", 38 "AMDVI_MMIO_CMDBUF_BASE", 39 "AMDVI_MMIO_EVTLOG_BASE", 40 "AMDVI_MMIO_CONTROL", 41 "AMDVI_MMIO_EXCL_BASE", 42 "AMDVI_MMIO_EXCL_LIMIT", 43 "AMDVI_MMIO_EXT_FEATURES", 44 "AMDVI_MMIO_PPR_BASE", 45 "UNHANDLED" 46}; 47const char *amdvi_mmio_high[] = { 48 "AMDVI_MMIO_COMMAND_HEAD", 49 "AMDVI_MMIO_COMMAND_TAIL", 50 "AMDVI_MMIO_EVTLOG_HEAD", 51 "AMDVI_MMIO_EVTLOG_TAIL", 52 "AMDVI_MMIO_STATUS", 53 "AMDVI_MMIO_PPR_HEAD", 54 "AMDVI_MMIO_PPR_TAIL", 55 "UNHANDLED" 56}; 57 58struct AMDVIAddressSpace { 59 uint8_t bus_num; /* bus number */ 60 uint8_t devfn; /* device function */ 61 AMDVIState *iommu_state; /* AMDVI - one per machine */ 62 MemoryRegion root; /* AMDVI Root memory map region */ 63 IOMMUMemoryRegion iommu; /* Device's address translation region */ 64 MemoryRegion iommu_ir; /* Device's interrupt remapping region */ 65 AddressSpace as; /* device's corresponding address space */ 66}; 67 68/* AMDVI cache entry */ 69typedef struct AMDVIIOTLBEntry { 70 uint16_t domid; /* assigned domain id */ 71 uint16_t devid; /* device owning entry */ 72 uint64_t perms; /* access permissions */ 73 uint64_t translated_addr; /* translated address */ 74 uint64_t page_mask; /* physical page size */ 75} AMDVIIOTLBEntry; 76 77/* configure MMIO registers at startup/reset */ 78static void amdvi_set_quad(AMDVIState *s, hwaddr addr, uint64_t val, 79 uint64_t romask, uint64_t w1cmask) 80{ 81 stq_le_p(&s->mmior[addr], val); 82 stq_le_p(&s->romask[addr], romask); 83 stq_le_p(&s->w1cmask[addr], w1cmask); 84} 85 86static uint16_t amdvi_readw(AMDVIState *s, hwaddr addr) 87{ 88 return lduw_le_p(&s->mmior[addr]); 89} 90 91static uint32_t amdvi_readl(AMDVIState *s, hwaddr addr) 92{ 93 return ldl_le_p(&s->mmior[addr]); 94} 95 96static uint64_t amdvi_readq(AMDVIState *s, hwaddr addr) 97{ 98 return ldq_le_p(&s->mmior[addr]); 99} 100 101/* internal write */ 102static void amdvi_writeq_raw(AMDVIState *s, hwaddr addr, uint64_t val) 103{ 104 stq_le_p(&s->mmior[addr], val); 105} 106 107/* external write */ 108static void amdvi_writew(AMDVIState *s, hwaddr addr, uint16_t val) 109{ 110 uint16_t romask = lduw_le_p(&s->romask[addr]); 111 uint16_t w1cmask = lduw_le_p(&s->w1cmask[addr]); 112 uint16_t oldval = lduw_le_p(&s->mmior[addr]); 113 stw_le_p(&s->mmior[addr], 114 ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask)); 115} 116 117static void amdvi_writel(AMDVIState *s, hwaddr addr, uint32_t val) 118{ 119 uint32_t romask = ldl_le_p(&s->romask[addr]); 120 uint32_t w1cmask = ldl_le_p(&s->w1cmask[addr]); 121 uint32_t oldval = ldl_le_p(&s->mmior[addr]); 122 stl_le_p(&s->mmior[addr], 123 ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask)); 124} 125 126static void amdvi_writeq(AMDVIState *s, hwaddr addr, uint64_t val) 127{ 128 uint64_t romask = ldq_le_p(&s->romask[addr]); 129 uint64_t w1cmask = ldq_le_p(&s->w1cmask[addr]); 130 uint32_t oldval = ldq_le_p(&s->mmior[addr]); 131 stq_le_p(&s->mmior[addr], 132 ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask)); 133} 134 135/* OR a 64-bit register with a 64-bit value */ 136static bool amdvi_test_mask(AMDVIState *s, hwaddr addr, uint64_t val) 137{ 138 return amdvi_readq(s, addr) | val; 139} 140 141/* OR a 64-bit register with a 64-bit value storing result in the register */ 142static void amdvi_assign_orq(AMDVIState *s, hwaddr addr, uint64_t val) 143{ 144 amdvi_writeq_raw(s, addr, amdvi_readq(s, addr) | val); 145} 146 147/* AND a 64-bit register with a 64-bit value storing result in the register */ 148static void amdvi_assign_andq(AMDVIState *s, hwaddr addr, uint64_t val) 149{ 150 amdvi_writeq_raw(s, addr, amdvi_readq(s, addr) & val); 151} 152 153static void amdvi_generate_msi_interrupt(AMDVIState *s) 154{ 155 MSIMessage msg = {}; 156 MemTxAttrs attrs = { 157 .requester_id = pci_requester_id(&s->pci.dev) 158 }; 159 160 if (msi_enabled(&s->pci.dev)) { 161 msg = msi_get_message(&s->pci.dev, 0); 162 address_space_stl_le(&address_space_memory, msg.address, msg.data, 163 attrs, NULL); 164 } 165} 166 167static void amdvi_log_event(AMDVIState *s, uint64_t *evt) 168{ 169 /* event logging not enabled */ 170 if (!s->evtlog_enabled || amdvi_test_mask(s, AMDVI_MMIO_STATUS, 171 AMDVI_MMIO_STATUS_EVT_OVF)) { 172 return; 173 } 174 175 /* event log buffer full */ 176 if (s->evtlog_tail >= s->evtlog_len) { 177 amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_OVF); 178 /* generate interrupt */ 179 amdvi_generate_msi_interrupt(s); 180 return; 181 } 182 183 if (dma_memory_write(&address_space_memory, s->evtlog + s->evtlog_tail, 184 evt, AMDVI_EVENT_LEN)) { 185 trace_amdvi_evntlog_fail(s->evtlog, s->evtlog_tail); 186 } 187 188 s->evtlog_tail += AMDVI_EVENT_LEN; 189 amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_COMP_INT); 190 amdvi_generate_msi_interrupt(s); 191} 192 193static void amdvi_setevent_bits(uint64_t *buffer, uint64_t value, int start, 194 int length) 195{ 196 int index = start / 64, bitpos = start % 64; 197 uint64_t mask = MAKE_64BIT_MASK(start, length); 198 buffer[index] &= ~mask; 199 buffer[index] |= (value << bitpos) & mask; 200} 201/* 202 * AMDVi event structure 203 * 0:15 -> DeviceID 204 * 55:63 -> event type + miscellaneous info 205 * 63:127 -> related address 206 */ 207static void amdvi_encode_event(uint64_t *evt, uint16_t devid, uint64_t addr, 208 uint16_t info) 209{ 210 amdvi_setevent_bits(evt, devid, 0, 16); 211 amdvi_setevent_bits(evt, info, 55, 8); 212 amdvi_setevent_bits(evt, addr, 63, 64); 213} 214/* log an error encountered during a page walk 215 * 216 * @addr: virtual address in translation request 217 */ 218static void amdvi_page_fault(AMDVIState *s, uint16_t devid, 219 hwaddr addr, uint16_t info) 220{ 221 uint64_t evt[4]; 222 223 info |= AMDVI_EVENT_IOPF_I | AMDVI_EVENT_IOPF; 224 amdvi_encode_event(evt, devid, addr, info); 225 amdvi_log_event(s, evt); 226 pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS, 227 PCI_STATUS_SIG_TARGET_ABORT); 228} 229/* 230 * log a master abort accessing device table 231 * @devtab : address of device table entry 232 * @info : error flags 233 */ 234static void amdvi_log_devtab_error(AMDVIState *s, uint16_t devid, 235 hwaddr devtab, uint16_t info) 236{ 237 uint64_t evt[4]; 238 239 info |= AMDVI_EVENT_DEV_TAB_HW_ERROR; 240 241 amdvi_encode_event(evt, devid, devtab, info); 242 amdvi_log_event(s, evt); 243 pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS, 244 PCI_STATUS_SIG_TARGET_ABORT); 245} 246/* log an event trying to access command buffer 247 * @addr : address that couldn't be accessed 248 */ 249static void amdvi_log_command_error(AMDVIState *s, hwaddr addr) 250{ 251 uint64_t evt[4], info = AMDVI_EVENT_COMMAND_HW_ERROR; 252 253 amdvi_encode_event(evt, 0, addr, info); 254 amdvi_log_event(s, evt); 255 pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS, 256 PCI_STATUS_SIG_TARGET_ABORT); 257} 258/* log an illegal comand event 259 * @addr : address of illegal command 260 */ 261static void amdvi_log_illegalcom_error(AMDVIState *s, uint16_t info, 262 hwaddr addr) 263{ 264 uint64_t evt[4]; 265 266 info |= AMDVI_EVENT_ILLEGAL_COMMAND_ERROR; 267 amdvi_encode_event(evt, 0, addr, info); 268 amdvi_log_event(s, evt); 269} 270/* log an error accessing device table 271 * 272 * @devid : device owning the table entry 273 * @devtab : address of device table entry 274 * @info : error flags 275 */ 276static void amdvi_log_illegaldevtab_error(AMDVIState *s, uint16_t devid, 277 hwaddr addr, uint16_t info) 278{ 279 uint64_t evt[4]; 280 281 info |= AMDVI_EVENT_ILLEGAL_DEVTAB_ENTRY; 282 amdvi_encode_event(evt, devid, addr, info); 283 amdvi_log_event(s, evt); 284} 285/* log an error accessing a PTE entry 286 * @addr : address that couldn't be accessed 287 */ 288static void amdvi_log_pagetab_error(AMDVIState *s, uint16_t devid, 289 hwaddr addr, uint16_t info) 290{ 291 uint64_t evt[4]; 292 293 info |= AMDVI_EVENT_PAGE_TAB_HW_ERROR; 294 amdvi_encode_event(evt, devid, addr, info); 295 amdvi_log_event(s, evt); 296 pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS, 297 PCI_STATUS_SIG_TARGET_ABORT); 298} 299 300static gboolean amdvi_uint64_equal(gconstpointer v1, gconstpointer v2) 301{ 302 return *((const uint64_t *)v1) == *((const uint64_t *)v2); 303} 304 305static guint amdvi_uint64_hash(gconstpointer v) 306{ 307 return (guint)*(const uint64_t *)v; 308} 309 310static AMDVIIOTLBEntry *amdvi_iotlb_lookup(AMDVIState *s, hwaddr addr, 311 uint64_t devid) 312{ 313 uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) | 314 ((uint64_t)(devid) << AMDVI_DEVID_SHIFT); 315 return g_hash_table_lookup(s->iotlb, &key); 316} 317 318static void amdvi_iotlb_reset(AMDVIState *s) 319{ 320 assert(s->iotlb); 321 trace_amdvi_iotlb_reset(); 322 g_hash_table_remove_all(s->iotlb); 323} 324 325static gboolean amdvi_iotlb_remove_by_devid(gpointer key, gpointer value, 326 gpointer user_data) 327{ 328 AMDVIIOTLBEntry *entry = (AMDVIIOTLBEntry *)value; 329 uint16_t devid = *(uint16_t *)user_data; 330 return entry->devid == devid; 331} 332 333static void amdvi_iotlb_remove_page(AMDVIState *s, hwaddr addr, 334 uint64_t devid) 335{ 336 uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) | 337 ((uint64_t)(devid) << AMDVI_DEVID_SHIFT); 338 g_hash_table_remove(s->iotlb, &key); 339} 340 341static void amdvi_update_iotlb(AMDVIState *s, uint16_t devid, 342 uint64_t gpa, IOMMUTLBEntry to_cache, 343 uint16_t domid) 344{ 345 AMDVIIOTLBEntry *entry = g_new(AMDVIIOTLBEntry, 1); 346 uint64_t *key = g_new(uint64_t, 1); 347 uint64_t gfn = gpa >> AMDVI_PAGE_SHIFT_4K; 348 349 /* don't cache erroneous translations */ 350 if (to_cache.perm != IOMMU_NONE) { 351 trace_amdvi_cache_update(domid, PCI_BUS_NUM(devid), PCI_SLOT(devid), 352 PCI_FUNC(devid), gpa, to_cache.translated_addr); 353 354 if (g_hash_table_size(s->iotlb) >= AMDVI_IOTLB_MAX_SIZE) { 355 amdvi_iotlb_reset(s); 356 } 357 358 entry->domid = domid; 359 entry->perms = to_cache.perm; 360 entry->translated_addr = to_cache.translated_addr; 361 entry->page_mask = to_cache.addr_mask; 362 *key = gfn | ((uint64_t)(devid) << AMDVI_DEVID_SHIFT); 363 g_hash_table_replace(s->iotlb, key, entry); 364 } 365} 366 367static void amdvi_completion_wait(AMDVIState *s, uint64_t *cmd) 368{ 369 /* pad the last 3 bits */ 370 hwaddr addr = cpu_to_le64(extract64(cmd[0], 3, 49)) << 3; 371 uint64_t data = cpu_to_le64(cmd[1]); 372 373 if (extract64(cmd[0], 52, 8)) { 374 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4), 375 s->cmdbuf + s->cmdbuf_head); 376 } 377 if (extract64(cmd[0], 0, 1)) { 378 if (dma_memory_write(&address_space_memory, addr, &data, 379 AMDVI_COMPLETION_DATA_SIZE)) { 380 trace_amdvi_completion_wait_fail(addr); 381 } 382 } 383 /* set completion interrupt */ 384 if (extract64(cmd[0], 1, 1)) { 385 amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_COMP_INT); 386 /* generate interrupt */ 387 amdvi_generate_msi_interrupt(s); 388 } 389 trace_amdvi_completion_wait(addr, data); 390} 391 392/* log error without aborting since linux seems to be using reserved bits */ 393static void amdvi_inval_devtab_entry(AMDVIState *s, uint64_t *cmd) 394{ 395 uint16_t devid = cpu_to_le16((uint16_t)extract64(cmd[0], 0, 16)); 396 397 /* This command should invalidate internal caches of which there isn't */ 398 if (extract64(cmd[0], 16, 44) || cmd[1]) { 399 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4), 400 s->cmdbuf + s->cmdbuf_head); 401 } 402 trace_amdvi_devtab_inval(PCI_BUS_NUM(devid), PCI_SLOT(devid), 403 PCI_FUNC(devid)); 404} 405 406static void amdvi_complete_ppr(AMDVIState *s, uint64_t *cmd) 407{ 408 if (extract64(cmd[0], 16, 16) || extract64(cmd[0], 52, 8) || 409 extract64(cmd[1], 0, 2) || extract64(cmd[1], 3, 29) 410 || extract64(cmd[1], 48, 16)) { 411 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4), 412 s->cmdbuf + s->cmdbuf_head); 413 } 414 trace_amdvi_ppr_exec(); 415} 416 417static void amdvi_inval_all(AMDVIState *s, uint64_t *cmd) 418{ 419 if (extract64(cmd[0], 0, 60) || cmd[1]) { 420 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4), 421 s->cmdbuf + s->cmdbuf_head); 422 } 423 424 amdvi_iotlb_reset(s); 425 trace_amdvi_all_inval(); 426} 427 428static gboolean amdvi_iotlb_remove_by_domid(gpointer key, gpointer value, 429 gpointer user_data) 430{ 431 AMDVIIOTLBEntry *entry = (AMDVIIOTLBEntry *)value; 432 uint16_t domid = *(uint16_t *)user_data; 433 return entry->domid == domid; 434} 435 436/* we don't have devid - we can't remove pages by address */ 437static void amdvi_inval_pages(AMDVIState *s, uint64_t *cmd) 438{ 439 uint16_t domid = cpu_to_le16((uint16_t)extract64(cmd[0], 32, 16)); 440 441 if (extract64(cmd[0], 20, 12) || extract64(cmd[0], 48, 12) || 442 extract64(cmd[1], 3, 9)) { 443 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4), 444 s->cmdbuf + s->cmdbuf_head); 445 } 446 447 g_hash_table_foreach_remove(s->iotlb, amdvi_iotlb_remove_by_domid, 448 &domid); 449 trace_amdvi_pages_inval(domid); 450} 451 452static void amdvi_prefetch_pages(AMDVIState *s, uint64_t *cmd) 453{ 454 if (extract64(cmd[0], 16, 8) || extract64(cmd[0], 52, 8) || 455 extract64(cmd[1], 1, 1) || extract64(cmd[1], 3, 1) || 456 extract64(cmd[1], 5, 7)) { 457 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4), 458 s->cmdbuf + s->cmdbuf_head); 459 } 460 461 trace_amdvi_prefetch_pages(); 462} 463 464static void amdvi_inval_inttable(AMDVIState *s, uint64_t *cmd) 465{ 466 if (extract64(cmd[0], 16, 44) || cmd[1]) { 467 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4), 468 s->cmdbuf + s->cmdbuf_head); 469 return; 470 } 471 472 trace_amdvi_intr_inval(); 473} 474 475/* FIXME: Try to work with the specified size instead of all the pages 476 * when the S bit is on 477 */ 478static void iommu_inval_iotlb(AMDVIState *s, uint64_t *cmd) 479{ 480 481 uint16_t devid = extract64(cmd[0], 0, 16); 482 if (extract64(cmd[1], 1, 1) || extract64(cmd[1], 3, 1) || 483 extract64(cmd[1], 6, 6)) { 484 amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4), 485 s->cmdbuf + s->cmdbuf_head); 486 return; 487 } 488 489 if (extract64(cmd[1], 0, 1)) { 490 g_hash_table_foreach_remove(s->iotlb, amdvi_iotlb_remove_by_devid, 491 &devid); 492 } else { 493 amdvi_iotlb_remove_page(s, cpu_to_le64(extract64(cmd[1], 12, 52)) << 12, 494 cpu_to_le16(extract64(cmd[1], 0, 16))); 495 } 496 trace_amdvi_iotlb_inval(); 497} 498 499/* not honouring reserved bits is regarded as an illegal command */ 500static void amdvi_cmdbuf_exec(AMDVIState *s) 501{ 502 uint64_t cmd[2]; 503 504 if (dma_memory_read(&address_space_memory, s->cmdbuf + s->cmdbuf_head, 505 cmd, AMDVI_COMMAND_SIZE)) { 506 trace_amdvi_command_read_fail(s->cmdbuf, s->cmdbuf_head); 507 amdvi_log_command_error(s, s->cmdbuf + s->cmdbuf_head); 508 return; 509 } 510 511 switch (extract64(cmd[0], 60, 4)) { 512 case AMDVI_CMD_COMPLETION_WAIT: 513 amdvi_completion_wait(s, cmd); 514 break; 515 case AMDVI_CMD_INVAL_DEVTAB_ENTRY: 516 amdvi_inval_devtab_entry(s, cmd); 517 break; 518 case AMDVI_CMD_INVAL_AMDVI_PAGES: 519 amdvi_inval_pages(s, cmd); 520 break; 521 case AMDVI_CMD_INVAL_IOTLB_PAGES: 522 iommu_inval_iotlb(s, cmd); 523 break; 524 case AMDVI_CMD_INVAL_INTR_TABLE: 525 amdvi_inval_inttable(s, cmd); 526 break; 527 case AMDVI_CMD_PREFETCH_AMDVI_PAGES: 528 amdvi_prefetch_pages(s, cmd); 529 break; 530 case AMDVI_CMD_COMPLETE_PPR_REQUEST: 531 amdvi_complete_ppr(s, cmd); 532 break; 533 case AMDVI_CMD_INVAL_AMDVI_ALL: 534 amdvi_inval_all(s, cmd); 535 break; 536 default: 537 trace_amdvi_unhandled_command(extract64(cmd[1], 60, 4)); 538 /* log illegal command */ 539 amdvi_log_illegalcom_error(s, extract64(cmd[1], 60, 4), 540 s->cmdbuf + s->cmdbuf_head); 541 } 542} 543 544static void amdvi_cmdbuf_run(AMDVIState *s) 545{ 546 if (!s->cmdbuf_enabled) { 547 trace_amdvi_command_error(amdvi_readq(s, AMDVI_MMIO_CONTROL)); 548 return; 549 } 550 551 /* check if there is work to do. */ 552 while (s->cmdbuf_head != s->cmdbuf_tail) { 553 trace_amdvi_command_exec(s->cmdbuf_head, s->cmdbuf_tail, s->cmdbuf); 554 amdvi_cmdbuf_exec(s); 555 s->cmdbuf_head += AMDVI_COMMAND_SIZE; 556 amdvi_writeq_raw(s, AMDVI_MMIO_COMMAND_HEAD, s->cmdbuf_head); 557 558 /* wrap head pointer */ 559 if (s->cmdbuf_head >= s->cmdbuf_len * AMDVI_COMMAND_SIZE) { 560 s->cmdbuf_head = 0; 561 } 562 } 563} 564 565static void amdvi_mmio_trace(hwaddr addr, unsigned size) 566{ 567 uint8_t index = (addr & ~0x2000) / 8; 568 569 if ((addr & 0x2000)) { 570 /* high table */ 571 index = index >= AMDVI_MMIO_REGS_HIGH ? AMDVI_MMIO_REGS_HIGH : index; 572 trace_amdvi_mmio_read(amdvi_mmio_high[index], addr, size, addr & ~0x07); 573 } else { 574 index = index >= AMDVI_MMIO_REGS_LOW ? AMDVI_MMIO_REGS_LOW : index; 575 trace_amdvi_mmio_read(amdvi_mmio_low[index], addr, size, addr & ~0x07); 576 } 577} 578 579static uint64_t amdvi_mmio_read(void *opaque, hwaddr addr, unsigned size) 580{ 581 AMDVIState *s = opaque; 582 583 uint64_t val = -1; 584 if (addr + size > AMDVI_MMIO_SIZE) { 585 trace_amdvi_mmio_read_invalid(AMDVI_MMIO_SIZE, addr, size); 586 return (uint64_t)-1; 587 } 588 589 if (size == 2) { 590 val = amdvi_readw(s, addr); 591 } else if (size == 4) { 592 val = amdvi_readl(s, addr); 593 } else if (size == 8) { 594 val = amdvi_readq(s, addr); 595 } 596 amdvi_mmio_trace(addr, size); 597 598 return val; 599} 600 601static void amdvi_handle_control_write(AMDVIState *s) 602{ 603 unsigned long control = amdvi_readq(s, AMDVI_MMIO_CONTROL); 604 s->enabled = !!(control & AMDVI_MMIO_CONTROL_AMDVIEN); 605 606 s->ats_enabled = !!(control & AMDVI_MMIO_CONTROL_HTTUNEN); 607 s->evtlog_enabled = s->enabled && !!(control & 608 AMDVI_MMIO_CONTROL_EVENTLOGEN); 609 610 s->evtlog_intr = !!(control & AMDVI_MMIO_CONTROL_EVENTINTEN); 611 s->completion_wait_intr = !!(control & AMDVI_MMIO_CONTROL_COMWAITINTEN); 612 s->cmdbuf_enabled = s->enabled && !!(control & 613 AMDVI_MMIO_CONTROL_CMDBUFLEN); 614 s->ga_enabled = !!(control & AMDVI_MMIO_CONTROL_GAEN); 615 616 /* update the flags depending on the control register */ 617 if (s->cmdbuf_enabled) { 618 amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_CMDBUF_RUN); 619 } else { 620 amdvi_assign_andq(s, AMDVI_MMIO_STATUS, ~AMDVI_MMIO_STATUS_CMDBUF_RUN); 621 } 622 if (s->evtlog_enabled) { 623 amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_RUN); 624 } else { 625 amdvi_assign_andq(s, AMDVI_MMIO_STATUS, ~AMDVI_MMIO_STATUS_EVT_RUN); 626 } 627 628 trace_amdvi_control_status(control); 629 amdvi_cmdbuf_run(s); 630} 631 632static inline void amdvi_handle_devtab_write(AMDVIState *s) 633 634{ 635 uint64_t val = amdvi_readq(s, AMDVI_MMIO_DEVICE_TABLE); 636 s->devtab = (val & AMDVI_MMIO_DEVTAB_BASE_MASK); 637 638 /* set device table length */ 639 s->devtab_len = ((val & AMDVI_MMIO_DEVTAB_SIZE_MASK) + 1 * 640 (AMDVI_MMIO_DEVTAB_SIZE_UNIT / 641 AMDVI_MMIO_DEVTAB_ENTRY_SIZE)); 642} 643 644static inline void amdvi_handle_cmdhead_write(AMDVIState *s) 645{ 646 s->cmdbuf_head = amdvi_readq(s, AMDVI_MMIO_COMMAND_HEAD) 647 & AMDVI_MMIO_CMDBUF_HEAD_MASK; 648 amdvi_cmdbuf_run(s); 649} 650 651static inline void amdvi_handle_cmdbase_write(AMDVIState *s) 652{ 653 s->cmdbuf = amdvi_readq(s, AMDVI_MMIO_COMMAND_BASE) 654 & AMDVI_MMIO_CMDBUF_BASE_MASK; 655 s->cmdbuf_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_CMDBUF_SIZE_BYTE) 656 & AMDVI_MMIO_CMDBUF_SIZE_MASK); 657 s->cmdbuf_head = s->cmdbuf_tail = 0; 658} 659 660static inline void amdvi_handle_cmdtail_write(AMDVIState *s) 661{ 662 s->cmdbuf_tail = amdvi_readq(s, AMDVI_MMIO_COMMAND_TAIL) 663 & AMDVI_MMIO_CMDBUF_TAIL_MASK; 664 amdvi_cmdbuf_run(s); 665} 666 667static inline void amdvi_handle_excllim_write(AMDVIState *s) 668{ 669 uint64_t val = amdvi_readq(s, AMDVI_MMIO_EXCL_LIMIT); 670 s->excl_limit = (val & AMDVI_MMIO_EXCL_LIMIT_MASK) | 671 AMDVI_MMIO_EXCL_LIMIT_LOW; 672} 673 674static inline void amdvi_handle_evtbase_write(AMDVIState *s) 675{ 676 uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_BASE); 677 s->evtlog = val & AMDVI_MMIO_EVTLOG_BASE_MASK; 678 s->evtlog_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_EVTLOG_SIZE_BYTE) 679 & AMDVI_MMIO_EVTLOG_SIZE_MASK); 680} 681 682static inline void amdvi_handle_evttail_write(AMDVIState *s) 683{ 684 uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_TAIL); 685 s->evtlog_tail = val & AMDVI_MMIO_EVTLOG_TAIL_MASK; 686} 687 688static inline void amdvi_handle_evthead_write(AMDVIState *s) 689{ 690 uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_HEAD); 691 s->evtlog_head = val & AMDVI_MMIO_EVTLOG_HEAD_MASK; 692} 693 694static inline void amdvi_handle_pprbase_write(AMDVIState *s) 695{ 696 uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_BASE); 697 s->ppr_log = val & AMDVI_MMIO_PPRLOG_BASE_MASK; 698 s->pprlog_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_PPRLOG_SIZE_BYTE) 699 & AMDVI_MMIO_PPRLOG_SIZE_MASK); 700} 701 702static inline void amdvi_handle_pprhead_write(AMDVIState *s) 703{ 704 uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_HEAD); 705 s->pprlog_head = val & AMDVI_MMIO_PPRLOG_HEAD_MASK; 706} 707 708static inline void amdvi_handle_pprtail_write(AMDVIState *s) 709{ 710 uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_TAIL); 711 s->pprlog_tail = val & AMDVI_MMIO_PPRLOG_TAIL_MASK; 712} 713 714/* FIXME: something might go wrong if System Software writes in chunks 715 * of one byte but linux writes in chunks of 4 bytes so currently it 716 * works correctly with linux but will definitely be busted if software 717 * reads/writes 8 bytes 718 */ 719static void amdvi_mmio_reg_write(AMDVIState *s, unsigned size, uint64_t val, 720 hwaddr addr) 721{ 722 if (size == 2) { 723 amdvi_writew(s, addr, val); 724 } else if (size == 4) { 725 amdvi_writel(s, addr, val); 726 } else if (size == 8) { 727 amdvi_writeq(s, addr, val); 728 } 729} 730 731static void amdvi_mmio_write(void *opaque, hwaddr addr, uint64_t val, 732 unsigned size) 733{ 734 AMDVIState *s = opaque; 735 unsigned long offset = addr & 0x07; 736 737 if (addr + size > AMDVI_MMIO_SIZE) { 738 trace_amdvi_mmio_write("error: addr outside region: max ", 739 (uint64_t)AMDVI_MMIO_SIZE, size, val, offset); 740 return; 741 } 742 743 amdvi_mmio_trace(addr, size); 744 switch (addr & ~0x07) { 745 case AMDVI_MMIO_CONTROL: 746 amdvi_mmio_reg_write(s, size, val, addr); 747 amdvi_handle_control_write(s); 748 break; 749 case AMDVI_MMIO_DEVICE_TABLE: 750 amdvi_mmio_reg_write(s, size, val, addr); 751 /* set device table address 752 * This also suffers from inability to tell whether software 753 * is done writing 754 */ 755 if (offset || (size == 8)) { 756 amdvi_handle_devtab_write(s); 757 } 758 break; 759 case AMDVI_MMIO_COMMAND_HEAD: 760 amdvi_mmio_reg_write(s, size, val, addr); 761 amdvi_handle_cmdhead_write(s); 762 break; 763 case AMDVI_MMIO_COMMAND_BASE: 764 amdvi_mmio_reg_write(s, size, val, addr); 765 /* FIXME - make sure System Software has finished writing incase 766 * it writes in chucks less than 8 bytes in a robust way.As for 767 * now, this hacks works for the linux driver 768 */ 769 if (offset || (size == 8)) { 770 amdvi_handle_cmdbase_write(s); 771 } 772 break; 773 case AMDVI_MMIO_COMMAND_TAIL: 774 amdvi_mmio_reg_write(s, size, val, addr); 775 amdvi_handle_cmdtail_write(s); 776 break; 777 case AMDVI_MMIO_EVENT_BASE: 778 amdvi_mmio_reg_write(s, size, val, addr); 779 amdvi_handle_evtbase_write(s); 780 break; 781 case AMDVI_MMIO_EVENT_HEAD: 782 amdvi_mmio_reg_write(s, size, val, addr); 783 amdvi_handle_evthead_write(s); 784 break; 785 case AMDVI_MMIO_EVENT_TAIL: 786 amdvi_mmio_reg_write(s, size, val, addr); 787 amdvi_handle_evttail_write(s); 788 break; 789 case AMDVI_MMIO_EXCL_LIMIT: 790 amdvi_mmio_reg_write(s, size, val, addr); 791 amdvi_handle_excllim_write(s); 792 break; 793 /* PPR log base - unused for now */ 794 case AMDVI_MMIO_PPR_BASE: 795 amdvi_mmio_reg_write(s, size, val, addr); 796 amdvi_handle_pprbase_write(s); 797 break; 798 /* PPR log head - also unused for now */ 799 case AMDVI_MMIO_PPR_HEAD: 800 amdvi_mmio_reg_write(s, size, val, addr); 801 amdvi_handle_pprhead_write(s); 802 break; 803 /* PPR log tail - unused for now */ 804 case AMDVI_MMIO_PPR_TAIL: 805 amdvi_mmio_reg_write(s, size, val, addr); 806 amdvi_handle_pprtail_write(s); 807 break; 808 } 809} 810 811static inline uint64_t amdvi_get_perms(uint64_t entry) 812{ 813 return (entry & (AMDVI_DEV_PERM_READ | AMDVI_DEV_PERM_WRITE)) >> 814 AMDVI_DEV_PERM_SHIFT; 815} 816 817/* validate that reserved bits are honoured */ 818static bool amdvi_validate_dte(AMDVIState *s, uint16_t devid, 819 uint64_t *dte) 820{ 821 if ((dte[0] & AMDVI_DTE_LOWER_QUAD_RESERVED) 822 || (dte[1] & AMDVI_DTE_MIDDLE_QUAD_RESERVED) 823 || (dte[2] & AMDVI_DTE_UPPER_QUAD_RESERVED) || dte[3]) { 824 amdvi_log_illegaldevtab_error(s, devid, 825 s->devtab + 826 devid * AMDVI_DEVTAB_ENTRY_SIZE, 0); 827 return false; 828 } 829 830 return true; 831} 832 833/* get a device table entry given the devid */ 834static bool amdvi_get_dte(AMDVIState *s, int devid, uint64_t *entry) 835{ 836 uint32_t offset = devid * AMDVI_DEVTAB_ENTRY_SIZE; 837 838 if (dma_memory_read(&address_space_memory, s->devtab + offset, entry, 839 AMDVI_DEVTAB_ENTRY_SIZE)) { 840 trace_amdvi_dte_get_fail(s->devtab, offset); 841 /* log error accessing dte */ 842 amdvi_log_devtab_error(s, devid, s->devtab + offset, 0); 843 return false; 844 } 845 846 *entry = le64_to_cpu(*entry); 847 if (!amdvi_validate_dte(s, devid, entry)) { 848 trace_amdvi_invalid_dte(entry[0]); 849 return false; 850 } 851 852 return true; 853} 854 855/* get pte translation mode */ 856static inline uint8_t get_pte_translation_mode(uint64_t pte) 857{ 858 return (pte >> AMDVI_DEV_MODE_RSHIFT) & AMDVI_DEV_MODE_MASK; 859} 860 861static inline uint64_t pte_override_page_mask(uint64_t pte) 862{ 863 uint8_t page_mask = 13; 864 uint64_t addr = (pte & AMDVI_DEV_PT_ROOT_MASK) >> 12; 865 /* find the first zero bit */ 866 while (addr & 1) { 867 page_mask++; 868 addr = addr >> 1; 869 } 870 871 return ~((1ULL << page_mask) - 1); 872} 873 874static inline uint64_t pte_get_page_mask(uint64_t oldlevel) 875{ 876 return ~((1UL << ((oldlevel * 9) + 3)) - 1); 877} 878 879static inline uint64_t amdvi_get_pte_entry(AMDVIState *s, uint64_t pte_addr, 880 uint16_t devid) 881{ 882 uint64_t pte; 883 884 if (dma_memory_read(&address_space_memory, pte_addr, &pte, sizeof(pte))) { 885 trace_amdvi_get_pte_hwerror(pte_addr); 886 amdvi_log_pagetab_error(s, devid, pte_addr, 0); 887 pte = 0; 888 return pte; 889 } 890 891 pte = le64_to_cpu(pte); 892 return pte; 893} 894 895static void amdvi_page_walk(AMDVIAddressSpace *as, uint64_t *dte, 896 IOMMUTLBEntry *ret, unsigned perms, 897 hwaddr addr) 898{ 899 unsigned level, present, pte_perms, oldlevel; 900 uint64_t pte = dte[0], pte_addr, page_mask; 901 902 /* make sure the DTE has TV = 1 */ 903 if (pte & AMDVI_DEV_TRANSLATION_VALID) { 904 level = get_pte_translation_mode(pte); 905 if (level >= 7) { 906 trace_amdvi_mode_invalid(level, addr); 907 return; 908 } 909 if (level == 0) { 910 goto no_remap; 911 } 912 913 /* we are at the leaf page table or page table encodes a huge page */ 914 while (level > 0) { 915 pte_perms = amdvi_get_perms(pte); 916 present = pte & 1; 917 if (!present || perms != (perms & pte_perms)) { 918 amdvi_page_fault(as->iommu_state, as->devfn, addr, perms); 919 trace_amdvi_page_fault(addr); 920 return; 921 } 922 923 /* go to the next lower level */ 924 pte_addr = pte & AMDVI_DEV_PT_ROOT_MASK; 925 /* add offset and load pte */ 926 pte_addr += ((addr >> (3 + 9 * level)) & 0x1FF) << 3; 927 pte = amdvi_get_pte_entry(as->iommu_state, pte_addr, as->devfn); 928 if (!pte) { 929 return; 930 } 931 oldlevel = level; 932 level = get_pte_translation_mode(pte); 933 if (level == 0x7) { 934 break; 935 } 936 } 937 938 if (level == 0x7) { 939 page_mask = pte_override_page_mask(pte); 940 } else { 941 page_mask = pte_get_page_mask(oldlevel); 942 } 943 944 /* get access permissions from pte */ 945 ret->iova = addr & page_mask; 946 ret->translated_addr = (pte & AMDVI_DEV_PT_ROOT_MASK) & page_mask; 947 ret->addr_mask = ~page_mask; 948 ret->perm = amdvi_get_perms(pte); 949 return; 950 } 951no_remap: 952 ret->iova = addr & AMDVI_PAGE_MASK_4K; 953 ret->translated_addr = addr & AMDVI_PAGE_MASK_4K; 954 ret->addr_mask = ~AMDVI_PAGE_MASK_4K; 955 ret->perm = amdvi_get_perms(pte); 956} 957 958static void amdvi_do_translate(AMDVIAddressSpace *as, hwaddr addr, 959 bool is_write, IOMMUTLBEntry *ret) 960{ 961 AMDVIState *s = as->iommu_state; 962 uint16_t devid = PCI_BUILD_BDF(as->bus_num, as->devfn); 963 AMDVIIOTLBEntry *iotlb_entry = amdvi_iotlb_lookup(s, addr, devid); 964 uint64_t entry[4]; 965 966 if (iotlb_entry) { 967 trace_amdvi_iotlb_hit(PCI_BUS_NUM(devid), PCI_SLOT(devid), 968 PCI_FUNC(devid), addr, iotlb_entry->translated_addr); 969 ret->iova = addr & ~iotlb_entry->page_mask; 970 ret->translated_addr = iotlb_entry->translated_addr; 971 ret->addr_mask = iotlb_entry->page_mask; 972 ret->perm = iotlb_entry->perms; 973 return; 974 } 975 976 if (!amdvi_get_dte(s, devid, entry)) { 977 return; 978 } 979 980 /* devices with V = 0 are not translated */ 981 if (!(entry[0] & AMDVI_DEV_VALID)) { 982 goto out; 983 } 984 985 amdvi_page_walk(as, entry, ret, 986 is_write ? AMDVI_PERM_WRITE : AMDVI_PERM_READ, addr); 987 988 amdvi_update_iotlb(s, devid, addr, *ret, 989 entry[1] & AMDVI_DEV_DOMID_ID_MASK); 990 return; 991 992out: 993 ret->iova = addr & AMDVI_PAGE_MASK_4K; 994 ret->translated_addr = addr & AMDVI_PAGE_MASK_4K; 995 ret->addr_mask = ~AMDVI_PAGE_MASK_4K; 996 ret->perm = IOMMU_RW; 997} 998 999static inline bool amdvi_is_interrupt_addr(hwaddr addr) 1000{ 1001 return addr >= AMDVI_INT_ADDR_FIRST && addr <= AMDVI_INT_ADDR_LAST; 1002} 1003 1004static IOMMUTLBEntry amdvi_translate(IOMMUMemoryRegion *iommu, hwaddr addr, 1005 IOMMUAccessFlags flag, int iommu_idx) 1006{ 1007 AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu); 1008 AMDVIState *s = as->iommu_state; 1009 IOMMUTLBEntry ret = { 1010 .target_as = &address_space_memory, 1011 .iova = addr, 1012 .translated_addr = 0, 1013 .addr_mask = ~(hwaddr)0, 1014 .perm = IOMMU_NONE 1015 }; 1016 1017 if (!s->enabled) { 1018 /* AMDVI disabled - corresponds to iommu=off not 1019 * failure to provide any parameter 1020 */ 1021 ret.iova = addr & AMDVI_PAGE_MASK_4K; 1022 ret.translated_addr = addr & AMDVI_PAGE_MASK_4K; 1023 ret.addr_mask = ~AMDVI_PAGE_MASK_4K; 1024 ret.perm = IOMMU_RW; 1025 return ret; 1026 } else if (amdvi_is_interrupt_addr(addr)) { 1027 ret.iova = addr & AMDVI_PAGE_MASK_4K; 1028 ret.translated_addr = addr & AMDVI_PAGE_MASK_4K; 1029 ret.addr_mask = ~AMDVI_PAGE_MASK_4K; 1030 ret.perm = IOMMU_WO; 1031 return ret; 1032 } 1033 1034 amdvi_do_translate(as, addr, flag & IOMMU_WO, &ret); 1035 trace_amdvi_translation_result(as->bus_num, PCI_SLOT(as->devfn), 1036 PCI_FUNC(as->devfn), addr, ret.translated_addr); 1037 return ret; 1038} 1039 1040static int amdvi_get_irte(AMDVIState *s, MSIMessage *origin, uint64_t *dte, 1041 union irte *irte, uint16_t devid) 1042{ 1043 uint64_t irte_root, offset; 1044 1045 irte_root = dte[2] & AMDVI_IR_PHYS_ADDR_MASK; 1046 offset = (origin->data & AMDVI_IRTE_OFFSET) << 2; 1047 1048 trace_amdvi_ir_irte(irte_root, offset); 1049 1050 if (dma_memory_read(&address_space_memory, irte_root + offset, 1051 irte, sizeof(*irte))) { 1052 trace_amdvi_ir_err("failed to get irte"); 1053 return -AMDVI_IR_GET_IRTE; 1054 } 1055 1056 trace_amdvi_ir_irte_val(irte->val); 1057 1058 return 0; 1059} 1060 1061static int amdvi_int_remap_legacy(AMDVIState *iommu, 1062 MSIMessage *origin, 1063 MSIMessage *translated, 1064 uint64_t *dte, 1065 X86IOMMUIrq *irq, 1066 uint16_t sid) 1067{ 1068 int ret; 1069 union irte irte; 1070 1071 /* get interrupt remapping table */ 1072 ret = amdvi_get_irte(iommu, origin, dte, &irte, sid); 1073 if (ret < 0) { 1074 return ret; 1075 } 1076 1077 if (!irte.fields.valid) { 1078 trace_amdvi_ir_target_abort("RemapEn is disabled"); 1079 return -AMDVI_IR_TARGET_ABORT; 1080 } 1081 1082 if (irte.fields.guest_mode) { 1083 error_report_once("guest mode is not zero"); 1084 return -AMDVI_IR_ERR; 1085 } 1086 1087 if (irte.fields.int_type > AMDVI_IOAPIC_INT_TYPE_ARBITRATED) { 1088 error_report_once("reserved int_type"); 1089 return -AMDVI_IR_ERR; 1090 } 1091 1092 irq->delivery_mode = irte.fields.int_type; 1093 irq->vector = irte.fields.vector; 1094 irq->dest_mode = irte.fields.dm; 1095 irq->redir_hint = irte.fields.rq_eoi; 1096 irq->dest = irte.fields.destination; 1097 1098 return 0; 1099} 1100 1101static int amdvi_get_irte_ga(AMDVIState *s, MSIMessage *origin, uint64_t *dte, 1102 struct irte_ga *irte, uint16_t devid) 1103{ 1104 uint64_t irte_root, offset; 1105 1106 irte_root = dte[2] & AMDVI_IR_PHYS_ADDR_MASK; 1107 offset = (origin->data & AMDVI_IRTE_OFFSET) << 4; 1108 trace_amdvi_ir_irte(irte_root, offset); 1109 1110 if (dma_memory_read(&address_space_memory, irte_root + offset, 1111 irte, sizeof(*irte))) { 1112 trace_amdvi_ir_err("failed to get irte_ga"); 1113 return -AMDVI_IR_GET_IRTE; 1114 } 1115 1116 trace_amdvi_ir_irte_ga_val(irte->hi.val, irte->lo.val); 1117 return 0; 1118} 1119 1120static int amdvi_int_remap_ga(AMDVIState *iommu, 1121 MSIMessage *origin, 1122 MSIMessage *translated, 1123 uint64_t *dte, 1124 X86IOMMUIrq *irq, 1125 uint16_t sid) 1126{ 1127 int ret; 1128 struct irte_ga irte; 1129 1130 /* get interrupt remapping table */ 1131 ret = amdvi_get_irte_ga(iommu, origin, dte, &irte, sid); 1132 if (ret < 0) { 1133 return ret; 1134 } 1135 1136 if (!irte.lo.fields_remap.valid) { 1137 trace_amdvi_ir_target_abort("RemapEn is disabled"); 1138 return -AMDVI_IR_TARGET_ABORT; 1139 } 1140 1141 if (irte.lo.fields_remap.guest_mode) { 1142 error_report_once("guest mode is not zero"); 1143 return -AMDVI_IR_ERR; 1144 } 1145 1146 if (irte.lo.fields_remap.int_type > AMDVI_IOAPIC_INT_TYPE_ARBITRATED) { 1147 error_report_once("reserved int_type is set"); 1148 return -AMDVI_IR_ERR; 1149 } 1150 1151 irq->delivery_mode = irte.lo.fields_remap.int_type; 1152 irq->vector = irte.hi.fields.vector; 1153 irq->dest_mode = irte.lo.fields_remap.dm; 1154 irq->redir_hint = irte.lo.fields_remap.rq_eoi; 1155 irq->dest = irte.lo.fields_remap.destination; 1156 1157 return 0; 1158} 1159 1160static int __amdvi_int_remap_msi(AMDVIState *iommu, 1161 MSIMessage *origin, 1162 MSIMessage *translated, 1163 uint64_t *dte, 1164 X86IOMMUIrq *irq, 1165 uint16_t sid) 1166{ 1167 int ret; 1168 uint8_t int_ctl; 1169 1170 int_ctl = (dte[2] >> AMDVI_IR_INTCTL_SHIFT) & 3; 1171 trace_amdvi_ir_intctl(int_ctl); 1172 1173 switch (int_ctl) { 1174 case AMDVI_IR_INTCTL_PASS: 1175 memcpy(translated, origin, sizeof(*origin)); 1176 return 0; 1177 case AMDVI_IR_INTCTL_REMAP: 1178 break; 1179 case AMDVI_IR_INTCTL_ABORT: 1180 trace_amdvi_ir_target_abort("int_ctl abort"); 1181 return -AMDVI_IR_TARGET_ABORT; 1182 default: 1183 trace_amdvi_ir_err("int_ctl reserved"); 1184 return -AMDVI_IR_ERR; 1185 } 1186 1187 if (iommu->ga_enabled) { 1188 ret = amdvi_int_remap_ga(iommu, origin, translated, dte, irq, sid); 1189 } else { 1190 ret = amdvi_int_remap_legacy(iommu, origin, translated, dte, irq, sid); 1191 } 1192 1193 return ret; 1194} 1195 1196/* Interrupt remapping for MSI/MSI-X entry */ 1197static int amdvi_int_remap_msi(AMDVIState *iommu, 1198 MSIMessage *origin, 1199 MSIMessage *translated, 1200 uint16_t sid) 1201{ 1202 int ret = 0; 1203 uint64_t pass = 0; 1204 uint64_t dte[4] = { 0 }; 1205 X86IOMMUIrq irq = { 0 }; 1206 uint8_t dest_mode, delivery_mode; 1207 1208 assert(origin && translated); 1209 1210 /* 1211 * When IOMMU is enabled, interrupt remap request will come either from 1212 * IO-APIC or PCI device. If interrupt is from PCI device then it will 1213 * have a valid requester id but if the interrupt is from IO-APIC 1214 * then requester id will be invalid. 1215 */ 1216 if (sid == X86_IOMMU_SID_INVALID) { 1217 sid = AMDVI_IOAPIC_SB_DEVID; 1218 } 1219 1220 trace_amdvi_ir_remap_msi_req(origin->address, origin->data, sid); 1221 1222 /* check if device table entry is set before we go further. */ 1223 if (!iommu || !iommu->devtab_len) { 1224 memcpy(translated, origin, sizeof(*origin)); 1225 goto out; 1226 } 1227 1228 if (!amdvi_get_dte(iommu, sid, dte)) { 1229 return -AMDVI_IR_ERR; 1230 } 1231 1232 /* Check if IR is enabled in DTE */ 1233 if (!(dte[2] & AMDVI_IR_REMAP_ENABLE)) { 1234 memcpy(translated, origin, sizeof(*origin)); 1235 goto out; 1236 } 1237 1238 /* validate that we are configure with intremap=on */ 1239 if (!x86_iommu_ir_supported(X86_IOMMU_DEVICE(iommu))) { 1240 trace_amdvi_err("Interrupt remapping is enabled in the guest but " 1241 "not in the host. Use intremap=on to enable interrupt " 1242 "remapping in amd-iommu."); 1243 return -AMDVI_IR_ERR; 1244 } 1245 1246 if (origin->address & AMDVI_MSI_ADDR_HI_MASK) { 1247 trace_amdvi_err("MSI address high 32 bits non-zero when " 1248 "Interrupt Remapping enabled."); 1249 return -AMDVI_IR_ERR; 1250 } 1251 1252 if ((origin->address & AMDVI_MSI_ADDR_LO_MASK) != APIC_DEFAULT_ADDRESS) { 1253 trace_amdvi_err("MSI is not from IOAPIC."); 1254 return -AMDVI_IR_ERR; 1255 } 1256 1257 /* 1258 * The MSI data register [10:8] are used to get the upstream interrupt type. 1259 * 1260 * See MSI/MSI-X format: 1261 * https://pdfs.semanticscholar.org/presentation/9420/c279e942eca568157711ef5c92b800c40a79.pdf 1262 * (page 5) 1263 */ 1264 delivery_mode = (origin->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 7; 1265 1266 switch (delivery_mode) { 1267 case AMDVI_IOAPIC_INT_TYPE_FIXED: 1268 case AMDVI_IOAPIC_INT_TYPE_ARBITRATED: 1269 trace_amdvi_ir_delivery_mode("fixed/arbitrated"); 1270 ret = __amdvi_int_remap_msi(iommu, origin, translated, dte, &irq, sid); 1271 if (ret < 0) { 1272 goto remap_fail; 1273 } else { 1274 /* Translate IRQ to MSI messages */ 1275 x86_iommu_irq_to_msi_message(&irq, translated); 1276 goto out; 1277 } 1278 break; 1279 case AMDVI_IOAPIC_INT_TYPE_SMI: 1280 error_report("SMI is not supported!"); 1281 ret = -AMDVI_IR_ERR; 1282 break; 1283 case AMDVI_IOAPIC_INT_TYPE_NMI: 1284 pass = dte[3] & AMDVI_DEV_NMI_PASS_MASK; 1285 trace_amdvi_ir_delivery_mode("nmi"); 1286 break; 1287 case AMDVI_IOAPIC_INT_TYPE_INIT: 1288 pass = dte[3] & AMDVI_DEV_INT_PASS_MASK; 1289 trace_amdvi_ir_delivery_mode("init"); 1290 break; 1291 case AMDVI_IOAPIC_INT_TYPE_EINT: 1292 pass = dte[3] & AMDVI_DEV_EINT_PASS_MASK; 1293 trace_amdvi_ir_delivery_mode("eint"); 1294 break; 1295 default: 1296 trace_amdvi_ir_delivery_mode("unsupported delivery_mode"); 1297 ret = -AMDVI_IR_ERR; 1298 break; 1299 } 1300 1301 if (ret < 0) { 1302 goto remap_fail; 1303 } 1304 1305 /* 1306 * The MSI address register bit[2] is used to get the destination 1307 * mode. The dest_mode 1 is valid for fixed and arbitrated interrupts 1308 * only. 1309 */ 1310 dest_mode = (origin->address >> MSI_ADDR_DEST_MODE_SHIFT) & 1; 1311 if (dest_mode) { 1312 trace_amdvi_ir_err("invalid dest_mode"); 1313 ret = -AMDVI_IR_ERR; 1314 goto remap_fail; 1315 } 1316 1317 if (pass) { 1318 memcpy(translated, origin, sizeof(*origin)); 1319 } else { 1320 trace_amdvi_ir_err("passthrough is not enabled"); 1321 ret = -AMDVI_IR_ERR; 1322 goto remap_fail; 1323 } 1324 1325out: 1326 trace_amdvi_ir_remap_msi(origin->address, origin->data, 1327 translated->address, translated->data); 1328 return 0; 1329 1330remap_fail: 1331 return ret; 1332} 1333 1334static int amdvi_int_remap(X86IOMMUState *iommu, 1335 MSIMessage *origin, 1336 MSIMessage *translated, 1337 uint16_t sid) 1338{ 1339 return amdvi_int_remap_msi(AMD_IOMMU_DEVICE(iommu), origin, 1340 translated, sid); 1341} 1342 1343static MemTxResult amdvi_mem_ir_write(void *opaque, hwaddr addr, 1344 uint64_t value, unsigned size, 1345 MemTxAttrs attrs) 1346{ 1347 int ret; 1348 MSIMessage from = { 0, 0 }, to = { 0, 0 }; 1349 uint16_t sid = AMDVI_IOAPIC_SB_DEVID; 1350 1351 from.address = (uint64_t) addr + AMDVI_INT_ADDR_FIRST; 1352 from.data = (uint32_t) value; 1353 1354 trace_amdvi_mem_ir_write_req(addr, value, size); 1355 1356 if (!attrs.unspecified) { 1357 /* We have explicit Source ID */ 1358 sid = attrs.requester_id; 1359 } 1360 1361 ret = amdvi_int_remap_msi(opaque, &from, &to, sid); 1362 if (ret < 0) { 1363 /* TODO: log the event using IOMMU log event interface */ 1364 error_report_once("failed to remap interrupt from devid 0x%x", sid); 1365 return MEMTX_ERROR; 1366 } 1367 1368 apic_get_class()->send_msi(&to); 1369 1370 trace_amdvi_mem_ir_write(to.address, to.data); 1371 return MEMTX_OK; 1372} 1373 1374static MemTxResult amdvi_mem_ir_read(void *opaque, hwaddr addr, 1375 uint64_t *data, unsigned size, 1376 MemTxAttrs attrs) 1377{ 1378 return MEMTX_OK; 1379} 1380 1381static const MemoryRegionOps amdvi_ir_ops = { 1382 .read_with_attrs = amdvi_mem_ir_read, 1383 .write_with_attrs = amdvi_mem_ir_write, 1384 .endianness = DEVICE_LITTLE_ENDIAN, 1385 .impl = { 1386 .min_access_size = 4, 1387 .max_access_size = 4, 1388 }, 1389 .valid = { 1390 .min_access_size = 4, 1391 .max_access_size = 4, 1392 } 1393}; 1394 1395static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) 1396{ 1397 char name[128]; 1398 AMDVIState *s = opaque; 1399 AMDVIAddressSpace **iommu_as, *amdvi_dev_as; 1400 int bus_num = pci_bus_num(bus); 1401 1402 iommu_as = s->address_spaces[bus_num]; 1403 1404 /* allocate memory during the first run */ 1405 if (!iommu_as) { 1406 iommu_as = g_malloc0(sizeof(AMDVIAddressSpace *) * PCI_DEVFN_MAX); 1407 s->address_spaces[bus_num] = iommu_as; 1408 } 1409 1410 /* set up AMD-Vi region */ 1411 if (!iommu_as[devfn]) { 1412 snprintf(name, sizeof(name), "amd_iommu_devfn_%d", devfn); 1413 1414 iommu_as[devfn] = g_malloc0(sizeof(AMDVIAddressSpace)); 1415 iommu_as[devfn]->bus_num = (uint8_t)bus_num; 1416 iommu_as[devfn]->devfn = (uint8_t)devfn; 1417 iommu_as[devfn]->iommu_state = s; 1418 1419 amdvi_dev_as = iommu_as[devfn]; 1420 1421 /* 1422 * Memory region relationships looks like (Address range shows 1423 * only lower 32 bits to make it short in length...): 1424 * 1425 * |-----------------+-------------------+----------| 1426 * | Name | Address range | Priority | 1427 * |-----------------+-------------------+----------+ 1428 * | amdvi_root | 00000000-ffffffff | 0 | 1429 * | amdvi_iommu | 00000000-ffffffff | 1 | 1430 * | amdvi_iommu_ir | fee00000-feefffff | 64 | 1431 * |-----------------+-------------------+----------| 1432 */ 1433 memory_region_init_iommu(&amdvi_dev_as->iommu, 1434 sizeof(amdvi_dev_as->iommu), 1435 TYPE_AMD_IOMMU_MEMORY_REGION, 1436 OBJECT(s), 1437 "amd_iommu", UINT64_MAX); 1438 memory_region_init(&amdvi_dev_as->root, OBJECT(s), 1439 "amdvi_root", UINT64_MAX); 1440 address_space_init(&amdvi_dev_as->as, &amdvi_dev_as->root, name); 1441 memory_region_init_io(&amdvi_dev_as->iommu_ir, OBJECT(s), 1442 &amdvi_ir_ops, s, "amd_iommu_ir", 1443 AMDVI_INT_ADDR_SIZE); 1444 memory_region_add_subregion_overlap(&amdvi_dev_as->root, 1445 AMDVI_INT_ADDR_FIRST, 1446 &amdvi_dev_as->iommu_ir, 1447 64); 1448 memory_region_add_subregion_overlap(&amdvi_dev_as->root, 0, 1449 MEMORY_REGION(&amdvi_dev_as->iommu), 1450 1); 1451 } 1452 return &iommu_as[devfn]->as; 1453} 1454 1455static const MemoryRegionOps mmio_mem_ops = { 1456 .read = amdvi_mmio_read, 1457 .write = amdvi_mmio_write, 1458 .endianness = DEVICE_LITTLE_ENDIAN, 1459 .impl = { 1460 .min_access_size = 1, 1461 .max_access_size = 8, 1462 .unaligned = false, 1463 }, 1464 .valid = { 1465 .min_access_size = 1, 1466 .max_access_size = 8, 1467 } 1468}; 1469 1470static int amdvi_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu, 1471 IOMMUNotifierFlag old, 1472 IOMMUNotifierFlag new, 1473 Error **errp) 1474{ 1475 AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu); 1476 1477 if (new & IOMMU_NOTIFIER_MAP) { 1478 error_setg(errp, 1479 "device %02x.%02x.%x requires iommu notifier which is not " 1480 "currently supported", as->bus_num, PCI_SLOT(as->devfn), 1481 PCI_FUNC(as->devfn)); 1482 return -EINVAL; 1483 } 1484 return 0; 1485} 1486 1487static void amdvi_init(AMDVIState *s) 1488{ 1489 amdvi_iotlb_reset(s); 1490 1491 s->devtab_len = 0; 1492 s->cmdbuf_len = 0; 1493 s->cmdbuf_head = 0; 1494 s->cmdbuf_tail = 0; 1495 s->evtlog_head = 0; 1496 s->evtlog_tail = 0; 1497 s->excl_enabled = false; 1498 s->excl_allow = false; 1499 s->mmio_enabled = false; 1500 s->enabled = false; 1501 s->ats_enabled = false; 1502 s->cmdbuf_enabled = false; 1503 1504 /* reset MMIO */ 1505 memset(s->mmior, 0, AMDVI_MMIO_SIZE); 1506 amdvi_set_quad(s, AMDVI_MMIO_EXT_FEATURES, AMDVI_EXT_FEATURES, 1507 0xffffffffffffffef, 0); 1508 amdvi_set_quad(s, AMDVI_MMIO_STATUS, 0, 0x98, 0x67); 1509 1510 /* reset device ident */ 1511 pci_config_set_vendor_id(s->pci.dev.config, PCI_VENDOR_ID_AMD); 1512 pci_config_set_prog_interface(s->pci.dev.config, 00); 1513 pci_config_set_device_id(s->pci.dev.config, s->devid); 1514 pci_config_set_class(s->pci.dev.config, 0x0806); 1515 1516 /* reset AMDVI specific capabilities, all r/o */ 1517 pci_set_long(s->pci.dev.config + s->capab_offset, AMDVI_CAPAB_FEATURES); 1518 pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_BAR_LOW, 1519 s->mmio.addr & ~(0xffff0000)); 1520 pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_BAR_HIGH, 1521 (s->mmio.addr & ~(0xffff)) >> 16); 1522 pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_RANGE, 1523 0xff000000); 1524 pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_MISC, 0); 1525 pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_MISC, 1526 AMDVI_MAX_PH_ADDR | AMDVI_MAX_GVA_ADDR | AMDVI_MAX_VA_ADDR); 1527} 1528 1529static void amdvi_sysbus_reset(DeviceState *dev) 1530{ 1531 AMDVIState *s = AMD_IOMMU_DEVICE(dev); 1532 1533 msi_reset(&s->pci.dev); 1534 amdvi_init(s); 1535} 1536 1537static void amdvi_sysbus_realize(DeviceState *dev, Error **errp) 1538{ 1539 int ret = 0; 1540 AMDVIState *s = AMD_IOMMU_DEVICE(dev); 1541 X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev); 1542 MachineState *ms = MACHINE(qdev_get_machine()); 1543 PCMachineState *pcms = PC_MACHINE(ms); 1544 X86MachineState *x86ms = X86_MACHINE(ms); 1545 PCIBus *bus = pcms->bus; 1546 1547 s->iotlb = g_hash_table_new_full(amdvi_uint64_hash, 1548 amdvi_uint64_equal, g_free, g_free); 1549 1550 /* This device should take care of IOMMU PCI properties */ 1551 x86_iommu->type = TYPE_AMD; 1552 if (!qdev_realize(DEVICE(&s->pci), &bus->qbus, errp)) { 1553 return; 1554 } 1555 ret = pci_add_capability(&s->pci.dev, AMDVI_CAPAB_ID_SEC, 0, 1556 AMDVI_CAPAB_SIZE, errp); 1557 if (ret < 0) { 1558 return; 1559 } 1560 s->capab_offset = ret; 1561 1562 ret = pci_add_capability(&s->pci.dev, PCI_CAP_ID_MSI, 0, 1563 AMDVI_CAPAB_REG_SIZE, errp); 1564 if (ret < 0) { 1565 return; 1566 } 1567 ret = pci_add_capability(&s->pci.dev, PCI_CAP_ID_HT, 0, 1568 AMDVI_CAPAB_REG_SIZE, errp); 1569 if (ret < 0) { 1570 return; 1571 } 1572 1573 /* Pseudo address space under root PCI bus. */ 1574 x86ms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_IOAPIC_SB_DEVID); 1575 1576 /* set up MMIO */ 1577 memory_region_init_io(&s->mmio, OBJECT(s), &mmio_mem_ops, s, "amdvi-mmio", 1578 AMDVI_MMIO_SIZE); 1579 1580 sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->mmio); 1581 sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, AMDVI_BASE_ADDR); 1582 pci_setup_iommu(bus, amdvi_host_dma_iommu, s); 1583 s->devid = object_property_get_int(OBJECT(&s->pci), "addr", &error_abort); 1584 msi_init(&s->pci.dev, 0, 1, true, false, errp); 1585 amdvi_init(s); 1586} 1587 1588static const VMStateDescription vmstate_amdvi_sysbus = { 1589 .name = "amd-iommu", 1590 .unmigratable = 1 1591}; 1592 1593static void amdvi_sysbus_instance_init(Object *klass) 1594{ 1595 AMDVIState *s = AMD_IOMMU_DEVICE(klass); 1596 1597 object_initialize(&s->pci, sizeof(s->pci), TYPE_AMD_IOMMU_PCI); 1598} 1599 1600static void amdvi_sysbus_class_init(ObjectClass *klass, void *data) 1601{ 1602 DeviceClass *dc = DEVICE_CLASS(klass); 1603 X86IOMMUClass *dc_class = X86_IOMMU_DEVICE_CLASS(klass); 1604 1605 dc->reset = amdvi_sysbus_reset; 1606 dc->vmsd = &vmstate_amdvi_sysbus; 1607 dc->hotpluggable = false; 1608 dc_class->realize = amdvi_sysbus_realize; 1609 dc_class->int_remap = amdvi_int_remap; 1610 /* Supported by the pc-q35-* machine types */ 1611 dc->user_creatable = true; 1612 set_bit(DEVICE_CATEGORY_MISC, dc->categories); 1613 dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device"; 1614} 1615 1616static const TypeInfo amdvi_sysbus = { 1617 .name = TYPE_AMD_IOMMU_DEVICE, 1618 .parent = TYPE_X86_IOMMU_DEVICE, 1619 .instance_size = sizeof(AMDVIState), 1620 .instance_init = amdvi_sysbus_instance_init, 1621 .class_init = amdvi_sysbus_class_init 1622}; 1623 1624static void amdvi_pci_class_init(ObjectClass *klass, void *data) 1625{ 1626 DeviceClass *dc = DEVICE_CLASS(klass); 1627 1628 set_bit(DEVICE_CATEGORY_MISC, dc->categories); 1629 dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device"; 1630} 1631 1632static const TypeInfo amdvi_pci = { 1633 .name = TYPE_AMD_IOMMU_PCI, 1634 .parent = TYPE_PCI_DEVICE, 1635 .instance_size = sizeof(AMDVIPCIState), 1636 .class_init = amdvi_pci_class_init, 1637 .interfaces = (InterfaceInfo[]) { 1638 { INTERFACE_CONVENTIONAL_PCI_DEVICE }, 1639 { }, 1640 }, 1641}; 1642 1643static void amdvi_iommu_memory_region_class_init(ObjectClass *klass, void *data) 1644{ 1645 IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass); 1646 1647 imrc->translate = amdvi_translate; 1648 imrc->notify_flag_changed = amdvi_iommu_notify_flag_changed; 1649} 1650 1651static const TypeInfo amdvi_iommu_memory_region_info = { 1652 .parent = TYPE_IOMMU_MEMORY_REGION, 1653 .name = TYPE_AMD_IOMMU_MEMORY_REGION, 1654 .class_init = amdvi_iommu_memory_region_class_init, 1655}; 1656 1657static void amdvi_register_types(void) 1658{ 1659 type_register_static(&amdvi_pci); 1660 type_register_static(&amdvi_sysbus); 1661 type_register_static(&amdvi_iommu_memory_region_info); 1662} 1663 1664type_init(amdvi_register_types);