The IOMMU advertises Access/Dirty bit support if the extended capability
register of the DMAR reports it (ECAP, mnemonic ECAP.SSADS, although it
was formerly known as SLADS). The first-stage table, though, has no bit
for advertising Access/Dirty unless it is referenced via a scalable-mode
PASID entry. The relevant Intel IOMMU SDM references are "3.6.2 Accessed,
Extended Accessed, and Dirty Flags" for the first-stage table and
"3.7.2 Accessed and Dirty Flags" for the second-stage table.

Enabling it depends on scalable-mode for the second-stage table, so use
of the dirty bit is limited to scalable-mode.

To use SSADS, we set bit 9 (SSADE) in the scalable-mode PASID table
entry. When we do so, we are required to flush the context/pasid-table
caches and the IOTLB, much like AMD. Relevant SDM refs:

"3.7.2 Accessed and Dirty Flags"
"6.5.3.3 Guidance to Software for Invalidations,
 Table 23. Guidance to Software for Invalidations"

To read out what has been dirtied, the same approach as in past
implementations is taken. The Dirty bit sits at the same bit position
(bit 9). The IOTLB caches some of the attributes when SSADE is enabled,
together with the dirtiness information, so we also need to flush the
IOTLB to make sure the IOMMU attempts to set the dirty bit again. The
relevant manual coverage of the hardware translation is chapter 6, with
special mention of:

"6.2.3.1 Scalable-Mode PASID-Table Entry Programming Considerations"
"6.2.4 IOTLB"

The first 12 bits of the PTE are already cached via the SLPTE pointer,
similar to what is done in amd-iommu. Also use the previously added
PASID entry cache to fetch whether the Dirty bit was enabled in the SM
second-stage table. This is useful for covering and prototyping IOMMU
support for access/dirty bits.

Signed-off-by: Joao Martins <joao.m.martins@xxxxxxxxxx>
---
 hw/i386/intel_iommu.c          | 85 ++++++++++++++++++++++++++++++----
 hw/i386/intel_iommu_internal.h |  4 ++
 hw/i386/trace-events           |  2 +
 3 files changed, 82 insertions(+), 9 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 752940fa4c0e..e946f793a968 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -651,6 +651,21 @@ static uint64_t vtd_get_slpte(dma_addr_t base_addr, uint32_t index)
     return slpte;
 }
 
+/* Set the content of a slpte located in @base_addr[@index] */
+static uint64_t vtd_set_slpte(dma_addr_t base_addr, uint32_t index,
+                              uint64_t slpte)
+{
+
+    if (dma_memory_write(&address_space_memory,
+                         base_addr + index * sizeof(slpte), &slpte,
+                         sizeof(slpte), MEMTXATTRS_UNSPECIFIED)) {
+        slpte = (uint64_t)-1;
+        return slpte;
+    }
+
+    return vtd_get_slpte(base_addr, index);
+}
+
 /* Given an iova and the level of paging structure, return the offset
  * of current level.
  */
@@ -720,6 +735,11 @@ static inline bool vtd_pe_present(VTDPASIDEntry *pe)
     return pe->val[0] & VTD_PASID_ENTRY_P;
 }
 
+static inline bool vtd_pe_slad_enabled(VTDPASIDEntry *pe)
+{
+    return pe->val[0] & VTD_SM_PASID_ENTRY_SLADE;
+}
+
 static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s,
                                           uint32_t pasid,
                                           dma_addr_t addr,
@@ -1026,6 +1046,33 @@ static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num)
     return NULL;
 }
 
+static inline bool vtd_ssad_update(IntelIOMMUState *s, uint16_t pe_flags,
+                                   uint64_t *slpte, bool is_write,
+                                   bool reads, bool writes)
+{
+    bool dirty, access = reads;
+
+    if (!(pe_flags & VTD_SM_PASID_ENTRY_SLADE)) {
+        return false;
+    }
+
+    dirty = access = false;
+
+    if (is_write && writes && !(*slpte & VTD_SL_D)) {
+        *slpte |= VTD_SL_D;
+        trace_vtd_dirty_update(*slpte);
+        dirty = true;
+    }
+
+    if (((!is_write && reads) || dirty) && !(*slpte & VTD_SL_A)) {
+        *slpte |= VTD_SL_A;
+        trace_vtd_access_update(*slpte);
+        access = true;
+    }
+
+    return dirty || access;
+}
+
 /* Given the @iova, get relevant @slptep. @slpte_level will be the last level
  * of the translation, can be used for deciding the size of large page.
  */
@@ -1039,6 +1086,7 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce,
     uint32_t offset;
     uint64_t slpte;
     uint64_t access_right_check;
+    uint16_t pe_flags;
 
     if (!vtd_iova_range_check(s, iova, ce, aw_bits)) {
         error_report_once("%s: detected IOVA overflow (iova=0x%" PRIx64 ")",
@@ -1054,14 +1102,7 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce,
 
         slpte = vtd_get_slpte(addr, offset);
         if (slpte == (uint64_t)-1) {
-            error_report_once("%s: detected read error on DMAR slpte "
-                              "(iova=0x%" PRIx64 ")", __func__, iova);
-            if (level == vtd_get_iova_level(s, ce)) {
-                /* Invalid programming of context-entry */
-                return -VTD_FR_CONTEXT_ENTRY_INV;
-            } else {
-                return -VTD_FR_PAGING_ENTRY_INV;
-            }
+            goto inv_slpte;
         }
         *reads = (*reads) && (slpte & VTD_SL_R);
         *writes = (*writes) && (slpte & VTD_SL_W);
@@ -1081,6 +1122,14 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce,
         }
 
         if (vtd_is_last_slpte(slpte, level)) {
+            pe_flags = vtd_sm_pasid_entry_flags(s, ce);
+            if (vtd_ssad_update(s, pe_flags, &slpte, is_write,
+                                *reads, *writes)) {
+                slpte = vtd_set_slpte(addr, offset, slpte);
+                if (slpte == (uint64_t)-1) {
+                    goto inv_slpte;
+                }
+            }
             *slptep = slpte;
             *slpte_level = level;
             return 0;
@@ -1088,6 +1137,16 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce,
         addr = vtd_get_slpte_addr(slpte, aw_bits);
         level--;
     }
+
+inv_slpte:
+    error_report_once("%s: detected read error on DMAR slpte "
+                      "(iova=0x%" PRIx64 ")", __func__, iova);
+    if (level == vtd_get_iova_level(s, ce)) {
+        /* Invalid programming of context-entry */
+        return -VTD_FR_CONTEXT_ENTRY_INV;
+    } else {
+        return -VTD_FR_PAGING_ENTRY_INV;
+    }
 }
 
 typedef int (*vtd_page_walk_hook)(IOMMUTLBEvent *event, void *private);
@@ -1742,6 +1801,13 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
         slpte = iotlb_entry->slpte;
         access_flags = iotlb_entry->access_flags;
         page_mask = iotlb_entry->mask;
+        if (vtd_ssad_update(s, iotlb_entry->sm_pe_flags, &slpte, is_write,
+                            access_flags & IOMMU_RO, access_flags & IOMMU_WO)) {
+            uint32_t offset;
+
+            offset = vtd_iova_level_offset(addr, vtd_get_iova_level(s, &ce));
+            vtd_set_slpte(addr, offset, slpte);
+        }
         goto out;
     }
 
@@ -3693,7 +3759,8 @@ static void vtd_init(IntelIOMMUState *s)
     /* TODO: read cap/ecap from host to decide which cap to be exposed.
      */
     if (s->scalable_mode) {
-        s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS;
+        s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS |
+                   VTD_ECAP_SLADS;
     }
 
     if (s->snoop_control) {
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 1ff13b40f9bb..c00f6e7b4a72 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -192,6 +192,7 @@
 #define VTD_ECAP_MHMV               (15ULL << 20)
 #define VTD_ECAP_SRS                (1ULL << 31)
 #define VTD_ECAP_SMTS               (1ULL << 43)
+#define VTD_ECAP_SLADS              (1ULL << 45)
 #define VTD_ECAP_SLTS               (1ULL << 46)
 
 /* CAP_REG */
@@ -492,6 +493,7 @@ typedef struct VTDRootEntry VTDRootEntry;
 #define VTD_SM_PASID_ENTRY_AW          7ULL /* Adjusted guest-address-width */
 #define VTD_SM_PASID_ENTRY_DID(val)    ((val) & VTD_DOMAIN_ID_MASK)
 
+#define VTD_SM_PASID_ENTRY_SLADE       (1ULL << 9)
 /* Second Level Page Translation Pointer*/
 #define VTD_SM_PASID_ENTRY_SLPTPTR     (~0xfffULL)
 
@@ -515,5 +517,7 @@ typedef struct VTDRootEntry VTDRootEntry;
 #define VTD_SL_PT_BASE_ADDR_MASK(aw) (~(VTD_PAGE_SIZE - 1) & VTD_HAW_MASK(aw))
 #define VTD_SL_IGN_COM              0xbff0000000000000ULL
 #define VTD_SL_TM                   (1ULL << 62)
+#define VTD_SL_A                    (1ULL << 8)
+#define VTD_SL_D                    (1ULL << 9)
 
 #endif
diff --git a/hw/i386/trace-events b/hw/i386/trace-events
index eb5f075873cd..e4122ee8a999 100644
--- a/hw/i386/trace-events
+++ b/hw/i386/trace-events
@@ -66,6 +66,8 @@ vtd_frr_new(int index, uint64_t hi, uint64_t lo) "index %d high 0x%"PRIx64" low
 vtd_warn_invalid_qi_tail(uint16_t tail) "tail 0x%"PRIx16
 vtd_warn_ir_vector(uint16_t sid, int index, int vec, int target) "sid 0x%"PRIx16" index %d vec %d (should be: %d)"
 vtd_warn_ir_trigger(uint16_t sid, int index, int trig, int target) "sid 0x%"PRIx16" index %d trigger %d (should be: %d)"
+vtd_access_update(uint64_t slpte) "slpte 0x%"PRIx64
+vtd_dirty_update(uint64_t slpte) "slpte 0x%"PRIx64
 
 # amd_iommu.c
 amdvi_evntlog_fail(uint64_t addr, uint32_t head) "error: fail to write at addr 0x%"PRIx64" + offset 0x%"PRIx32
-- 
2.17.2
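
For illustration only, the snippet below is a minimal standalone sketch
(not QEMU code, not part of the patch) of the simplified Access/Dirty
update rule that vtd_ssad_update() applies to a second-level PTE: any
translation marks the page accessed, a write additionally marks it
dirty, and the PTE only needs to be written back when a bit actually
changed. The bit positions match the VTD_SL_A (bit 8) and VTD_SL_D
(bit 9) definitions added above; the macro and function names here are
made up for the example.

/*
 * Standalone sketch of the A/D update rule on a second-level PTE.
 * Names are illustrative; bit positions follow VTD_SL_A/VTD_SL_D.
 */
#include <stdbool.h>
#include <stdint.h>

#define SL_PTE_A (1ULL << 8)   /* Accessed, second-level PTE bit 8 */
#define SL_PTE_D (1ULL << 9)   /* Dirty, second-level PTE bit 9 */

static bool sl_pte_update_ad(uint64_t *pte, bool is_write)
{
    uint64_t old = *pte;

    *pte |= SL_PTE_A;          /* mark accessed on any translation */
    if (is_write) {
        *pte |= SL_PTE_D;      /* writes additionally mark it dirty */
    }

    return *pte != old;        /* write the PTE back only if it changed */
}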