On Fri, Apr 29, 2022 at 5:14 AM Joao Martins <joao.m.martins@xxxxxxxxxx> wrote: > > IOMMU advertises Access/Dirty bits if the extended capability > DMAR register reports it (ECAP, mnemonic ECAP.SSADS albeit it used > to be known as SLADS before). The first stage table, though, has no bit for > advertising Access/Dirty, unless referenced via a scalable-mode PASID Entry. > Relevant Intel IOMMU SDM ref for first stage table "3.6.2 Accessed, Extended > Accessed, and Dirty Flags" and second stage table "3.7.2 Accessed and Dirty > Flags". > > To enable it we depend on scalable-mode for the second-stage table, > so we limit use of the dirty bit to scalable-mode. To use SSADS, we set a bit in > the scalable-mode PASID Table entry, by setting bit 9 (SSADE). When > we do so we require flushing the context/pasid-table caches and IOTLB much > like AMD. Relevant SDM refs: > > "3.7.2 Accessed and Dirty Flags" > "6.5.3.3 Guidance to Software for Invalidations, > Table 23. Guidance to Software for Invalidations" > > To read out what's dirtied, the same thing as in past implementations is done. > Dirty bit support is located in the same location (bit 9). The IOTLB > caches some attributes when SSADE is enabled, along with dirtiness information, > so we also need to flush the IOTLB to make sure the IOMMU attempts to set the > dirty bit again. The relevant manual material on the hardware translation is > chapter 6, with special mention of: > > "6.2.3.1 Scalable-Mode PASID-Table Entry Programming Considerations" > "6.2.4 IOTLB" > > The first 12 bits of the PTE are already cached via the SLPTE pointer, > similar to how it is done in amd-iommu. Also use the previously > added PASID entry cache in order to fetch whether the Dirty bit was > enabled or not in the SM second stage table. > > This is useful for covering and prototyping IOMMU support for access/dirty > bits. 
> > Signed-off-by: Joao Martins <joao.m.martins@xxxxxxxxxx> > --- > hw/i386/intel_iommu.c | 85 ++++++++++++++++++++++++++++++---- > hw/i386/intel_iommu_internal.h | 4 ++ > hw/i386/trace-events | 2 + > 3 files changed, 82 insertions(+), 9 deletions(-) > > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c > index 752940fa4c0e..e946f793a968 100644 > --- a/hw/i386/intel_iommu.c > +++ b/hw/i386/intel_iommu.c > @@ -651,6 +651,21 @@ static uint64_t vtd_get_slpte(dma_addr_t base_addr, uint32_t index) > return slpte; > } > > +/* Set the content of an slpte located in @base_addr[@index] */ > +static uint64_t vtd_set_slpte(dma_addr_t base_addr, uint32_t index, > + uint64_t slpte) > +{ > + > + if (dma_memory_write(&address_space_memory, > + base_addr + index * sizeof(slpte), &slpte, > + sizeof(slpte), MEMTXATTRS_UNSPECIFIED)) { > + slpte = (uint64_t)-1; > + return slpte; > + } > + > + return vtd_get_slpte(base_addr, index); > +} > + > /* Given an iova and the level of paging structure, return the offset > * of current level. 
> */ > @@ -720,6 +735,11 @@ static inline bool vtd_pe_present(VTDPASIDEntry *pe) > return pe->val[0] & VTD_PASID_ENTRY_P; > } > > +static inline bool vtd_pe_slad_enabled(VTDPASIDEntry *pe) > +{ > + return pe->val[0] & VTD_SM_PASID_ENTRY_SLADE; > +} > + > static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, > uint32_t pasid, > dma_addr_t addr, > @@ -1026,6 +1046,33 @@ static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num) > return NULL; > } > > +static inline bool vtd_ssad_update(IntelIOMMUState *s, uint16_t pe_flags, > + uint64_t *slpte, bool is_write, > + bool reads, bool writes) > +{ > + bool dirty, access = reads; > + > + if (!(pe_flags & VTD_SM_PASID_ENTRY_SLADE)) { > + return false; > + } > + > + dirty = access = false; > + > + if (is_write && writes && !(*slpte & VTD_SL_D)) { > + *slpte |= VTD_SL_D; > + trace_vtd_dirty_update(*slpte); > + dirty = true; > + } > + > + if (((!is_write && reads) || dirty) && !(*slpte & VTD_SL_A)) { > + *slpte |= VTD_SL_A; > + trace_vtd_access_update(*slpte); > + access = true; > + } > + > + return dirty || access; > +} > + > /* Given the @iova, get relevant @slptep. @slpte_level will be the last level > * of the translation, can be used for deciding the size of large page. 
> */ > @@ -1039,6 +1086,7 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce, > uint32_t offset; > uint64_t slpte; > uint64_t access_right_check; > + uint16_t pe_flags; > > if (!vtd_iova_range_check(s, iova, ce, aw_bits)) { > error_report_once("%s: detected IOVA overflow (iova=0x%" PRIx64 ")", > @@ -1054,14 +1102,7 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce, > slpte = vtd_get_slpte(addr, offset); > > if (slpte == (uint64_t)-1) { > - error_report_once("%s: detected read error on DMAR slpte " > - "(iova=0x%" PRIx64 ")", __func__, iova); > - if (level == vtd_get_iova_level(s, ce)) { > - /* Invalid programming of context-entry */ > - return -VTD_FR_CONTEXT_ENTRY_INV; > - } else { > - return -VTD_FR_PAGING_ENTRY_INV; > - } > + goto inv_slpte; > } > *reads = (*reads) && (slpte & VTD_SL_R); > *writes = (*writes) && (slpte & VTD_SL_W); > @@ -1081,6 +1122,14 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce, > } > > if (vtd_is_last_slpte(slpte, level)) { > + pe_flags = vtd_sm_pasid_entry_flags(s, ce); > + if (vtd_ssad_update(s, pe_flags, &slpte, is_write, > + *reads, *writes)) { > + slpte = vtd_set_slpte(addr, offset, slpte); > + if (slpte == (uint64_t)-1) { > + goto inv_slpte; > + } > + } > *slptep = slpte; > *slpte_level = level; > return 0; > @@ -1088,6 +1137,16 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce, > addr = vtd_get_slpte_addr(slpte, aw_bits); > level--; > } > + > +inv_slpte: > + error_report_once("%s: detected read error on DMAR slpte " > + "(iova=0x%" PRIx64 ")", __func__, iova); > + if (level == vtd_get_iova_level(s, ce)) { > + /* Invalid programming of context-entry */ > + return -VTD_FR_CONTEXT_ENTRY_INV; > + } else { > + return -VTD_FR_PAGING_ENTRY_INV; > + } > } > > typedef int (*vtd_page_walk_hook)(IOMMUTLBEvent *event, void *private); > @@ -1742,6 +1801,13 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, > slpte = 
iotlb_entry->slpte; > access_flags = iotlb_entry->access_flags; > page_mask = iotlb_entry->mask; > + if (vtd_ssad_update(s, iotlb_entry->sm_pe_flags, &slpte, is_write, > + access_flags & IOMMU_RO, access_flags & IOMMU_WO)) { > + uint32_t offset; > + > + offset = vtd_iova_level_offset(addr, vtd_get_iova_level(s, &ce)); > + vtd_set_slpte(addr, offset, slpte); > + } > goto out; > } > > @@ -3693,7 +3759,8 @@ static void vtd_init(IntelIOMMUState *s) > > /* TODO: read cap/ecap from host to decide which cap to be exposed. */ > if (s->scalable_mode) { > - s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS; > + s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS | > + VTD_ECAP_SLADS; > } We probably need a dedicated command line parameter and make it compat for pre 7.1 machines. Otherwise we may break migration. Thanks > > if (s->snoop_control) { > diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h > index 1ff13b40f9bb..c00f6e7b4a72 100644 > --- a/hw/i386/intel_iommu_internal.h > +++ b/hw/i386/intel_iommu_internal.h > @@ -192,6 +192,7 @@ > #define VTD_ECAP_MHMV (15ULL << 20) > #define VTD_ECAP_SRS (1ULL << 31) > #define VTD_ECAP_SMTS (1ULL << 43) > +#define VTD_ECAP_SLADS (1ULL << 45) > #define VTD_ECAP_SLTS (1ULL << 46) > > /* CAP_REG */ > @@ -492,6 +493,7 @@ typedef struct VTDRootEntry VTDRootEntry; > > #define VTD_SM_PASID_ENTRY_AW 7ULL /* Adjusted guest-address-width */ > #define VTD_SM_PASID_ENTRY_DID(val) ((val) & VTD_DOMAIN_ID_MASK) > +#define VTD_SM_PASID_ENTRY_SLADE (1ULL << 9) > > /* Second Level Page Translation Pointer*/ > #define VTD_SM_PASID_ENTRY_SLPTPTR (~0xfffULL) > @@ -515,5 +517,7 @@ typedef struct VTDRootEntry VTDRootEntry; > #define VTD_SL_PT_BASE_ADDR_MASK(aw) (~(VTD_PAGE_SIZE - 1) & VTD_HAW_MASK(aw)) > #define VTD_SL_IGN_COM 0xbff0000000000000ULL > #define VTD_SL_TM (1ULL << 62) > +#define VTD_SL_A (1ULL << 8) > +#define VTD_SL_D (1ULL << 9) > > #endif > diff --git a/hw/i386/trace-events b/hw/i386/trace-events > 
index eb5f075873cd..e4122ee8a999 100644 > --- a/hw/i386/trace-events > +++ b/hw/i386/trace-events > @@ -66,6 +66,8 @@ vtd_frr_new(int index, uint64_t hi, uint64_t lo) "index %d high 0x%"PRIx64" low > vtd_warn_invalid_qi_tail(uint16_t tail) "tail 0x%"PRIx16 > vtd_warn_ir_vector(uint16_t sid, int index, int vec, int target) "sid 0x%"PRIx16" index %d vec %d (should be: %d)" > vtd_warn_ir_trigger(uint16_t sid, int index, int trig, int target) "sid 0x%"PRIx16" index %d trigger %d (should be: %d)" > +vtd_access_update(uint64_t slpte) "slpte 0x%"PRIx64 > +vtd_dirty_update(uint64_t slpte) "slpte 0x%"PRIx64 > > # amd_iommu.c > amdvi_evntlog_fail(uint64_t addr, uint32_t head) "error: fail to write at addr 0x%"PRIx64" + offset 0x%"PRIx32 > -- > 2.17.2 >