The IOMMU advertises Access/Dirty bit support if the extended feature register reports it. Relevant AMD IOMMU SDM ref [0]: "1.3.8 Enhanced Support for Access and Dirty Bits". To enable it, we set the DTE flag in bits 7 and 8 to enable access, or access+dirty. With that, the IOMMU starts marking the D and A flags on every Memory Request or ATS translation request. Relevant AMD IOMMU SDM ref [0]: "Table 7. Device Table Entry (DTE) Field Definitions", particularly the entry "HAD". The cached DTE information is then used by amdvi_had_update(), both when we do an IO page walk and when we find an IOTLB entry for the address. Toggling it on and off is relatively simple: it amounts to setting 2 bits in the DTE and flushing the device DTE cache. The information is then cleared and set again on the next device context cached or IOVA. Worthwhile sections from the AMD IOMMU SDM: "2.2.3.1 Host Access Support" and "2.2.3.2 Host Dirty Support". For details on how the IOMMU hardware updates the dirty bit, and what it expects from its subsequent clearing by the CPU, see: "2.2.7.4 Updating Accessed and Dirty Bits in the Guest Address Tables" and "2.2.7.5 Clearing Accessed and Dirty Bits". This is useful to help prototyping of IOMMU dirty tracking, particularly the IOMMUFD and VFIO sides. 
Signed-off-by: Joao Martins <joao.m.martins@xxxxxxxxxx> --- hw/i386/amd_iommu.c | 52 ++++++++++++++++++++++++++++++++++++++++++++ hw/i386/amd_iommu.h | 11 ++++++++-- hw/i386/trace-events | 2 ++ 3 files changed, 63 insertions(+), 2 deletions(-) diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index 25b5c3be70ea..7f48a2601579 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -24,6 +24,7 @@ #include "hw/i386/pc.h" #include "hw/pci/msi.h" #include "hw/pci/pci_bus.h" +#include "hw/qdev-properties.h" #include "migration/vmstate.h" #include "amd_iommu.h" #include "qapi/error.h" @@ -901,6 +902,48 @@ static inline uint64_t amdvi_get_pte_entry(AMDVIState *s, uint64_t pte_addr, return pte; } +static inline int amdvi_set_pte_entry(AMDVIState *s, uint64_t pte_addr, + uint16_t devid, uint64_t pte) +{ + if (dma_memory_write(&address_space_memory, pte_addr, &pte, sizeof(pte), + MEMTXATTRS_UNSPECIFIED)) { + trace_amdvi_get_pte_hwerror(pte_addr); + amdvi_log_pagetab_error(s, devid, pte_addr, 0); + return -EINVAL; + } + return 0; +} + +/* + * Checks if A/D bits need to be updated. + * It can only be called when PTE permissions have been + * validated against he transaction-requested ones. 
+ */ +static bool amdvi_had_update(AMDVIAddressSpace *as, uint64_t dte, + uint64_t *pte, unsigned perms) +{ + bool is_write = perms & AMDVI_PERM_WRITE; + bool dirty, access; + + dirty = access = false; + + if (is_write && (dte & AMDVI_DEV_HADEN) && + !(*pte & AMDVI_DEV_PERM_DIRTY)) { + *pte |= AMDVI_DEV_PERM_DIRTY; + trace_amdvi_hd_update(*pte); + dirty = true; + } + + if ((!is_write | dirty) && (dte & AMDVI_DEV_HAEN) && + !(*pte & AMDVI_DEV_PERM_ACCESS)) { + *pte |= AMDVI_DEV_PERM_ACCESS; + trace_amdvi_ha_update(*pte); + access = true; + } + + return dirty || access; +} + static void amdvi_page_walk(AMDVIAddressSpace *as, uint64_t *dte, IOMMUTLBEntry *ret, unsigned perms, hwaddr addr, uint64_t *iotlb_pte, @@ -948,6 +991,11 @@ static void amdvi_page_walk(AMDVIAddressSpace *as, uint64_t *dte, page_mask = pte_get_page_mask(oldlevel); } + if (amdvi_had_update(as, dte[0], &pte, perms)) { + amdvi_set_pte_entry(as->iommu_state, pte_addr, as->devfn, + cpu_to_le64(pte)); + } + /* get access permissions from pte */ ret->iova = addr & page_mask; ret->translated_addr = (pte & AMDVI_DEV_PT_ROOT_MASK) & page_mask; @@ -977,6 +1025,10 @@ static void amdvi_do_translate(AMDVIAddressSpace *as, hwaddr addr, if (iotlb_entry) { trace_amdvi_iotlb_hit(PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), addr, iotlb_entry->translated_addr); + if (amdvi_had_update(as, iotlb_entry->dte_flags, + &iotlb_entry->pte, iotlb_entry->perms)) + amdvi_set_pte_entry(as->iommu_state, iotlb_entry->pte_addr, + as->devfn, cpu_to_le64(iotlb_entry->pte)); ret->iova = addr & ~iotlb_entry->page_mask; ret->translated_addr = iotlb_entry->translated_addr; ret->addr_mask = iotlb_entry->page_mask; diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h index 79d38a3e4184..b794596aa07d 100644 --- a/hw/i386/amd_iommu.h +++ b/hw/i386/amd_iommu.h @@ -135,6 +135,10 @@ #define AMDVI_DEV_PERM_SHIFT 61 #define AMDVI_DEV_PERM_READ (1ULL << 61) #define AMDVI_DEV_PERM_WRITE (1ULL << 62) +#define AMDVI_DEV_PERM_ACCESS (1ULL 
<< 5) +#define AMDVI_DEV_PERM_DIRTY (1ULL << 6) +#define AMDVI_DEV_HADEN (3ULL << 7) +#define AMDVI_DEV_HAEN (1ULL << 7) /* Device table entry bits 64:127 */ #define AMDVI_DEV_DOMID_ID_MASK ((1ULL << 16) - 1) @@ -159,9 +163,11 @@ #define AMDVI_FEATURE_GA (1ULL << 7) /* guest VAPIC support */ #define AMDVI_FEATURE_HE (1ULL << 8) /* hardware error regs */ #define AMDVI_FEATURE_PC (1ULL << 9) /* Perf counters */ +#define AMDVI_FEATURE_HD (1ULL << 52) /* Host Dirty support */ +#define AMDVI_FEATURE_HA (1ULL << 49) /* Host Access */ /* reserved DTE bits */ -#define AMDVI_DTE_LOWER_QUAD_RESERVED 0x80300000000000fc +#define AMDVI_DTE_LOWER_QUAD_RESERVED 0x803000000000006c #define AMDVI_DTE_MIDDLE_QUAD_RESERVED 0x0000000000000100 #define AMDVI_DTE_UPPER_QUAD_RESERVED 0x08f0000000000000 @@ -176,7 +182,8 @@ /* extended feature support */ #define AMDVI_EXT_FEATURES (AMDVI_FEATURE_PREFETCH | AMDVI_FEATURE_PPR | \ AMDVI_FEATURE_IA | AMDVI_FEATURE_GT | AMDVI_FEATURE_HE | \ - AMDVI_GATS_MODE | AMDVI_HATS_MODE | AMDVI_FEATURE_GA) + AMDVI_GATS_MODE | AMDVI_HATS_MODE | AMDVI_FEATURE_GA | \ + AMDVI_FEATURE_HD | AMDVI_FEATURE_HA) /* capabilities header */ #define AMDVI_CAPAB_FEATURES (AMDVI_CAPAB_FLAT_EXT | \ diff --git a/hw/i386/trace-events b/hw/i386/trace-events index e49814dd642d..eb5f075873cd 100644 --- a/hw/i386/trace-events +++ b/hw/i386/trace-events @@ -107,6 +107,8 @@ amdvi_ir_intctl(uint8_t val) "int_ctl 0x%"PRIx8 amdvi_ir_target_abort(const char *str) "%s" amdvi_ir_delivery_mode(const char *str) "%s" amdvi_ir_irte_ga_val(uint64_t hi, uint64_t lo) "hi 0x%"PRIx64" lo 0x%"PRIx64 +amdvi_ha_update(uint64_t pte) "pte 0x%"PRIx64 +amdvi_hd_update(uint64_t pte) "pte 0x%"PRIx64 # vmport.c vmport_register(unsigned char command, void *func, void *opaque) "command: 0x%02x func: %p opaque: %p" -- 2.17.2