From: Ankit Agrawal <ankita@xxxxxxxxxx>

Each VMA stores the pgprot required for its mappings in
vma->vm_page_prot. Based on this we can determine the desired
MT_DEVICE_* for the VMA directly, and do not have to guess via
heuristics built on pfn_is_map_memory().

The following kinds of pgprot are available to userspace, with their
corresponding memory types:

pgprot_noncached	-> MT_DEVICE_nGnRnE
pgprot_writecombine	-> MT_NORMAL_NC
pgprot_device		-> MT_DEVICE_nGnRE
pgprot_tagged		-> MT_NORMAL_TAGGED

Decode the relevant MT_* types in use and translate them into the
corresponding KVM_PGTABLE_PROT_*:

- MT_DEVICE_nGnRE	-> KVM_PGTABLE_PROT_DEVICE_nGnRE (device)
- MT_DEVICE_nGnRnE	-> KVM_PGTABLE_PROT_DEVICE_nGnRnE (noncached)
- MT_NORMAL/_TAGGED/_NC	-> 0

The stage-2 MemAttr encoding of 0x0 for Device-nGnRnE
(MT_S2_DEVICE_nGnRnE) is based on [2].

Also worth noting is how the stage-1 and stage-2 attributes combine.
Per [3], if FWB is not set, the effective attribute is the more
restrictive of the two. The sequence from most restrictive to least:

DEVICE_nGnRnE -> DEVICE_nGnRE -> NORMAL/_TAGGED/_NC

If FWB is set, the stage-2 mapping type overrides the stage-1 [1].

This solves a problem where KVM cannot preserve the MT_NORMAL memory
type for non-struct-page-backed memory in the stage-2 mapping. Instead,
the VMA creator determines the memory type and stage 2 follows it.
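To make the attribute-combining rule above concrete, a minimal
illustrative sketch (not part of the patch; the enum and helper are
hypothetical, with values ordered most to least restrictive):

/*
 * Illustration only: effective memory type when stage-1 and stage-2
 * attributes combine, per [1] and [3].
 */
enum mt_restrict { R_DEVICE_nGnRnE, R_DEVICE_nGnRE, R_NORMAL };

static enum mt_restrict s1_s2_effective(enum mt_restrict s1,
					enum mt_restrict s2, bool fwb)
{
	if (fwb)
		return s2;		/* FWB: stage 2 wins outright */
	return s1 < s2 ? s1 : s2;	/* else: more restrictive wins */
}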
[1] https://developer.arm.com/documentation/102376/0100/Combining-Stage-1-and-Stage-2-attributes
[2] ARMv8 reference manual: https://developer.arm.com/documentation/ddi0487/gb/
    Section D5.5.3, Table D5-38
[3] ARMv8 reference manual: https://developer.arm.com/documentation/ddi0487/gb/
    Table G5-20 on page G5-6330

Signed-off-by: Ankit Agrawal <ankita@xxxxxxxxxx>
---
 arch/arm64/include/asm/kvm_pgtable.h |  8 +++++---
 arch/arm64/include/asm/memory.h      |  6 ++++--
 arch/arm64/kvm/hyp/pgtable.c         | 16 +++++++++++-----
 arch/arm64/kvm/mmu.c                 | 27 ++++++++++++++++++++++-----
 4 files changed, 42 insertions(+), 15 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 4cd6762bda80..d3166b6e6329 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -150,7 +150,8 @@ enum kvm_pgtable_stage2_flags {
  * @KVM_PGTABLE_PROT_X:			Execute permission.
  * @KVM_PGTABLE_PROT_W:			Write permission.
  * @KVM_PGTABLE_PROT_R:			Read permission.
- * @KVM_PGTABLE_PROT_DEVICE:		Device attributes.
+ * @KVM_PGTABLE_PROT_DEVICE_nGnRE:	Device nGnRE attributes.
+ * @KVM_PGTABLE_PROT_DEVICE_nGnRnE:	Device nGnRnE attributes.
  * @KVM_PGTABLE_PROT_SW0:		Software bit 0.
  * @KVM_PGTABLE_PROT_SW1:		Software bit 1.
  * @KVM_PGTABLE_PROT_SW2:		Software bit 2.
@@ -161,7 +162,8 @@ enum kvm_pgtable_prot {
 	KVM_PGTABLE_PROT_W			= BIT(1),
 	KVM_PGTABLE_PROT_R			= BIT(2),
 
-	KVM_PGTABLE_PROT_DEVICE			= BIT(3),
+	KVM_PGTABLE_PROT_DEVICE_nGnRE		= BIT(3),
+	KVM_PGTABLE_PROT_DEVICE_nGnRnE		= BIT(4),
 
 	KVM_PGTABLE_PROT_SW0			= BIT(55),
 	KVM_PGTABLE_PROT_SW1			= BIT(56),
@@ -178,7 +180,7 @@ enum kvm_pgtable_prot {
 #define PAGE_HYP		KVM_PGTABLE_PROT_RW
 #define PAGE_HYP_EXEC		(KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X)
 #define PAGE_HYP_RO		(KVM_PGTABLE_PROT_R)
-#define PAGE_HYP_DEVICE		(PAGE_HYP | KVM_PGTABLE_PROT_DEVICE)
+#define PAGE_HYP_DEVICE		(PAGE_HYP | KVM_PGTABLE_PROT_DEVICE_nGnRE)
 
 typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end,
 					   enum kvm_pgtable_prot prot);
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 78e5163836a0..4ebbc4b1ba4d 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -147,14 +147,16 @@
  * Memory types for Stage-2 translation
  */
 #define MT_S2_NORMAL		0xf
+#define MT_S2_DEVICE_nGnRnE	0x0
 #define MT_S2_DEVICE_nGnRE	0x1
 
 /*
  * Memory types for Stage-2 translation when ID_AA64MMFR2_EL1.FWB is 0001
  * Stage-2 enforces Normal-WB and Device-nGnRE
  */
-#define MT_S2_FWB_NORMAL	6
-#define MT_S2_FWB_DEVICE_nGnRE	1
+#define MT_S2_FWB_NORMAL	0x6
+#define MT_S2_FWB_DEVICE_nGnRnE	0x0
+#define MT_S2_FWB_DEVICE_nGnRE	0x1
 
 #ifdef CONFIG_ARM64_4K_PAGES
 #define IOREMAP_MAX_ORDER	(PUD_SHIFT)
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 3d61bd3e591d..7a8238b41590 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -355,7 +355,7 @@ struct hyp_map_data {
 
 static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep)
 {
-	bool device = prot & KVM_PGTABLE_PROT_DEVICE;
+	bool device = prot & KVM_PGTABLE_PROT_DEVICE_nGnRE;
 	u32 mtype = device ? MT_DEVICE_nGnRE : MT_NORMAL;
 	kvm_pte_t attr = FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX, mtype);
 	u32 sh = KVM_PTE_LEAF_ATTR_LO_S1_SH_IS;
@@ -636,14 +636,20 @@ static bool stage2_has_fwb(struct kvm_pgtable *pgt)
 static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot,
 				kvm_pte_t *ptep)
 {
-	bool device = prot & KVM_PGTABLE_PROT_DEVICE;
-	kvm_pte_t attr = device ? KVM_S2_MEMATTR(pgt, DEVICE_nGnRE) :
-			    KVM_S2_MEMATTR(pgt, NORMAL);
 	u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS;
+	kvm_pte_t attr;
+
+	if (prot & KVM_PGTABLE_PROT_DEVICE_nGnRE)
+		attr = KVM_S2_MEMATTR(pgt, DEVICE_nGnRE);
+	else if (prot & KVM_PGTABLE_PROT_DEVICE_nGnRnE)
+		attr = KVM_S2_MEMATTR(pgt, DEVICE_nGnRnE);
+	else
+		attr = KVM_S2_MEMATTR(pgt, NORMAL);
 
 	if (!(prot & KVM_PGTABLE_PROT_X))
 		attr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
-	else if (device)
+	else if (prot & KVM_PGTABLE_PROT_DEVICE_nGnRE ||
+		 prot & KVM_PGTABLE_PROT_DEVICE_nGnRnE)
 		return -EINVAL;
 
 	if (prot & KVM_PGTABLE_PROT_R)
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 7113587222ff..8d63aa951c33 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -897,7 +897,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
 	int ret = 0;
 	struct kvm_mmu_memory_cache cache = { .gfp_zero = __GFP_ZERO };
 	struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
-	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_DEVICE |
+	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_DEVICE_nGnRE |
 				     KVM_PGTABLE_PROT_R |
 				     (writable ? KVM_PGTABLE_PROT_W : 0);
 
@@ -1186,6 +1186,15 @@ static bool kvm_vma_mte_allowed(struct vm_area_struct *vma)
 	return vma->vm_flags & VM_MTE_ALLOWED;
 }
 
+/*
+ * Determine the memory region cacheability from VMA's pgprot. This
+ * is used to set the stage 2 PTEs.
+ */
+static unsigned long mapping_type(pgprot_t page_prot)
+{
+	return ((pgprot_val(page_prot) & PTE_ATTRINDX_MASK) >> 2);
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			  struct kvm_memory_slot *memslot, unsigned long hva,
 			  unsigned long fault_status)
@@ -1368,10 +1377,18 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	if (exec_fault)
 		prot |= KVM_PGTABLE_PROT_X;
 
-	if (device)
-		prot |= KVM_PGTABLE_PROT_DEVICE;
-	else if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC))
-		prot |= KVM_PGTABLE_PROT_X;
+	switch (mapping_type(vma->vm_page_prot)) {
+	case MT_DEVICE_nGnRE:
+		prot |= KVM_PGTABLE_PROT_DEVICE_nGnRE;
+		break;
+	case MT_DEVICE_nGnRnE:
+		prot |= KVM_PGTABLE_PROT_DEVICE_nGnRnE;
+		break;
+	/* MT_NORMAL/_TAGGED/_NC */
+	default:
+		if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC))
+			prot |= KVM_PGTABLE_PROT_X;
+	}
 
 	/*
 	 * Under the premise of getting a FSC_PERM fault, we just need to relax
-- 
2.17.1
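P.S.: for reviewers, a minimal sketch of the VMA-creator side this
series honors (illustration only, not part of the patch; example_mmap
and example_pfn are hypothetical). A driver mapping its MMIO with
pgprot_writecombine() produces MT_NORMAL_NC, which falls into the
default branch above, so stage 2 no longer forces Device-nGnRE:

#include <linux/fs.h>
#include <linux/mm.h>

static unsigned long example_pfn;	/* hypothetical pfn of a device BAR */

static int example_mmap(struct file *file, struct vm_area_struct *vma)
{
	/* Request Normal-NC; user_mem_abort() decodes this from the VMA */
	vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);

	return remap_pfn_range(vma, vma->vm_start, example_pfn,
			       vma->vm_end - vma->vm_start,
			       vma->vm_page_prot);
}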