From: Haiwei Li <haiwei.li@xxxxxxxxx>

During guest address translation, we need to check whether an exception
occurs, triggered by an invalid address or a permission violation.
Callers of read_gva/write_gva should check `exception` and handle it,
for example by injecting a #PF into the guest.

Signed-off-by: Haiwei Li <haiwei.li@xxxxxxxxx>
---
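Note (not part of the patch): a minimal sketch of how a caller might
consume `exception`, assuming read_gva() follows the copy_gva()
argument order below; pkvm_inject_pf() is a hypothetical helper name
standing in for whatever injection path the hypervisor actually
provides:

	struct x86_exception exception = {0};
	int ret;

	ret = read_gva(vcpu, gva, buf, bytes, &exception);
	if (ret < 0)
		/* reflect the translation fault back into the guest as a #PF */
		pkvm_inject_pf(vcpu, &exception); /* hypothetical */
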
 arch/x86/kvm/vmx/pkvm/hyp/memory.c | 97 ++++++++++++++++++++++++++++--
 1 file changed, 92 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/vmx/pkvm/hyp/memory.c b/arch/x86/kvm/vmx/pkvm/hyp/memory.c
index a42669ccf89c..6a400aef1bd8 100644
--- a/arch/x86/kvm/vmx/pkvm/hyp/memory.c
+++ b/arch/x86/kvm/vmx/pkvm/hyp/memory.c
@@ -78,11 +78,97 @@ static struct pkvm_mm_ops mm_ops = {
 	.phys_to_virt = host_gpa2hva,
 };
 
-static int check_translation(struct kvm_vcpu *vcpu, gpa_t gpa,
+static int check_translation(struct kvm_vcpu *vcpu, gva_t gva, gpa_t gpa,
 			     u64 prot, u32 access, struct x86_exception *exception)
 {
-	/* TODO: exception for #PF */
+	u16 errcode = 0;
+	bool page_rw_flags_on = true;
+	bool user_mode_addr = true;
+	const int user_mode_access = access & PFERR_USER_MASK;
+	const int write_access = access & PFERR_WRITE_MASK;
+	bool cr4_smap = vmcs_readl(GUEST_CR4) & X86_CR4_SMAP;
+	bool cr0_wp = vmcs_readl(GUEST_CR0) & X86_CR0_WP;
+
+	/*
+	 * The pkvm hypervisor does not do instruction emulation, so we do
+	 * not expect guest memory accesses for instruction fetch here.
+	 */
+	WARN_ON(access & PFERR_FETCH_MASK);
+
+	/* pte is not present */
+	if (gpa == INVALID_ADDR) {
+		goto check_fault;
+	} else {
+		errcode |= PFERR_PRESENT_MASK;
+
+		/* TODO: check reserved bits and PK */
+
+		/* check for R/W */
+		if ((prot & _PAGE_RW) == 0) {
+			if (write_access && (user_mode_access || cr0_wp))
+				/*
+				 * case 1: supervisor mode and CR0.WP is 1
+				 * case 2: user mode
+				 */
+				goto check_fault;
+			page_rw_flags_on = false;
+		}
+
+		/* check for U/S */
+		if ((prot & _PAGE_USER) == 0) {
+			user_mode_addr = false;
+			if (user_mode_access)
+				goto check_fault;
+		}
+
+		/*
+		 * When SMAP is on, the check only needs to apply when the
+		 * address is a user-mode address.
+		 *
+		 * Also, SMAP only impacts supervisor-mode accesses.
+		 */
+		/* if SMAP is enabled and supervisor-mode access */
+		if (cr4_smap && (!user_mode_access) && user_mode_addr) {
+			bool acflag = vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_AC;
+
+			/* read from user-mode address, EFLAGS.AC = 0 */
+			if ((!write_access) && (!acflag)) {
+				goto check_fault;
+			} else if (write_access) {
+				/* write to user-mode address */
+
+				/* CR0.WP = 0, EFLAGS.AC = 0 */
+				if ((!cr0_wp) && (!acflag))
+					goto check_fault;
+
+				/*
+				 * CR0.WP = 1, EFLAGS.AC = 1, R/W flag is 0
+				 * on any paging-structure entry
+				 */
+				if (cr0_wp && acflag && (!page_rw_flags_on))
+					goto check_fault;
+
+				/* CR0.WP = 1, EFLAGS.AC = 0 */
+				if (cr0_wp && (!acflag))
+					goto check_fault;
+			} else {
+				/* do nothing */
+			}
+		}
+	}
 	return 0;
+
+check_fault:
+	errcode |= write_access | user_mode_access;
+	exception->error_code = errcode;
+	exception->vector = PF_VECTOR;
+	exception->error_code_valid = true;
+	exception->address = gva;
+	exception->nested_page_fault = false;
+	exception->async_page_fault = false;
+	return -EFAULT;
+
 }
 
 int gva2gpa(struct kvm_vcpu *vcpu, gva_t gva, gpa_t *gpa,
@@ -104,10 +190,8 @@ int gva2gpa(struct kvm_vcpu *vcpu, gva_t gva, gpa_t *gpa,
 	pkvm_pgtable_lookup(&guest_mmu, (unsigned long)gva,
 			    (unsigned long *)&_gpa, &prot, &pg_level);
 	*gpa = _gpa;
-	if (_gpa == INVALID_ADDR)
-		return -EFAULT;
 
-	return check_translation(vcpu, _gpa, prot, access, exception);
+	return check_translation(vcpu, gva, _gpa, prot, access, exception);
 }
 
 static inline int __copy_gpa(struct kvm_vcpu *vcpu, void *addr, gpa_t gpa,
@@ -138,6 +222,9 @@ static int copy_gva(struct kvm_vcpu *vcpu, gva_t gva, void *addr,
 	unsigned int len;
 	int ret = 0;
 
+	if (!from_guest)
+		access |= PFERR_WRITE_MASK;
+
 	while ((bytes > 0) && (ret == 0)) {
 		ret = gva2gpa(vcpu, gva, &gpa, access, exception);
 		if (ret >= 0) {
-- 
2.25.1