Current KVM doesn't check reserved bits of guest page table, while may use reserved bits to bypass guest #PF in VMX. This patch add this check while leaving shadow pte un-constructed if guest RSVD=1. Comments? Thx, eddie diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 55fd4c5..9370ff0 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -261,6 +261,8 @@ struct kvm_mmu { union kvm_mmu_page_role base_role; u64 *pae_root; + u64 rsvd_bits_mask[4]; + u64 large_page_rsvd_mask; }; struct kvm_vcpu_arch { diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 31ba3cb..7f55c4a 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -127,6 +127,7 @@ module_param(oos_shadow, bool, 0644); #define PFERR_PRESENT_MASK (1U << 0) #define PFERR_WRITE_MASK (1U << 1) #define PFERR_USER_MASK (1U << 2) +#define PFERR_RSVD_MASK (1U << 3) #define PFERR_FETCH_MASK (1U << 4) #define PT_DIRECTORY_LEVEL 2 @@ -179,6 +180,13 @@ static u64 __read_mostly shadow_user_mask; static u64 __read_mostly shadow_accessed_mask; static u64 __read_mostly shadow_dirty_mask; static u64 __read_mostly shadow_mt_mask; +extern struct kvm_cpuid_entry2 *kvm_find_cpuid_entry( + struct kvm_vcpu *vcpu, u32 function, u32 index); + +static inline u64 rsvd_bits(int s, int e) +{ + return ((1ULL << (e - s + 1)) - 1) << s; +} void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte) { @@ -251,6 +259,18 @@ static int is_rmap_pte(u64 pte) return is_shadow_present_pte(pte); } +static int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) +{ + u32 function=0x80000008; + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, function, 0); + if (best) { + return best->eax & 0xff; + } + return 40; +} + static pfn_t spte_to_pfn(u64 pte) { return (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; @@ -2156,6 +2176,17 @@ static void paging_free(struct kvm_vcpu *vcpu) nonpaging_free(vcpu); } +static int is_rsvd_bits_set(struct kvm_vcpu *vcpu, unsigned long pte, int level) +{ + if (level == PT_DIRECTORY_LEVEL && (pte & PT_PAGE_SIZE_MASK)) { + /* large page */ + return (pte & vcpu->arch.mmu.large_page_rsvd_mask) != 0; + } + else + /* 4K page */ + return (pte & vcpu->arch.mmu.rsvd_bits_mask[level-1]) != 0; +} + #define PTTYPE 64 #include "paging_tmpl.h" #undef PTTYPE @@ -2184,6 +2215,18 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level) static int paging64_init_context(struct kvm_vcpu *vcpu) { + struct kvm_mmu *context = &vcpu->arch.mmu; + int maxphyaddr = cpuid_maxphyaddr(vcpu); + + context->rsvd_bits_mask[3] = + rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8); + context->rsvd_bits_mask[2] = + rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8); + context->rsvd_bits_mask[1] = + rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8); + context->rsvd_bits_mask[0] = rsvd_bits(maxphyaddr, 51); + context->large_page_rsvd_mask = /* 2MB PDE */ + rsvd_bits(maxphyaddr, 51) | rsvd_bits(13, 20); return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL); } @@ -2191,6 +2234,15 @@ static int paging32_init_context(struct kvm_vcpu *vcpu) { struct kvm_mmu *context = &vcpu->arch.mmu; + /* no rsvd bits for 2 level 4K page table entries */ + context->rsvd_bits_mask[0] = 0; + context->rsvd_bits_mask[1] = 0; + if (is_cpuid_PSE36()) + /* 36bits PSE 4MB page */ + context->large_page_rsvd_mask = rsvd_bits(17, 21); + else + /* 32 bits PSE 4MB page */ + context->large_page_rsvd_mask = rsvd_bits(13, 21); context->new_cr3 = paging_new_cr3; context->page_fault = paging32_page_fault; context->gva_to_gpa = paging32_gva_to_gpa; @@ -2206,6 +2258,18 @@ static int paging32_init_context(struct kvm_vcpu *vcpu) static int paging32E_init_context(struct kvm_vcpu *vcpu) { + struct kvm_mmu *context = &vcpu->arch.mmu; + int maxphyaddr = cpuid_maxphyaddr(vcpu); + + /* 3 levels */ + context->rsvd_bits_mask[2] = rsvd_bits(maxphyaddr, 63) | + rsvd_bits(7, 8) | rsvd_bits(1,2); /* PDPTE */ + context->rsvd_bits_mask[1] = rsvd_bits(maxphyaddr, 63); /* PDE */ + context->rsvd_bits_mask[0] = /* PTE */ + rsvd_bits(maxphyaddr, 63) | rsvd_bits(7, 8) | rsvd_bits(1, 2); + context->large_page_rsvd_mask = /* 2M page */ + rsvd_bits(maxphyaddr, 63) | rsvd_bits(13, 20); + return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL); } diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 7314c09..844efe9 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -123,6 +123,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker, gfn_t table_gfn; unsigned index, pt_access, pte_access; gpa_t pte_gpa; + int rsvd_fault; pgprintk("%s: addr %lx\n", __func__, addr); walk: @@ -153,10 +154,13 @@ walk: walker->level - 1, table_gfn); kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte)); + rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level); if (!is_present_pte(pte)) goto not_present; + if (rsvd_fault) + goto access_error; if (write_fault && !is_writeble_pte(pte)) if (user_fault || is_write_protection(vcpu)) goto access_error; @@ -233,6 +237,8 @@ err: walker->error_code |= PFERR_USER_MASK; if (fetch_fault) walker->error_code |= PFERR_FETCH_MASK; + if (rsvd_fault) + walker->error_code |= PFERR_RSVD_MASK; return 0; } -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html