On Sat, Sep 12, 2009 at 09:41:10AM +0300, Izik Eidus wrote:
> Marcelo Tosatti wrote:
>> On Thu, Sep 10, 2009 at 07:38:58PM +0300, Izik Eidus wrote:
>>
>>> this is needed for kvm if it wants ksm to directly map pages into its
>>> shadow page tables.
>>>
>>> Signed-off-by: Izik Eidus <ieidus@xxxxxxxxxx>
>>> ---
>>>  arch/x86/include/asm/kvm_host.h |    1 +
>>>  arch/x86/kvm/mmu.c              |   70 ++++++++++++++++++++++++++++++++++----
>>>  virt/kvm/kvm_main.c             |   14 ++++++++
>>>  3 files changed, 77 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>>> index 6046e6f..594d131 100644
>>> --- a/arch/x86/include/asm/kvm_host.h
>>> +++ b/arch/x86/include/asm/kvm_host.h
>>> @@ -797,6 +797,7 @@ asmlinkage void kvm_handle_fault_on_reboot(void);
>>>  #define KVM_ARCH_WANT_MMU_NOTIFIER
>>>  int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
>>>  int kvm_age_hva(struct kvm *kvm, unsigned long hva);
>>> +void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
>>>  int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
>>>  int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
>>>  int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
>>> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
>>> index a7151b8..3fd19f2 100644
>>> --- a/arch/x86/kvm/mmu.c
>>> +++ b/arch/x86/kvm/mmu.c
>>> @@ -282,6 +282,11 @@ static pfn_t spte_to_pfn(u64 pte)
>>>  	return (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
>>>  }
>>>
>>> +static pte_t ptep_val(pte_t *ptep)
>>> +{
>>> +	return *ptep;
>>> +}
>>> +
>>>  static gfn_t pse36_gfn_delta(u32 gpte)
>>>  {
>>>  	int shift = 32 - PT32_DIR_PSE36_SHIFT - PAGE_SHIFT;
>>> @@ -748,7 +753,8 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
>>>  	return write_protected;
>>>  }
>>>
>>> -static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
>>> +static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
>>> +			   unsigned long data)
>>>  {
>>>  	u64 *spte;
>>>  	int need_tlb_flush = 0;
>>> @@ -763,8 +769,48 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
>>>  	return need_tlb_flush;
>>>  }
>>>
>>> +static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
>>> +			     unsigned long data)
>>> +{
>>> +	int need_flush = 0;
>>> +	u64 *spte, new_spte;
>>> +	pte_t *ptep = (pte_t *)data;
>>> +	pfn_t new_pfn;
>>> +
>>> +	new_pfn = pte_pfn(ptep_val(ptep));
>>> +	spte = rmap_next(kvm, rmapp, NULL);
>>> +	while (spte) {
>>> +		BUG_ON(!is_shadow_present_pte(*spte));
>>> +		rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte);
>>> +		need_flush = 1;
>>> +		if (pte_write(ptep_val(ptep))) {
>>> +			rmap_remove(kvm, spte);
>>> +			__set_spte(spte, shadow_trap_nonpresent_pte);
>>> +			spte = rmap_next(kvm, rmapp, NULL);
>>> +		} else {
>>> +			new_spte = *spte &~ (PT64_BASE_ADDR_MASK);
>>> +			new_spte |= new_pfn << PAGE_SHIFT;
>>> +
>>> +			if (!pte_write(ptep_val(ptep))) {
>>> +				new_spte &= ~PT_WRITABLE_MASK;
>>> +				new_spte &= ~SPTE_HOST_WRITEABLE;
>>> +				if (is_writeble_pte(*spte))
>>> +					kvm_set_pfn_dirty(spte_to_pfn(*spte));
>>> +			}
>>> +			__set_spte(spte, new_spte);
>>> +			spte = rmap_next(kvm, rmapp, spte);
>>> +		}
>>> +	}
>>> +	if (need_flush)
>>> +		kvm_flush_remote_tlbs(kvm);
>>> +
>>> +	return 0;
>>> +}
>>> +
>>>  static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
>>> -			  int (*handler)(struct kvm *kvm, unsigned long *rmapp))
>>> +			  unsigned long data,
>>> +			  int (*handler)(struct kvm *kvm, unsigned long *rmapp,
>>> +					 unsigned long data))
>>>  {
>>>  	int i, j;
>>>  	int retval = 0;
>>> @@ -786,13 +832,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
>>>  		if (hva >= start && hva < end) {
>>>  			gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
>>>
>>> -			retval |= handler(kvm, &memslot->rmap[gfn_offset]);
>>> +			retval |= handler(kvm, &memslot->rmap[gfn_offset],
>>> +					  data);
>>>
>>>  			for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) {
>>>  				int idx = gfn_offset;
>>>  				idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j);
>>>  				retval |= handler(kvm,
>>> -					&memslot->lpage_info[j][idx].rmap_pde);
>>> +					&memslot->lpage_info[j][idx].rmap_pde,
>>> +					data);
>>
>> If change_pte is called to modify a largepage pte, and the shadow has
>> that largepage mapped with 4k sptes, you'll set the wrong pfn. That is,
>> the patch does not attempt to handle different page sizes properly.
>>
>> So either disable change_pte updates on non-4k host vmas (making it
>> explicit that it does not handle different page sizes), or handle
>> largepages properly, although I don't see any use for change_pte on
>> largepage mappings?
>
> change_pte doesn't get called on non-4k pages...
> So adding a comment to this function saying it works only on 4k pages
> would be enough?

It would be better to fail/WARN on non-4k host ptes (can't it?), but if
that is not possible, I think a comment would be enough.
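
For reference, a minimal sketch of the WARN variant suggested above,
assuming the huge/PSE bit on the host pte (pte_huge()) is the right
in-line check; the placement at the top of kvm_set_pte_rmapp() and the
early return are illustrative, not part of the posted patch:

	pte_t *ptep = (pte_t *)data;

	/*
	 * change_pte is only expected for 4k host ptes (KSM merges
	 * individual 4k pages).  A huge pte here would make the pfn
	 * computed below wrong for a largepage that the shadow maps
	 * with 4k sptes, so flag it and leave the sptes untouched.
	 */
	if (WARN_ON_ONCE(pte_huge(ptep_val(ptep))))
		return 0;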
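The mmu.c entry point that connects the new declaration to the handler
is not among the quoted hunks; from the kvm_set_spte_hva() prototype and
the new data parameter of kvm_handle_hva(), it presumably amounts to
passing the pte by address, along these lines:

	void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
	{
		/* Smuggle the host pte through the generic data argument. */
		kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp);
	}

Carrying the pte_t through an unsigned long (and reading it back via
ptep_val()) keeps the kvm_handle_hva() signature uniform for the other
handlers, such as kvm_unmap_rmapp() above, which simply ignore data.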
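Likewise, the virt/kvm/kvm_main.c side (the 14 lines in the diffstat) is
not quoted here; presumably it adds a change_pte mmu-notifier callback
that forwards to the new arch hook. A sketch under that assumption,
following the mmu_notifier_to_kvm()/mmu_lock/mmu_notifier_seq pattern of
the existing invalidate callbacks in that file:

	static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
						struct mm_struct *mm,
						unsigned long address,
						pte_t pte)
	{
		struct kvm *kvm = mmu_notifier_to_kvm(mn);

		/* Bump the notifier sequence so racing page faults retry. */
		spin_lock(&kvm->mmu_lock);
		kvm->mmu_notifier_seq++;
		kvm_set_spte_hva(kvm, address, pte);
		spin_unlock(&kvm->mmu_lock);
	}

with the callback wired into the .change_pte slot of
kvm_mmu_notifier_ops.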