Re: [PATCH 3/3] add support for change_pte mmu notifiers

On Sat, Sep 12, 2009 at 09:41:10AM +0300, Izik Eidus wrote:
> Marcelo Tosatti wrote:
>> On Thu, Sep 10, 2009 at 07:38:58PM +0300, Izik Eidus wrote:
>>   
>>> this is needed for kvm if it wants ksm to directly map pages into its
>>> shadow page tables.
>>>
>>> Signed-off-by: Izik Eidus <ieidus@xxxxxxxxxx>
>>> ---
>>>  arch/x86/include/asm/kvm_host.h |    1 +
>>>  arch/x86/kvm/mmu.c              |   70 ++++++++++++++++++++++++++++++++++----
>>>  virt/kvm/kvm_main.c             |   14 ++++++++
>>>  3 files changed, 77 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>>> index 6046e6f..594d131 100644
>>> --- a/arch/x86/include/asm/kvm_host.h
>>> +++ b/arch/x86/include/asm/kvm_host.h
>>> @@ -797,6 +797,7 @@ asmlinkage void kvm_handle_fault_on_reboot(void);
>>>  #define KVM_ARCH_WANT_MMU_NOTIFIER
>>>  int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
>>>  int kvm_age_hva(struct kvm *kvm, unsigned long hva);
>>> +void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
>>>  int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
>>>  int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
>>>  int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
>>> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
>>> index a7151b8..3fd19f2 100644
>>> --- a/arch/x86/kvm/mmu.c
>>> +++ b/arch/x86/kvm/mmu.c
>>> @@ -282,6 +282,11 @@ static pfn_t spte_to_pfn(u64 pte)
>>>  	return (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
>>>  }
>>>
>>> +static pte_t ptep_val(pte_t *ptep)
>>> +{
>>> +	return *ptep;
>>> +}
>>> +
>>>  static gfn_t pse36_gfn_delta(u32 gpte)
>>>  {
>>>  	int shift = 32 - PT32_DIR_PSE36_SHIFT - PAGE_SHIFT;
>>> @@ -748,7 +753,8 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
>>>  	return write_protected;
>>>  }
>>>
>>> -static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
>>> +static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
>>> +			   unsigned long data)
>>>  {
>>>  	u64 *spte;
>>>  	int need_tlb_flush = 0;
>>> @@ -763,8 +769,48 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
>>>  	return need_tlb_flush;
>>>  }
>>>
>>> +static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
>>> +			     unsigned long data)
>>> +{
>>> +	int need_flush = 0;
>>> +	u64 *spte, new_spte;
>>> +	pte_t *ptep = (pte_t *)data;
>>> +	pfn_t new_pfn;
>>> +
>>> +	new_pfn = pte_pfn(ptep_val(ptep));
>>> +	spte = rmap_next(kvm, rmapp, NULL);
>>> +	while (spte) {
>>> +		BUG_ON(!is_shadow_present_pte(*spte));
>>> +		rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte);
>>> +		need_flush = 1;
>>> +		if (pte_write(ptep_val(ptep))) {
>>> +			rmap_remove(kvm, spte);
>>> +			__set_spte(spte, shadow_trap_nonpresent_pte);
>>> +			spte = rmap_next(kvm, rmapp, NULL);
>>> +		} else {
>>> +			new_spte = *spte &~ (PT64_BASE_ADDR_MASK);
>>> +			new_spte |= new_pfn << PAGE_SHIFT;
>>> +
>>> +			if (!pte_write(ptep_val(ptep))) {
>>> +				new_spte &= ~PT_WRITABLE_MASK;
>>> +				new_spte &= ~SPTE_HOST_WRITEABLE;
>>> +				if (is_writeble_pte(*spte))
>>> +					kvm_set_pfn_dirty(spte_to_pfn(*spte));
>>> +			}
>>> +			__set_spte(spte, new_spte);
>>> +			spte = rmap_next(kvm, rmapp, spte);
>>> +		}
>>> +	}
>>> +	if (need_flush)
>>> +		kvm_flush_remote_tlbs(kvm);
>>> +
>>> +	return 0;
>>> +}
>>> +
>>>  static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
>>> -			  int (*handler)(struct kvm *kvm, unsigned long *rmapp))
>>> +			  unsigned long data,
>>> +			  int (*handler)(struct kvm *kvm, unsigned long *rmapp,
>>> +					 unsigned long data))
>>>  {
>>>  	int i, j;
>>>  	int retval = 0;
>>> @@ -786,13 +832,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
>>>  		if (hva >= start && hva < end) {
>>>  			gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
>>>
>>> -			retval |= handler(kvm, &memslot->rmap[gfn_offset]);
>>> +			retval |= handler(kvm, &memslot->rmap[gfn_offset],
>>> +					  data);
>>>
>>>  			for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) {
>>>  				int idx = gfn_offset;
>>>  				idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j);
>>>  				retval |= handler(kvm,
>>> -					&memslot->lpage_info[j][idx].rmap_pde);
>>> +					&memslot->lpage_info[j][idx].rmap_pde,
>>> +					data);
>>>     
>>
>> If change_pte is called to modify a largepage pte, and the shadow has
>> that largepage mapped with 4k sptes, you'll set the wrong pfn. That is,
>> the patch does not attempt to handle different page sizes properly.
>>
>> So either disable change_pte updates to non-4k host vmas (making it
>> explicit that it does not handle different page sizes), or handle
>> largepages properly, although I don't see any use for change_pte on
>> largepage mappings?
>>   
>
> change_pte doesn't get called on non-4k pages...
> So would adding a comment to this function, saying it works only on 4k
> pages, be enough?

It would be better to fail/WARN on non-4k host ptes (can't it do that?),
but if that is not possible I think a comment would be enough.
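
As a minimal sketch of the fail/WARN idea (an assumption of how it could
look, reusing the patch's ptep_val() helper and x86's pte_huge() test;
not code from this thread or the eventually applied patch):

/*
 * Sketch only: bail out of kvm_set_pte_rmapp() early if the new host
 * pte is not a 4k pte, since the pfn rewrite below would install the
 * wrong pfn for a largepage mapping.
 */
static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
			     unsigned long data)
{
	pte_t *ptep = (pte_t *)data;

	/* change_pte is only expected for 4k host ptes (KSM pages) */
	if (WARN_ON(pte_huge(ptep_val(ptep))))
		return 0;

	/* ... pfn rewrite loop as in the patch above ... */
	return 0;
}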

