Re: [RFC PATCH 17/32] KVM: PPC: Book3S HV: Refactor radix page fault handler

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Sep 21, 2018 at 08:01:48PM +1000, Paul Mackerras wrote:
> From: Suraj Jitindar Singh <sjitindarsingh@xxxxxxxxx>
> 
> The radix page fault handler accounts for all cases, including just
> needing to insert a pte.  This breaks it up into separate functions for
> the two main cases: setting the R/C (reference/change) bits and
> inserting a pte.
> 
> This allows us to make both setting the R/C (reference/change) bits
> and inserting a pte generic, so that they work on any pgtable rather
> than only on the one for this guest.
> 
> [paulus@xxxxxxxxxx - reduced diffs from previous code]
> 
> Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@xxxxxxxxx>
> Signed-off-by: Paul Mackerras <paulus@xxxxxxxxxx>

Reviewed-by: David Gibson <david@xxxxxxxxxxxxxxxxxxxxx>

> ---
>  arch/powerpc/kvm/book3s_64_mmu_radix.c | 210 +++++++++++++++++++--------------
>  1 file changed, 123 insertions(+), 87 deletions(-)
> 
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
> index f2976f4..47f2b18 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
> @@ -400,8 +400,9 @@ static void kvmppc_unmap_free_pud_entry_table(struct kvm *kvm, pud_t *pud,
>   */
>  #define PTE_BITS_MUST_MATCH (~(_PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED))
>  
> -static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
> -			     unsigned int level, unsigned long mmu_seq)
> +static int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
> +			     unsigned long gpa, unsigned int level,
> +			     unsigned long mmu_seq)
>  {
>  	pgd_t *pgd;
>  	pud_t *pud, *new_pud = NULL;
> @@ -410,7 +411,7 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
>  	int ret;
>  
>  	/* Traverse the guest's 2nd-level tree, allocate new levels needed */
> -	pgd = kvm->arch.pgtable + pgd_index(gpa);
> +	pgd = pgtable + pgd_index(gpa);
>  	pud = NULL;
>  	if (pgd_present(*pgd))
>  		pud = pud_offset(pgd, gpa);
> @@ -565,95 +566,49 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
>  	return ret;
>  }
>  
> -int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
> -				   unsigned long ea, unsigned long dsisr)
> +static bool kvmppc_hv_handle_set_rc(struct kvm *kvm, pgd_t *pgtable,
> +				    bool writing, unsigned long gpa)
> +{
> +	unsigned long pgflags;
> +	unsigned int shift;
> +	pte_t *ptep;
> +
> +	/*
> +	 * Need to set an R or C bit in the 2nd-level tables;
> +	 * since we are just helping out the hardware here,
> +	 * it is sufficient to do what the hardware does.
> +	 */
> +	pgflags = _PAGE_ACCESSED;
> +	if (writing)
> +		pgflags |= _PAGE_DIRTY;
> +	/*
> +	 * We are walking the secondary (partition-scoped) page table here.
> +	 * We can do this without disabling irq because the Linux MM
> +	 * subsystem doesn't do THP splits and collapses on this tree.
> +	 */
> +	ptep = __find_linux_pte(pgtable, gpa, NULL, &shift);
> +	if (ptep && pte_present(*ptep) && (!writing || pte_write(*ptep))) {
> +		kvmppc_radix_update_pte(kvm, ptep, 0, pgflags, gpa, shift);
> +		return true;
> +	}
> +	return false;
> +}
> +
> +static int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
> +				unsigned long gpa,
> +				struct kvm_memory_slot *memslot,
> +				bool writing, bool kvm_ro,
> +				pte_t *inserted_pte, unsigned int *levelp)
>  {
>  	struct kvm *kvm = vcpu->kvm;
> -	unsigned long mmu_seq;
> -	unsigned long gpa, gfn, hva;
> -	struct kvm_memory_slot *memslot;
>  	struct page *page = NULL;
> -	long ret;
> -	bool writing;
> +	unsigned long mmu_seq;
> +	unsigned long hva, gfn = gpa >> PAGE_SHIFT;
>  	bool upgrade_write = false;
>  	bool *upgrade_p = &upgrade_write;
>  	pte_t pte, *ptep;
> -	unsigned long pgflags;
>  	unsigned int shift, level;
> -
> -	/* Check for unusual errors */
> -	if (dsisr & DSISR_UNSUPP_MMU) {
> -		pr_err("KVM: Got unsupported MMU fault\n");
> -		return -EFAULT;
> -	}
> -	if (dsisr & DSISR_BADACCESS) {
> -		/* Reflect to the guest as DSI */
> -		pr_err("KVM: Got radix HV page fault with DSISR=%lx\n", dsisr);
> -		kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
> -		return RESUME_GUEST;
> -	}
> -
> -	/* Translate the logical address and get the page */
> -	gpa = vcpu->arch.fault_gpa & ~0xfffUL;
> -	gpa &= ~0xF000000000000000ul;
> -	gfn = gpa >> PAGE_SHIFT;
> -	if (!(dsisr & DSISR_PRTABLE_FAULT))
> -		gpa |= ea & 0xfff;
> -	memslot = gfn_to_memslot(kvm, gfn);
> -
> -	/* No memslot means it's an emulated MMIO region */
> -	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
> -		if (dsisr & (DSISR_PRTABLE_FAULT | DSISR_BADACCESS |
> -			     DSISR_SET_RC)) {
> -			/*
> -			 * Bad address in guest page table tree, or other
> -			 * unusual error - reflect it to the guest as DSI.
> -			 */
> -			kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
> -			return RESUME_GUEST;
> -		}
> -		return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
> -					      dsisr & DSISR_ISSTORE);
> -	}
> -
> -	writing = (dsisr & DSISR_ISSTORE) != 0;
> -	if (memslot->flags & KVM_MEM_READONLY) {
> -		if (writing) {
> -			/* give the guest a DSI */
> -			dsisr = DSISR_ISSTORE | DSISR_PROTFAULT;
> -			kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
> -			return RESUME_GUEST;
> -		}
> -		upgrade_p = NULL;
> -	}
> -
> -	if (dsisr & DSISR_SET_RC) {
> -		/*
> -		 * Need to set an R or C bit in the 2nd-level tables;
> -		 * since we are just helping out the hardware here,
> -		 * it is sufficient to do what the hardware does.
> -		 */
> -		pgflags = _PAGE_ACCESSED;
> -		if (writing)
> -			pgflags |= _PAGE_DIRTY;
> -		/*
> -		 * We are walking the secondary page table here. We can do this
> -		 * without disabling irq.
> -		 */
> -		spin_lock(&kvm->mmu_lock);
> -		ptep = __find_linux_pte(kvm->arch.pgtable,
> -					gpa, NULL, &shift);
> -		if (ptep && pte_present(*ptep) &&
> -		    (!writing || pte_write(*ptep))) {
> -			kvmppc_radix_update_pte(kvm, ptep, 0, pgflags,
> -						gpa, shift);
> -			dsisr &= ~DSISR_SET_RC;
> -		}
> -		spin_unlock(&kvm->mmu_lock);
> -		if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE |
> -			       DSISR_PROTFAULT | DSISR_SET_RC)))
> -			return RESUME_GUEST;
> -	}
> +	int ret;
>  
>  	/* used to check for invalidations in progress */
>  	mmu_seq = kvm->mmu_notifier_seq;
> @@ -666,7 +621,7 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
>  	 * is that the page is writable.
>  	 */
>  	hva = gfn_to_hva_memslot(memslot, gfn);
> -	if (upgrade_p && __get_user_pages_fast(hva, 1, 1, &page) == 1) {
> +	if (!kvm_ro && __get_user_pages_fast(hva, 1, 1, &page) == 1) {
>  		upgrade_write = true;
>  	} else {
>  		unsigned long pfn;
> @@ -724,7 +679,12 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
>  	}
>  
>  	/* Allocate space in the tree and write the PTE */
> -	ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
> +	ret = kvmppc_create_pte(kvm, kvm->arch.pgtable, pte, gpa, level,
> +				mmu_seq);
> +	if (inserted_pte)
> +		*inserted_pte = pte;
> +	if (levelp)
> +		*levelp = level;
>  
>  	if (page) {
>  		if (!ret && (pte_val(pte) & _PAGE_WRITE))
> @@ -732,6 +692,82 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
>  		put_page(page);
>  	}
>  
> +	return ret;
> +}
> +
> +int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
> +				   unsigned long ea, unsigned long dsisr)
> +{
> +	struct kvm *kvm = vcpu->kvm;
> +	unsigned long gpa, gfn;
> +	struct kvm_memory_slot *memslot;
> +	long ret;
> +	bool writing = !!(dsisr & DSISR_ISSTORE);
> +	bool kvm_ro = false;
> +
> +	/* Check for unusual errors */
> +	if (dsisr & DSISR_UNSUPP_MMU) {
> +		pr_err("KVM: Got unsupported MMU fault\n");
> +		return -EFAULT;
> +	}
> +	if (dsisr & DSISR_BADACCESS) {
> +		/* Reflect to the guest as DSI */
> +		pr_err("KVM: Got radix HV page fault with DSISR=%lx\n", dsisr);
> +		kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
> +		return RESUME_GUEST;
> +	}
> +
> +	/* Translate the logical address */
> +	gpa = vcpu->arch.fault_gpa & ~0xfffUL;
> +	gpa &= ~0xF000000000000000ul;
> +	gfn = gpa >> PAGE_SHIFT;
> +	if (!(dsisr & DSISR_PRTABLE_FAULT))
> +		gpa |= ea & 0xfff;
> +
> +	/* Get the corresponding memslot */
> +	memslot = gfn_to_memslot(kvm, gfn);
> +
> +	/* No memslot means it's an emulated MMIO region */
> +	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
> +		if (dsisr & (DSISR_PRTABLE_FAULT | DSISR_BADACCESS |
> +			     DSISR_SET_RC)) {
> +			/*
> +			 * Bad address in guest page table tree, or other
> +			 * unusual error - reflect it to the guest as DSI.
> +			 */
> +			kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
> +			return RESUME_GUEST;
> +		}
> +		return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, writing);
> +	}
> +
> +	if (memslot->flags & KVM_MEM_READONLY) {
> +		if (writing) {
> +			/* give the guest a DSI */
> +			kvmppc_core_queue_data_storage(vcpu, ea, DSISR_ISSTORE |
> +						       DSISR_PROTFAULT);
> +			return RESUME_GUEST;
> +		}
> +		kvm_ro = true;
> +	}
> +
> +	/* Failed to set the reference/change bits */
> +	if (dsisr & DSISR_SET_RC) {
> +		spin_lock(&kvm->mmu_lock);
> +		if (kvmppc_hv_handle_set_rc(kvm, kvm->arch.pgtable,
> +					    writing, gpa))
> +			dsisr &= ~DSISR_SET_RC;
> +		spin_unlock(&kvm->mmu_lock);
> +
> +		if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE |
> +			       DSISR_PROTFAULT | DSISR_SET_RC)))
> +			return RESUME_GUEST;
> +	}
> +
> +	/* Try to insert a pte */
> +	ret = kvmppc_book3s_instantiate_page(vcpu, gpa, memslot, writing,
> +					     kvm_ro, NULL, NULL);
> +
>  	if (ret == 0 || ret == -EAGAIN)
>  		ret = RESUME_GUEST;
>  	return ret;

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

Attachment: signature.asc
Description: PGP signature


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux