Re: [PATCH v4 22/46] KVM: PPC: Book3S HV P9: Stop handling hcalls in real-mode in the P9 path

Excerpts from Cédric Le Goater's message of March 23, 2021 11:23 pm:
> On 3/23/21 2:02 AM, Nicholas Piggin wrote:
>> In the interest of minimising the amount of code that is run in
>> "real-mode", don't handle hcalls in real mode in the P9 path.
>> 
>> POWER8 and earlier are much more expensive to exit from HV real mode
>> and switch to host mode, because on those processors HV interrupts get
>> to the hypervisor with the MMU off, and the other threads in the core
>> need to be pulled out of the guest, and SLBs all need to be saved,
>> ERATs invalidated, and host SLB reloaded before the MMU is re-enabled
>> in host mode. Hash guests also require a lot of hcalls to run. The
>> XICS interrupt controller requires hcalls to run.
>> 
>> By contrast, POWER9 has independent thread switching, and in radix mode
>> the hypervisor is already in a host virtual memory mode when the HV
>> interrupt is taken. Radix + xive guests don't need hcalls to handle
>> interrupts or manage translations.
>> 
>> So it's much less important to handle hcalls in real mode in P9.
>> 
>> Signed-off-by: Nicholas Piggin <npiggin@xxxxxxxxx>
>> ---
>>  arch/powerpc/include/asm/kvm_ppc.h      |  5 ++
>>  arch/powerpc/kvm/book3s_hv.c            | 57 ++++++++++++++++----
>>  arch/powerpc/kvm/book3s_hv_rmhandlers.S |  5 ++
>>  arch/powerpc/kvm/book3s_xive.c          | 70 +++++++++++++++++++++++++
>>  4 files changed, 127 insertions(+), 10 deletions(-)
>> 
>> diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
>> index 73b1ca5a6471..db6646c2ade2 100644
>> --- a/arch/powerpc/include/asm/kvm_ppc.h
>> +++ b/arch/powerpc/include/asm/kvm_ppc.h
>> @@ -607,6 +607,7 @@ extern void kvmppc_free_pimap(struct kvm *kvm);
>>  extern int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall);
>>  extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
>>  extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd);
>> +extern int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req);
>>  extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu);
>>  extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
>>  extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
>> @@ -639,6 +640,8 @@ static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
>>  static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { }
>>  static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
>>  	{ return 0; }
>> +static inline int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
>> +	{ return 0; }
>>  #endif
>>  
>>  #ifdef CONFIG_KVM_XIVE
>> @@ -673,6 +676,7 @@ extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
>>  			       int level, bool line_status);
>>  extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu);
>>  extern void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu);
>> +extern void kvmppc_xive_cede_vcpu(struct kvm_vcpu *vcpu);
>>  
>>  static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
>>  {
>> @@ -714,6 +718,7 @@ static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 ir
>>  				      int level, bool line_status) { return -ENODEV; }
>>  static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { }
>>  static inline void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu) { }
>> +static inline void kvmppc_xive_cede_vcpu(struct kvm_vcpu *vcpu) { }
>>  
>>  static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
>>  	{ return 0; }
>> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
>> index fa7614c37e08..17739aaee3d8 100644
>> --- a/arch/powerpc/kvm/book3s_hv.c
>> +++ b/arch/powerpc/kvm/book3s_hv.c
>> @@ -1142,12 +1142,13 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
>>  }
>>  
>>  /*
>> - * Handle H_CEDE in the nested virtualization case where we haven't
>> - * called the real-mode hcall handlers in book3s_hv_rmhandlers.S.
>> + * Handle H_CEDE in the P9 path where we don't call the real-mode hcall
>> + * handlers in book3s_hv_rmhandlers.S.
>> + *
>>   * This has to be done early, not in kvmppc_pseries_do_hcall(), so
>>   * that the cede logic in kvmppc_run_single_vcpu() works properly.
>>   */
>> -static void kvmppc_nested_cede(struct kvm_vcpu *vcpu)
>> +static void kvmppc_cede(struct kvm_vcpu *vcpu)
>>  {
>>  	vcpu->arch.shregs.msr |= MSR_EE;
>>  	vcpu->arch.ceded = 1;
>> @@ -1403,9 +1404,15 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
>>  		/* hcall - punt to userspace */
>>  		int i;
>>  
>> -		/* hypercall with MSR_PR has already been handled in rmode,
>> -		 * and never reaches here.
>> -		 */
>> +		if (unlikely(vcpu->arch.shregs.msr & MSR_PR)) {
>> +			/*
>> +			 * Guest userspace executed sc 1, reflect it back as a
>> +			 * privileged program check interrupt.
>> +			 */
>> +			kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV);
>> +			r = RESUME_GUEST;
>> +			break;
>> +		}
>>  
>>  		run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);
>>  		for (i = 0; i < 9; ++i)
>> @@ -3663,6 +3670,12 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
>>  	return trap;
>>  }
>>  
>> +static inline bool hcall_is_xics(unsigned long req)
>> +{
>> +	return (req == H_EOI || req == H_CPPR || req == H_IPI ||
>> +		req == H_IPOLL || req == H_XIRR || req == H_XIRR_X);
>> +}
>> +
>>  /*
>>   * Virtual-mode guest entry for POWER9 and later when the host and
>>   * guest are both using the radix MMU.  The LPIDR has already been set.
>> @@ -3774,15 +3787,36 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
>>  		/* H_CEDE has to be handled now, not later */
>>  		if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
>>  		    kvmppc_get_gpr(vcpu, 3) == H_CEDE) {
>> -			kvmppc_nested_cede(vcpu);
>> +			kvmppc_cede(vcpu);
>>  			kvmppc_set_gpr(vcpu, 3, 0);
>>  			trap = 0;
>>  		}
>>  	} else {
>>  		kvmppc_xive_push_vcpu(vcpu);
>>  		trap = kvmhv_load_hv_regs_and_go(vcpu, time_limit, lpcr);
>> +		if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
>> +		    !(vcpu->arch.shregs.msr & MSR_PR)) {
>> +			unsigned long req = kvmppc_get_gpr(vcpu, 3);
>> +
>> +			/* H_CEDE has to be handled now, not later */
>> +			if (req == H_CEDE) {
>> +				kvmppc_cede(vcpu);
>> +				kvmppc_xive_cede_vcpu(vcpu); /* may un-cede */
>> +				kvmppc_set_gpr(vcpu, 3, 0);
>> +				trap = 0;
>> +
>> +			/* XICS hcalls must be handled before xive is pulled */
>> +			} else if (hcall_is_xics(req)) {
>> +				int ret;
>> +
>> +				ret = kvmppc_xive_xics_hcall(vcpu, req);
>> +				if (ret != H_TOO_HARD) {
>> +					kvmppc_set_gpr(vcpu, 3, ret);
>> +					trap = 0;
>> +				}
>> +			}
>> +		}
>>  		kvmppc_xive_pull_vcpu(vcpu);
>> -
>>  	}
>>  
>>  	vcpu->arch.slb_max = 0;
>> @@ -4442,8 +4476,11 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
>>  		else
>>  			r = kvmppc_run_vcpu(vcpu);
>>  
>> -		if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
>> -		    !(vcpu->arch.shregs.msr & MSR_PR)) {
>> +		if (run->exit_reason == KVM_EXIT_PAPR_HCALL) {
>> +			if (WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_PR)) {
>> +				r = RESUME_GUEST;
>> +				continue;
>> +			}
>>  			trace_kvm_hcall_enter(vcpu);
>>  			r = kvmppc_pseries_do_hcall(vcpu);
>>  			trace_kvm_hcall_exit(vcpu, r);
>> diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
>> index c11597f815e4..2d0d14ed1d92 100644
>> --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
>> +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
>> @@ -1397,9 +1397,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
>>  	mr	r4,r9
>>  	bge	fast_guest_return
>>  2:
>> +	/* If we came in through the P9 short path, no real mode hcalls */
>> +	lwz	r0, STACK_SLOT_SHORT_PATH(r1)
>> +	cmpwi	r0, 0
>> +	bne	no_try_real
>>  	/* See if this is an hcall we can handle in real mode */
>>  	cmpwi	r12,BOOK3S_INTERRUPT_SYSCALL
>>  	beq	hcall_try_real_mode
>> +no_try_real:
>>  
>>  	/* Hypervisor doorbell - exit only if host IPI flag set */
>>  	cmpwi	r12, BOOK3S_INTERRUPT_H_DOORBELL
>> diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
>> index 741bf1f4387a..dcc07ceaf5ca 100644
>> --- a/arch/powerpc/kvm/book3s_xive.c
>> +++ b/arch/powerpc/kvm/book3s_xive.c
>> @@ -158,6 +158,40 @@ void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu)
>>  }
>>  EXPORT_SYMBOL_GPL(kvmppc_xive_pull_vcpu);
>>  
>> +void kvmppc_xive_cede_vcpu(struct kvm_vcpu *vcpu)
> 
> kvmppc_xive_rearm_escalation(), maybe? That name is more meaningful to me.

Sure, whatever you prefer.

> 
>> +{
>> +	void __iomem *esc_vaddr = (void __iomem *)vcpu->arch.xive_esc_vaddr;
>> +
>> +	if (!esc_vaddr)
>> +		return;
>> +
>> +	/* we are using XIVE with single escalation */
>> +
>> +	if (vcpu->arch.xive_esc_on) {
>> +		/*
>> +		 * If we still have a pending escalation, abort the cede,
>> +		 * and we must set PQ to 10 rather than 00 so that we don't
>> +		 * potentially end up with two entries for the escalation
>> +		 * interrupt in the XIVE interrupt queue.  In that case
>> +		 * we also don't want to set xive_esc_on to 1 here in
>> +		 * case we race with xive_esc_irq().
>> +		 */
>> +		vcpu->arch.ceded = 0;
>> +		/*
>> +		 * The escalation interrupts are special as we don't EOI them.
>> +		 * There is no need to use the load-after-store ordering offset
>> +		 * to set PQ to 10 as we won't use StoreEOI.
>> +		 */
>> +		__raw_readq(esc_vaddr + XIVE_ESB_SET_PQ_10);
>> +	} else {
>> +		vcpu->arch.xive_esc_on = true;
>> +		mb();
>> +		__raw_readq(esc_vaddr + XIVE_ESB_SET_PQ_00);
>> +	}
>> +	mb();
>> +}
>> +EXPORT_SYMBOL_GPL(kvmppc_xive_cede_vcpu);
>> +
>>  /*
>>   * This is a simple trigger for a generic XIVE IRQ. This must
>>   * only be called for interrupts that support a trigger page
>> @@ -2106,6 +2140,42 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
>>  	return 0;
>>  }
>>  
>> +int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
>> +{
>> +	struct kvmppc_vcore *vc = vcpu->arch.vcore;
>> +
>> +	/*
>> +	 * This test covers the case in which a vCPU does XICS hcalls without
>> +	 * QEMU having connected the vCPU to a XICS ICP. The ICP is the KVM
>> +	 * XICS device on P8 or XICS-on-XIVE on P9. It catches QEMU errors when
>> +	 * the interrupt mode is negotiated, we don't want the OS to do XICS
>> +	 * hcalls after having negotiated the XIVE interrupt mode.
>> +	 */
> 
> I think a comment like the following should be enough.
> 
>   The VM should have configured XICS mode before doing XICS hcalls.

I'll change it.

> No need to resend for that though.
> 
> Reviewed-by: Cédric Le Goater <clg@xxxxxxxx>

Thanks,
Nick




