Re: [PATCH v4 14/25] KVM: arm64/sve: Context switch the SVE registers

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 17/01/2019 20:33, Dave Martin wrote:
> In order to give each vcpu its own view of the SVE registers, this
> patch adds context storage via a new sve_state pointer in struct
> vcpu_arch.  An additional member sve_max_vl is also added for each
> vcpu, to determine the maximum vector length visible to the guest
> and thus the value to be configured in ZCR_EL2.LEN while the vcpu is
> active.  This also determines the layout and size of the storage in
> sve_state, which is read and written by the same backend functions
> that are used for context-switching the SVE state for host tasks.
> 
> On SVE-enabled vcpus, SVE access traps are now handled by switching
> in the vcpu's SVE context and disabling the trap before returning
> to the guest.  On other vcpus, the trap is not handled and an exit
> back to the host occurs, where the handle_sve() fallback path
> reflects an undefined instruction exception back to the guest,
> consistently with the behaviour of non-SVE-capable hardware (as was
> done unconditionally prior to this patch).
> 
> No SVE handling is added on non-VHE-only paths, since VHE is an
> architectural and Kconfig prerequisite of SVE.
> 
> Signed-off-by: Dave Martin <Dave.Martin@xxxxxxx>
> 
> ---
> 
> Changes since RFC v3:
> 
>  * Ensure guest_has_sve is initialised before use.  Previously the
>    order of the code allowed this to be used before initialisation
>    on some paths.  Mysteriously the compiler failed to warn.
> ---
>  arch/arm64/include/asm/kvm_host.h |  6 ++++
>  arch/arm64/kvm/fpsimd.c           |  5 +--
>  arch/arm64/kvm/hyp/switch.c       | 70 ++++++++++++++++++++++++++++++---------
>  3 files changed, 64 insertions(+), 17 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index c32f195..77b6f3e 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -212,6 +212,8 @@ typedef struct kvm_cpu_context kvm_cpu_context_t;
>  
>  struct kvm_vcpu_arch {
>  	struct kvm_cpu_context ctxt;
> +	void *sve_state;
> +	unsigned int sve_max_vl;
>  
>  	/* HYP configuration */
>  	u64 hcr_el2;
> @@ -304,6 +306,10 @@ struct kvm_vcpu_arch {
>  	bool sysregs_loaded_on_cpu;
>  };
>  
> +/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
> +#define vcpu_sve_pffr(vcpu) ((void *)((char *)((vcpu)->arch.sve_state) + \
> +				      sve_ffr_offset((vcpu)->arch.sve_max_vl)))
> +
>  /* vcpu_arch flags field values: */
>  #define KVM_ARM64_DEBUG_DIRTY		(1 << 0)
>  #define KVM_ARM64_FP_ENABLED		(1 << 1) /* guest FP regs loaded */
> diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
> index 5ff2d90..9cc57d4 100644
> --- a/arch/arm64/kvm/fpsimd.c
> +++ b/arch/arm64/kvm/fpsimd.c
> @@ -87,10 +87,11 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
>  
>  	if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
>  		fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.gp_regs.fp_regs,
> -					 NULL, SVE_VL_MIN);
> +					 vcpu->arch.sve_state,
> +					 vcpu->arch.sve_max_vl);
>  
>  		clear_thread_flag(TIF_FOREIGN_FPSTATE);
> -		clear_thread_flag(TIF_SVE);
> +		update_thread_flag(TIF_SVE, vcpu_has_sve(vcpu));
>  	}
>  }
>  
> diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> index 9f07403..f7a8e3b 100644
> --- a/arch/arm64/kvm/hyp/switch.c
> +++ b/arch/arm64/kvm/hyp/switch.c
> @@ -98,7 +98,10 @@ static void activate_traps_vhe(struct kvm_vcpu *vcpu)
>  	val = read_sysreg(cpacr_el1);
>  	val |= CPACR_EL1_TTA;
>  	val &= ~CPACR_EL1_ZEN;
> -	if (!update_fp_enabled(vcpu)) {
> +	if (update_fp_enabled(vcpu)) {
> +		if (vcpu_has_sve(vcpu))
> +			val |= CPACR_EL1_ZEN;
> +	} else {
>  		val &= ~CPACR_EL1_FPEN;
>  		__activate_traps_fpsimd32(vcpu);
>  	}
> @@ -313,26 +316,59 @@ static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
>  	return true;
>  }
>  
> -static bool __hyp_text __hyp_switch_fpsimd(struct kvm_vcpu *vcpu)
> +/*
> + * if () with a gating check for SVE support to minimise branch
> + * mispredictions in non-SVE systems.
> + * (system_supports_sve() is resolved at build time or via a static key.)
> + */
> +#define if_sve(cond) if (system_supports_sve() && (cond))

I really hate this kind of construct, as while it provides an apparent
benefit, it actually leads to code that is harder to read. Do we have
any data showing that this adversely impacts any known workload on
non-SVE systems, given how rarely this exception is triggered?

At any rate, I'd rather see system_supports_sve() as part of the condition.

> +
> +/* Check for an FPSIMD/SVE trap and handle as appropriate */
> +static bool __hyp_text __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
>  {
> -	struct user_fpsimd_state *host_fpsimd = vcpu->arch.host_fpsimd_state;
> +	u8 trap_class;
> +	bool guest_has_sve;
>  
> -	if (has_vhe())
> -		write_sysreg(read_sysreg(cpacr_el1) | CPACR_EL1_FPEN,
> -			     cpacr_el1);
> -	else
> +	if (!system_supports_fpsimd())
> +		return false;
> +
> +	guest_has_sve = vcpu_has_sve(vcpu);
> +	trap_class = kvm_vcpu_trap_get_class(vcpu);
> +
> +	if (trap_class == ESR_ELx_EC_FP_ASIMD)
> +		goto handle;
> +
> +	if_sve (guest_has_sve && trap_class == ESR_ELx_EC_SVE)
> +		goto handle;
> +
> +	return false;

In the end, this is the stuff we want to handle quickly: nothing to do
at all. Given that vcpu_has_sve() is already gated by
system_supports_sve(), I'm pretty sure that there is no difference in
code generation between your if_sve() construct and a simple if().

> +
> +handle:
> +	/* The trap is an FPSIMD/SVE trap: switch the context */
> +
> +	if (has_vhe()) {
> +		u64 reg = read_sysreg(cpacr_el1) | CPACR_EL1_FPEN;
> +
> +		if_sve (guest_has_sve)
> +			reg |= CPACR_EL1_ZEN;
> +
> +		write_sysreg(reg, cpacr_el1);
> +	} else {
>  		write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP,
>  			     cptr_el2);
> +	}
>  
>  	isb();
>  
>  	if (vcpu->arch.flags & KVM_ARM64_FP_HOST) {
> +		struct user_fpsimd_state *host_fpsimd =
> +			vcpu->arch.host_fpsimd_state;
> +

Single line, please. Or split declaration and assignment.

>  		/*
>  		 * In the SVE case, VHE is assumed: it is enforced by
>  		 * Kconfig and kvm_arch_init().
>  		 */
> -		if (system_supports_sve() &&
> -		    (vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE)) {
> +		if_sve (vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE) {
>  			struct thread_struct *thread = container_of(
>  				host_fpsimd,
>  				struct thread_struct, uw.fpsimd_state);
> @@ -345,10 +381,14 @@ static bool __hyp_text __hyp_switch_fpsimd(struct kvm_vcpu *vcpu)
>  		vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
>  	}
>  
> -	__fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs);
> -
> -	if (vcpu_has_sve(vcpu))
> +	if_sve (guest_has_sve) {
> +		sve_load_state(vcpu_sve_pffr(vcpu),
> +			       &vcpu->arch.ctxt.gp_regs.fp_regs.fpsr,
> +			       sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1);
>  		write_sysreg_s(vcpu->arch.ctxt.sys_regs[ZCR_EL1], SYS_ZCR_EL12);
> +	} else {
> +		__fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs);
> +	}
>  
>  	/* Skip restoring fpexc32 for AArch64 guests */
>  	if (!(read_sysreg(hcr_el2) & HCR_RW))
> @@ -384,10 +424,10 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
>  	 * and restore the guest context lazily.
>  	 * If FP/SIMD is not implemented, handle the trap and inject an
>  	 * undefined instruction exception to the guest.
> +	 * Similarly for trapped SVE accesses.
>  	 */
> -	if (system_supports_fpsimd() &&
> -	    kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_FP_ASIMD)
> -		return __hyp_switch_fpsimd(vcpu);
> +	if (__hyp_handle_fpsimd(vcpu))
> +		return true;
>  
>  	if (!__populate_fault_info(vcpu))
>  		return true;
> 

Thanks,

	M.
-- 
Jazz is not dead. It just smells funny...
_______________________________________________
kvmarm mailing list
kvmarm@xxxxxxxxxxxxxxxxxxxxx
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm



[Index of Archives]     [Linux KVM]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux