Re: [PATCH 15/59] KVM: arm64: nv: Refactor vcpu_{read,write}_sys_reg

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 27/06/2019 10:21, Alexandru Elisei wrote:
> On 6/21/19 10:37 AM, Marc Zyngier wrote:
>> Extract the direct HW accessors for later reuse.
>>
>> Signed-off-by: Marc Zyngier <marc.zyngier@xxxxxxx>
>> ---
>>  arch/arm64/kvm/sys_regs.c | 247 +++++++++++++++++++++-----------------
>>  1 file changed, 139 insertions(+), 108 deletions(-)
>>
>> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
>> index 2b8734f75a09..e181359adadf 100644
>> --- a/arch/arm64/kvm/sys_regs.c
>> +++ b/arch/arm64/kvm/sys_regs.c
>> @@ -182,99 +182,161 @@ const struct el2_sysreg_map *find_el2_sysreg(const struct el2_sysreg_map *map,
>>  	return entry;
>>  }
>>  
>> +static bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
>> +{
>> +	/*
>> +	 * System registers listed in the switch are not saved on every
>> +	 * exit from the guest but are only saved on vcpu_put.
>> +	 *
>> +	 * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
>> +	 * should never be listed below, because the guest cannot modify its
>> +	 * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's
>> +	 * thread when emulating cross-VCPU communication.
>> +	 */
>> +	switch (reg) {
>> +	case CSSELR_EL1:	*val = read_sysreg_s(SYS_CSSELR_EL1);	break;
>> +	case SCTLR_EL1:		*val = read_sysreg_s(SYS_SCTLR_EL12);	break;
>> +	case ACTLR_EL1:		*val = read_sysreg_s(SYS_ACTLR_EL1);	break;
>> +	case CPACR_EL1:		*val = read_sysreg_s(SYS_CPACR_EL12);	break;
>> +	case TTBR0_EL1:		*val = read_sysreg_s(SYS_TTBR0_EL12);	break;
>> +	case TTBR1_EL1:		*val = read_sysreg_s(SYS_TTBR1_EL12);	break;
>> +	case TCR_EL1:		*val = read_sysreg_s(SYS_TCR_EL12);	break;
>> +	case ESR_EL1:		*val = read_sysreg_s(SYS_ESR_EL12);	break;
>> +	case AFSR0_EL1:		*val = read_sysreg_s(SYS_AFSR0_EL12);	break;
>> +	case AFSR1_EL1:		*val = read_sysreg_s(SYS_AFSR1_EL12);	break;
>> +	case FAR_EL1:		*val = read_sysreg_s(SYS_FAR_EL12);	break;
>> +	case MAIR_EL1:		*val = read_sysreg_s(SYS_MAIR_EL12);	break;
>> +	case VBAR_EL1:		*val = read_sysreg_s(SYS_VBAR_EL12);	break;
>> +	case CONTEXTIDR_EL1:	*val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break;
>> +	case TPIDR_EL0:		*val = read_sysreg_s(SYS_TPIDR_EL0);	break;
>> +	case TPIDRRO_EL0:	*val = read_sysreg_s(SYS_TPIDRRO_EL0);	break;
>> +	case TPIDR_EL1:		*val = read_sysreg_s(SYS_TPIDR_EL1);	break;
>> +	case AMAIR_EL1:		*val = read_sysreg_s(SYS_AMAIR_EL12);	break;
>> +	case CNTKCTL_EL1:	*val = read_sysreg_s(SYS_CNTKCTL_EL12);	break;
>> +	case PAR_EL1:		*val = read_sysreg_s(SYS_PAR_EL1);	break;
>> +	case DACR32_EL2:	*val = read_sysreg_s(SYS_DACR32_EL2);	break;
>> +	case IFSR32_EL2:	*val = read_sysreg_s(SYS_IFSR32_EL2);	break;
>> +	case DBGVCR32_EL2:	*val = read_sysreg_s(SYS_DBGVCR32_EL2);	break;
>> +	default:		return false;
>> +	}
>> +
>> +	return true;
>> +}
>> +
>> +static bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
>> +{
>> +	/*
>> +	 * System registers listed in the switch are not restored on every
>> +	 * entry to the guest but are only restored on vcpu_load.
>> +	 *
>> +	 * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
>> +	 * should never be listed below, because the the MPIDR should only be
>> +	 * set once, before running the VCPU, and never changed later.
>> +	 */
>> +	switch (reg) {
>> +	case CSSELR_EL1:	write_sysreg_s(val, SYS_CSSELR_EL1);	break;
>> +	case SCTLR_EL1:		write_sysreg_s(val, SYS_SCTLR_EL12);	break;
>> +	case ACTLR_EL1:		write_sysreg_s(val, SYS_ACTLR_EL1);	break;
>> +	case CPACR_EL1:		write_sysreg_s(val, SYS_CPACR_EL12);	break;
>> +	case TTBR0_EL1:		write_sysreg_s(val, SYS_TTBR0_EL12);	break;
>> +	case TTBR1_EL1:		write_sysreg_s(val, SYS_TTBR1_EL12);	break;
>> +	case TCR_EL1:		write_sysreg_s(val, SYS_TCR_EL12);	break;
>> +	case ESR_EL1:		write_sysreg_s(val, SYS_ESR_EL12);	break;
>> +	case AFSR0_EL1:		write_sysreg_s(val, SYS_AFSR0_EL12);	break;
>> +	case AFSR1_EL1:		write_sysreg_s(val, SYS_AFSR1_EL12);	break;
>> +	case FAR_EL1:		write_sysreg_s(val, SYS_FAR_EL12);	break;
>> +	case MAIR_EL1:		write_sysreg_s(val, SYS_MAIR_EL12);	break;
>> +	case VBAR_EL1:		write_sysreg_s(val, SYS_VBAR_EL12);	break;
>> +	case CONTEXTIDR_EL1:	write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break;
>> +	case TPIDR_EL0:		write_sysreg_s(val, SYS_TPIDR_EL0);	break;
>> +	case TPIDRRO_EL0:	write_sysreg_s(val, SYS_TPIDRRO_EL0);	break;
>> +	case TPIDR_EL1:		write_sysreg_s(val, SYS_TPIDR_EL1);	break;
>> +	case AMAIR_EL1:		write_sysreg_s(val, SYS_AMAIR_EL12);	break;
>> +	case CNTKCTL_EL1:	write_sysreg_s(val, SYS_CNTKCTL_EL12);	break;
>> +	case PAR_EL1:		write_sysreg_s(val, SYS_PAR_EL1);	break;
>> +	case DACR32_EL2:	write_sysreg_s(val, SYS_DACR32_EL2);	break;
>> +	case IFSR32_EL2:	write_sysreg_s(val, SYS_IFSR32_EL2);	break;
>> +	case DBGVCR32_EL2:	write_sysreg_s(val, SYS_DBGVCR32_EL2);	break;
>> +	default:		return false;
>> +	}
>> +
>> +	return true;
>> +}
>> +
>>  u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
>>  {
>> -	u64 val;
>> +	u64 val = 0x8badf00d8badf00d;
>>  
>>  	if (!vcpu->arch.sysregs_loaded_on_cpu)
>> -		goto immediate_read;
>> +		goto memory_read;
>>  
>>  	if (unlikely(sysreg_is_el2(reg))) {
>>  		const struct el2_sysreg_map *el2_reg;
>>  
>>  		if (!is_hyp_ctxt(vcpu))
>> -			goto immediate_read;
>> +			goto memory_read;
>>  
>>  		switch (reg) {
>> +		case ELR_EL2:
>> +			return read_sysreg_el1(SYS_ELR);
>>  		case SPSR_EL2:
>>  			val = read_sysreg_el1(SYS_SPSR);
>>  			return __fixup_spsr_el2_read(&vcpu->arch.ctxt, val);
>>  		}
>>  
>>  		el2_reg = find_el2_sysreg(nested_sysreg_map, reg);
>> -		if (el2_reg) {
>> -			/*
>> -			 * If this register does not have an EL1 counterpart,
>> -			 * then read the stored EL2 version.
>> -			 */
>> -			if (el2_reg->mapping == __INVALID_SYSREG__)
>> -				goto immediate_read;
>> -
>> -			/* Get the current version of the EL1 counterpart. */
>> -			reg = el2_reg->mapping;
>> -		}
>> -	} else {
>> -		/* EL1 register can't be on the CPU if the guest is in vEL2. */
>> -		if (unlikely(is_hyp_ctxt(vcpu)))
>> -			goto immediate_read;
>> +		BUG_ON(!el2_reg);
>> +
>> +		/*
>> +		 * If this register does not have an EL1 counterpart,
>> +		 * then read the stored EL2 version.
>> +		 */
>> +		if (el2_reg->mapping == __INVALID_SYSREG__)
>> +			goto memory_read;
>> +
>> +		if (!vcpu_el2_e2h_is_set(vcpu) &&
>> +		    el2_reg->translate)
>> +			goto memory_read;
> 
> Nit: the condition can be written on one line.
> 
> This condition wasn't present in patch 13 which introduced EL2 register
> handling, and I'm struggling to understand what it does. As I understand the
> code, this condition basically translates into:
> 
> - if the register is one of SCTLR_EL2, TTBR0_EL2, CPTR_EL2 or TCR_EL2, then read
> it from memory.
> 
> - if the register is an EL2 register whose value is written unmodified to the
> corresponding EL1 register, then read the corresponding EL1 register and return
> that value.
> 
> Looking at vcpu_write_sys_reg, the values for the EL2 registers are always saved
> in memory. The guest is a non-vhe guest, so writes to EL1 registers shouldn't be
> reflected in the corresponding EL2 register. I think it's safe to always return
> the value from memory.
> 
> I tried testing this with the following patch:
> 
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 1235a88ec575..27d39bb9564d 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -290,6 +290,9 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
>                 el2_reg = find_el2_sysreg(nested_sysreg_map, reg);
>                 BUG_ON(!el2_reg);
>  
> +               if (!vcpu_el2_e2h_is_set(vcpu))
> +                       goto memory_read;
> +
>                 /*
>                  * If this register does not have an EL1 counterpart,
>                  * then read the stored EL2 version.
> @@ -297,10 +300,6 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
>                 if (el2_reg->mapping == __INVALID_SYSREG__)
>                         goto memory_read;
>  
> -               if (!vcpu_el2_e2h_is_set(vcpu) &&
> -                   el2_reg->translate)
> -                       goto memory_read;
> -
>                 /* Get the current version of the EL1 counterpart. */
>                 reg = el2_reg->mapping;
>                 WARN_ON(!__vcpu_read_sys_reg_from_cpu(reg, &val));
> 
> I know it's not conclusive, but I was able to boot a L2 guest under a L1 non-vhe
> hypervisor.

And now you can't properly handle the terrible ARMv8.3 business of
SPSR_EL1 being changed behind your back if you get an exception at vEL2
to vEL2 on non-VHE. To handle this, you need both the live system
register and the memory backup (see __fixup_spsr_el2_read and co).

More generally, some registers can be modified behind your back. That's
ELR, SPSR, FAR, ESR.  Anything related to taking an exception. No, this
can't be observed with KVM because we don't allow exception to be taken
at EL2 in the absence of RAS errors.

Thanks,

	M.
-- 
Jazz is not dead. It just smells funny...



[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux