Re: [PATCH v2] KVM: x86: KVM_CAP_SYNC_REGS

On 13.01.2018 01:36, Ken Hofsass wrote:
> This commit implements an enhanced x86 version of S390
> KVM_CAP_SYNC_REGS functionality. KVM_CAP_SYNC_REGS "allow[s]
> userspace to access certain guest registers without having
> to call SET/GET_*REGS". This reduces ioctl overhead which
> is particularly important when userspace is making synchronous
> guest state modifications (e.g. when emulating and/or intercepting
> instructions).
> 
> Originally implemented upstream for the S390, the x86 differences
> follow:
> - the capability can be enabled/disabled

Is this still true? As far as I can tell, enabling/disabling works via
kvm_run.

> - the register sets to be copied out to kvm_run are selectable
> by userspace
> - vcpu_events are available in addition to the basic register sets
> (regs, sregs).
> 
> Signed-off-by: Ken Hofsass <hofsass@xxxxxxxxxx>
> ---
>  Documentation/virtual/kvm/api.txt | 40 +++++++++++++++++++++++++
>  arch/x86/include/uapi/asm/kvm.h   | 20 ++++++++++++-
>  arch/x86/kvm/x86.c                | 62 +++++++++++++++++++++++++++++++++++++++
>  include/uapi/linux/kvm.h          |  6 +++-
>  4 files changed, 126 insertions(+), 2 deletions(-)
> 
> diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
> index 57d3ee9e4bde..a1050fdafcc4 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -3964,6 +3964,46 @@ Once this is done the KVM_REG_MIPS_VEC_* and KVM_REG_MIPS_MSA_* registers can be
>  accessed, and the Config5.MSAEn bit is accessible via the KVM API and also from
>  the guest.
>  
> +6.74 KVM_CAP_SYNC_REGS
> +Architectures: s390, x86
> +Target: s390: always enabled, x86: vcpu
> +Parameters: none
> +Returns: x86: KVM_CHECK_EXTENSION returns a bit-array indicating which register
> +sets are supported (bitfields defined in arch/x86/include/uapi/asm/kvm.h).
> +
> +As described above in the kvm_sync_regs struct info in section 5 (kvm_run):
> +KVM_CAP_SYNC_REGS "allow[s] userspace to access certain guest registers
> +without having to call SET/GET_*REGS". This reduces overhead by eliminating
> +repeated ioctl calls for setting and/or getting register values. This is
> +particularly important when userspace is making synchronous guest state
> +modifications, e.g. when emulating and/or intercepting instructions in
> +userspace.
> +
> +For s390 specifics, please refer to the source code.
> +
> +For x86:
> +- the register sets to be copied out to kvm_run are selectable
> +  by userspace (rather than all sets being copied out for every exit).
> +- vcpu_events are available in addition to regs and sregs.
> +
> +For x86, the 'kvm_valid_regs' field of struct kvm_run is overloaded to
> +function as an input bit-array field set by userspace to indicate the
> +specific register sets to be copied out on the next exit.
> +
> +To indicate when userspace has modified values that should be copied into
> +the vCPU, the architecture-common bit-array field 'kvm_dirty_regs' must be set.
> +This is done using the same bitflags as for the 'kvm_valid_regs' field.
> +If the dirty bit is not set, then the register set values will not be copied
> +into the vCPU even if they've been modified.
> +
> +Unused bitfields in the bitarrays must be set to zero.
> +
> +struct kvm_sync_regs {
> +        struct kvm_regs regs;
> +        struct kvm_sregs sregs;
> +        struct kvm_vcpu_events events;
> +};
> +
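
Not a blocker, but a small usage example in the doc might help. Something
along these lines (untested; vcpu_fd is the vcpu file descriptor, run is
its mmap()ed struct kvm_run, and insn_len is made up for the example):

	/* ask KVM to copy GPRs and sregs out to kvm_run on the next exit */
	run->kvm_valid_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS;
	ioctl(vcpu_fd, KVM_RUN, 0);

	/* state is now in run->s.regs, no KVM_GET_*REGS round trip needed */
	run->s.regs.regs.rip += insn_len;	/* e.g. skip an emulated insn */

	/* mark what was modified so KVM copies it back into the vcpu */
	run->kvm_dirty_regs = KVM_SYNC_X86_REGS;
	ioctl(vcpu_fd, KVM_RUN, 0);
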
>  7. Capabilities that can be enabled on VMs
>  ------------------------------------------
>  
> diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
> index f3a960488eae..3a51cc0e8d8d 100644
> --- a/arch/x86/include/uapi/asm/kvm.h
> +++ b/arch/x86/include/uapi/asm/kvm.h
> @@ -354,8 +354,26 @@ struct kvm_xcrs {
>  	__u64 padding[16];
>  };
>  
> -/* definition of registers in kvm_run */
> +#define KVM_SYNC_X86_REGS      (1UL << 0)
> +#define KVM_SYNC_X86_SREGS     (1UL << 1)
> +#define KVM_SYNC_X86_EVENTS    (1UL << 2)
> +#define KVM_SYNC_X86_NUM_FIELDS		3
> +
> +#define KVM_SYNC_X86_VALID_FIELDS \
> +	(KVM_SYNC_X86_REGS| \
> +	 KVM_SYNC_X86_SREGS| \
> +	 KVM_SYNC_X86_EVENTS)
> +
> +/* kvm_sync_regs struct included by kvm_run struct */
>  struct kvm_sync_regs {
> +	/* Members of this structure are potentially malicious.
> +	 * Care must be taken by code reading, esp. interpreting,
> +	 * data fields from them inside KVM to prevent TOCTOU and
> +	 * double-fetch types of vulnerabilities.
> +	 */
> +	struct kvm_regs regs;
> +	struct kvm_sregs sregs;
> +	struct kvm_vcpu_events events;
>  };
>  
>  #define KVM_X86_QUIRK_LINT0_REENABLED	(1 << 0)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index daa1918031df..d02525f6f3a1 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -100,6 +100,8 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu);
>  static void process_nmi(struct kvm_vcpu *vcpu);
>  static void enter_smm(struct kvm_vcpu *vcpu);
>  static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
> +static void sync_regs_store_to_kvmrun(struct kvm_vcpu *vcpu);
> +static int sync_regs_load_from_kvmrun(struct kvm_vcpu *vcpu);
>  
>  struct kvm_x86_ops *kvm_x86_ops __read_mostly;
>  EXPORT_SYMBOL_GPL(kvm_x86_ops);
> @@ -2762,6 +2764,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>  	case KVM_CAP_IMMEDIATE_EXIT:
>  		r = 1;
>  		break;
> +	case KVM_CAP_SYNC_REGS:
> +		r = KVM_SYNC_X86_VALID_FIELDS;
> +		break;
>  	case KVM_CAP_ADJUST_CLOCK:
>  		r = KVM_CLOCK_TSC_STABLE;
>  		break;
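
Aside, mostly for userspace folks reading along: since KVM_CHECK_EXTENSION
now returns the supported bit-array instead of just 0/1, probing would look
roughly like this (untested sketch, vm_fd being an open VM file descriptor):

	int sync_valid = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_SYNC_REGS);

	if (sync_valid > 0 && (sync_valid & KVM_SYNC_X86_REGS))
		/* GPRs can be exchanged via kvm_run->s.regs */;
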
> @@ -7356,6 +7361,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
>  		goto out;
>  	}
>  
> +	if (vcpu->run->kvm_dirty_regs) {
> +		r = sync_regs_load_from_kvmrun(vcpu);

If I am not wrong, at this point we have already called vcpu_load(), but
you are also calling it again via kvm_arch_vcpu_ioctl_set_regs() and the
other per-register-set handlers.

> +		if (r != 0)
> +			goto out;
> +	}
> +
>  	/* re-sync apic's tpr */
>  	if (!lapic_in_kernel(vcpu)) {
>  		if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
> @@ -7380,6 +7391,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
>  
>  out:
>  	kvm_put_guest_fpu(vcpu);
> +	if (vcpu->run->kvm_valid_regs)
> +		sync_regs_store_to_kvmrun(vcpu);

ditto

>  	post_kvm_run_save(vcpu);
>  	kvm_sigset_deactivate(vcpu);
>  
> @@ -7796,6 +7809,55 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
>  	return 0;
>  }
>  
> +static void sync_regs_store_to_kvmrun(struct kvm_vcpu *vcpu)
> +{
> +	BUILD_BUG_ON(sizeof(struct kvm_sync_regs) > SYNC_REGS_UNION_SIZE_BYTES);
> +

This function will now do up to three vcpu_load()/vcpu_put() pairs.
Something to optimize.
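
One way around that (just a rough, untested idea, and the __get_regs()
name below is made up): split the register copying out of the ioctl
handlers into helpers that do not touch vcpu_load()/vcpu_put(), and let
both the ioctl path and the sync_regs path call those, e.g.

static void __get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	/* the current body of kvm_arch_vcpu_ioctl_get_regs(), i.e. the
	 * plain register copying, without vcpu_load()/vcpu_put()
	 */
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	__get_regs(vcpu, regs);
	vcpu_put(vcpu);
	return 0;
}

static void sync_regs_store_to_kvmrun(struct kvm_vcpu *vcpu)
{
	/* we run under the vcpu_load() already done for KVM_RUN, so no
	 * extra load/put needed here
	 */
	if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_REGS)
		__get_regs(vcpu, &vcpu->run->s.regs.regs);
	/* ... same pattern for sregs and vcpu_events ... */
}

The same idea would apply to the set side used by sync_regs_load_from_kvmrun().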

> +	if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_REGS) {
> +		kvm_arch_vcpu_ioctl_get_regs(vcpu, &vcpu->run->s.regs.regs);
> +	}
> +	if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_SREGS) {
> +		kvm_arch_vcpu_ioctl_get_sregs(vcpu, &vcpu->run->s.regs.sregs);
> +	}
> +	if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_EVENTS) {
> +		kvm_vcpu_ioctl_x86_get_vcpu_events(
> +				vcpu, &vcpu->run->s.regs.events);
> +	}

you can drop all of the braces in this function

> +}
> +
> +static int sync_regs_load_from_kvmrun(struct kvm_vcpu *vcpu)
> +{
> +	int r = -EINVAL;
> +
> +	if (unlikely(vcpu->run->kvm_dirty_regs == 0))
> +		goto out;
> +
> +	if (vcpu->run->kvm_dirty_regs & ~KVM_SYNC_X86_VALID_FIELDS)

simply return -EINVAL (also for the other error conditions). Then you can
drop the local variable r and the out label (see the sketch below the
quoted function).

> +		goto out;
> +
> +	if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_REGS) {
> +		if (kvm_arch_vcpu_ioctl_set_regs(
> +				vcpu, &vcpu->run->s.regs.regs))
> +			goto out;
> +		vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_REGS;
> +	}
> +	if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_SREGS) {
> +		if (kvm_arch_vcpu_ioctl_set_sregs(
> +				vcpu, &vcpu->run->s.regs.sregs))
> +			goto out;
> +		vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_SREGS;
> +	}
> +	if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_EVENTS) {
> +		if (kvm_vcpu_ioctl_x86_set_vcpu_events(
> +				vcpu, &vcpu->run->s.regs.events))
> +			goto out;
> +		vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_EVENTS;
> +	}
> +	r = 0;
> +out:
> +	return r;
> +}
> +
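
FWIW, with early returns the whole function could then look roughly like
this (completely untested, and ignoring the vcpu_load() double-call
discussed further up):

static int sync_regs_load_from_kvmrun(struct kvm_vcpu *vcpu)
{
	if (unlikely(vcpu->run->kvm_dirty_regs == 0))
		return -EINVAL;

	if (vcpu->run->kvm_dirty_regs & ~KVM_SYNC_X86_VALID_FIELDS)
		return -EINVAL;

	if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_REGS) {
		if (kvm_arch_vcpu_ioctl_set_regs(vcpu, &vcpu->run->s.regs.regs))
			return -EINVAL;
		vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_REGS;
	}
	if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_SREGS) {
		if (kvm_arch_vcpu_ioctl_set_sregs(vcpu, &vcpu->run->s.regs.sregs))
			return -EINVAL;
		vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_SREGS;
	}
	if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_EVENTS) {
		if (kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu,
						       &vcpu->run->s.regs.events))
			return -EINVAL;
		vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_EVENTS;
	}
	return 0;
}

(The first check could even go away, since kvm_arch_vcpu_ioctl_run() only
calls this when kvm_dirty_regs is non-zero.)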
>  static void fx_init(struct kvm_vcpu *vcpu)
>  {
>  	fpstate_init(&vcpu->arch.guest_fpu.state);
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 496e59a2738b..99c50fa10103 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -396,6 +396,10 @@ struct kvm_run {
>  		char padding[256];
>  	};
>  

The following should be moved into a separate patch:

> +	/* 2048 is the size of the char array IBM used to bound/pad the size
> +	 * of the union that holds sync regs.
> +	 */
> +#define SYNC_REGS_UNION_SIZE_BYTES 2048

s/SYNC_REGS_UNION_SIZE_BYTES/SYNC_REGS_SIZE_BYTES/

>  	/*
>  	 * shared registers between kvm and userspace.
>  	 * kvm_valid_regs specifies the register classes set by the host
> @@ -407,7 +411,7 @@ struct kvm_run {
>  	__u64 kvm_dirty_regs;
>  	union {
>  		struct kvm_sync_regs regs;
> -		char padding[2048];
> +		char padding[SYNC_REGS_UNION_SIZE_BYTES];
>  	} s;
>  };
>  
> 


-- 

Thanks,

David / dhildenb


