On 13.01.2018 01:36, Ken Hofsass wrote: > This commit implements an enhanced x86 version of S390 > KVM_CAP_SYNC_REGS functionality. KVM_CAP_SYNC_REGS "allow[s] > userspace to access certain guest registers without having > to call SET/GET_*REGS". This reduces ioctl overhead which > is particularly important when userspace is making synchronous > guest state modifications (e.g. when emulating and/or intercepting > instructions). > > Originally implemented upstream for the S390, the x86 differences > follow: > - the capability can be enabled/disabled Is this still true? As far as I can tell, enabling/disabling works via kvm_run. > - the register sets to be copied out to kvm_run are selectable > by userspace > - vcpu_events are available in addition to the basic register sets > (regs, sregs). > > Signed-off-by: Ken Hofsass <hofsass@xxxxxxxxxx> > --- > Documentation/virtual/kvm/api.txt | 40 +++++++++++++++++++++++++ > arch/x86/include/uapi/asm/kvm.h | 20 ++++++++++++- > arch/x86/kvm/x86.c | 62 +++++++++++++++++++++++++++++++++++++++ > include/uapi/linux/kvm.h | 6 +++- > 4 files changed, 126 insertions(+), 2 deletions(-) > > diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt > index 57d3ee9e4bde..a1050fdafcc4 100644 > --- a/Documentation/virtual/kvm/api.txt > +++ b/Documentation/virtual/kvm/api.txt > @@ -3964,6 +3964,46 @@ Once this is done the KVM_REG_MIPS_VEC_* and KVM_REG_MIPS_MSA_* registers can be > accessed, and the Config5.MSAEn bit is accessible via the KVM API and also from > the guest. > > +6.74 KVM_CAP_SYNC_REGS > +Architectures: s390, x86 > +Target: s390: always enabled, x86: vcpu > +Parameters: none > +Returns: x86: KVM_CHECK_EXTENSION returns a bit-array indicating which register > +sets are supported (bitfields defined in arch/x86/include/uapi/asm/kvm.h). 
> + > +As described above in the kvm_sync_regs struct info in section 5 (kvm_run): > +KVM_CAP_SYNC_REGS "allow[s] userspace to access certain guest registers > +without having to call SET/GET_*REGS". This reduces overhead by eliminating > +repeated ioctl calls for setting and/or getting register values. This is > +particularly important when userspace is making synchronous guest state > +modifications, e.g. when emulating and/or intercepting instructions in > +userspace. > + > +For s390 specifics, please refer to the source code. > + > +For x86: > +- the register sets to be copied out to kvm_run are selectable > + by userspace (rather that all sets being copied out for every exit). > +- vcpu_events are available in addition to regs and sregs. > + > +For x86, the 'kvm_valid_regs' field of struct kvm_run is overloaded to > +function as an input bit-array field set by userspace to indicate the > +specific register sets to be copied out on the next exit. > + > +To indicate when userspace has modified values that should be copied into > +the vCPU, the all architecture bitarray field, 'kvm_dirty_regs' must be set. > +This is done using the same bitflags as for the 'kvm_valid_regs' field. > +If the dirty bit is not set, then the register set values will not be copied > +into the vCPU even if they've been modified. > + > +Unused bitfields in the bitarrays must be set to zero. > + > +struct kvm_sync_regs { > + struct kvm_regs regs; > + struct kvm_sregs sregs; > + struct kvm_vcpu_events events; > +}; > + > 7. 
Capabilities that can be enabled on VMs > ------------------------------------------ > > diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h > index f3a960488eae..3a51cc0e8d8d 100644 > --- a/arch/x86/include/uapi/asm/kvm.h > +++ b/arch/x86/include/uapi/asm/kvm.h > @@ -354,8 +354,26 @@ struct kvm_xcrs { > __u64 padding[16]; > }; > > -/* definition of registers in kvm_run */ > +#define KVM_SYNC_X86_REGS (1UL << 0) > +#define KVM_SYNC_X86_SREGS (1UL << 1) > +#define KVM_SYNC_X86_EVENTS (1UL << 2) > +#define KVM_SYNC_X86_NUM_FIELDS 3 > + > +#define KVM_SYNC_X86_VALID_FIELDS \ > + (KVM_SYNC_X86_REGS| \ > + KVM_SYNC_X86_SREGS| \ > + KVM_SYNC_X86_EVENTS) > + > +/* kvm_sync_regs struct included by kvm_run struct */ > struct kvm_sync_regs { > + /* Members of this structure are potentially malicious. > + * Care must be taken by code reading, esp. interpreting, > + * data fields from them inside KVM to prevent TOCTOU and > + * double-fetch types of vulnerabilities. > + */ > + struct kvm_regs regs; > + struct kvm_sregs sregs; > + struct kvm_vcpu_events events; > }; > > #define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index daa1918031df..d02525f6f3a1 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -100,6 +100,8 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu); > static void process_nmi(struct kvm_vcpu *vcpu); > static void enter_smm(struct kvm_vcpu *vcpu); > static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); > +static void sync_regs_store_to_kvmrun(struct kvm_vcpu *vcpu); > +static int sync_regs_load_from_kvmrun(struct kvm_vcpu *vcpu); > > struct kvm_x86_ops *kvm_x86_ops __read_mostly; > EXPORT_SYMBOL_GPL(kvm_x86_ops); > @@ -2762,6 +2764,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) > case KVM_CAP_IMMEDIATE_EXIT: > r = 1; > break; > + case KVM_CAP_SYNC_REGS: > + r = KVM_SYNC_X86_VALID_FIELDS; > + break; > case 
KVM_CAP_ADJUST_CLOCK: > r = KVM_CLOCK_TSC_STABLE; > break; > @@ -7356,6 +7361,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) > goto out; > } > > + if (vcpu->run->kvm_dirty_regs) { > + r = sync_regs_load_from_kvmrun(vcpu); If I am not wrong, at this point we have already called vcpu_load(), but you are also calling it via kvm_arch_vcpu_ioctl_get_regs(). > + if (r != 0) > + goto out; > + } > + > /* re-sync apic's tpr */ > if (!lapic_in_kernel(vcpu)) { > if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) { > @@ -7380,6 +7391,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) > > out: > kvm_put_guest_fpu(vcpu); > + if (vcpu->run->kvm_valid_regs) > + sync_regs_store_to_kvmrun(vcpu); ditto > post_kvm_run_save(vcpu); > kvm_sigset_deactivate(vcpu); > > @@ -7796,6 +7809,55 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) > return 0; > } > > +static void sync_regs_store_to_kvmrun(struct kvm_vcpu *vcpu) > +{ > + BUILD_BUG_ON(sizeof(struct kvm_sync_regs) > SYNC_REGS_UNION_SIZE_BYTES); > + This function will now do 3 * vcpu_load() + vcpu_put(). Something to optimize. > + if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_REGS) { > + kvm_arch_vcpu_ioctl_get_regs(vcpu, &vcpu->run->s.regs.regs); > + } > + if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_SREGS) { > + kvm_arch_vcpu_ioctl_get_sregs(vcpu, &vcpu->run->s.regs.sregs); > + } > + if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_EVENTS) { > + kvm_vcpu_ioctl_x86_get_vcpu_events( > + vcpu, &vcpu->run->s.regs.events); > + } you can drop all of the braces in this function > +} > + > +static int sync_regs_load_from_kvmrun(struct kvm_vcpu *vcpu) > +{ > + int r = -EINVAL; > + > + if (unlikely(vcpu->run->kvm_dirty_regs == 0)) > + goto out; > + > + if (vcpu->run->kvm_dirty_regs & ~KVM_SYNC_X86_VALID_FIELDS) simply return -EINVAL (also for the other error conditions). Then you can drop the local variable r and the out label. 
> + goto out; > + > + if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_REGS) { > + if (kvm_arch_vcpu_ioctl_set_regs( > + vcpu, &vcpu->run->s.regs.regs)) > + goto out; > + vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_REGS; > + } > + if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_SREGS) { > + if (kvm_arch_vcpu_ioctl_set_sregs( > + vcpu, &vcpu->run->s.regs.sregs)) > + goto out; > + vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_SREGS; > + } > + if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_EVENTS) { > + if (kvm_vcpu_ioctl_x86_set_vcpu_events( > + vcpu, &vcpu->run->s.regs.events)) > + goto out; > + vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_EVENTS; > + }> + r = 0; > +out: > + return r; > +} > + > static void fx_init(struct kvm_vcpu *vcpu) > { > fpstate_init(&vcpu->arch.guest_fpu.state); > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > index 496e59a2738b..99c50fa10103 100644 > --- a/include/uapi/linux/kvm.h > +++ b/include/uapi/linux/kvm.h > @@ -396,6 +396,10 @@ struct kvm_run { > char padding[256]; > }; > The following should be moved into a separate patch: > + /* 2048 is the size of the char array IBM used to bound/pad the size > + * of the union that holds sync regs. > + */ > +#define SYNC_REGS_UNION_SIZE_BYTES 2048 s/SYNC_REGS_UNION_SIZE_BYTES/SYNC_REGS_SIZE_BYTES/ > /* > * shared registers between kvm and userspace. > * kvm_valid_regs specifies the register classes set by the host > @@ -407,7 +411,7 @@ struct kvm_run { > __u64 kvm_dirty_regs; > union { > struct kvm_sync_regs regs; > - char padding[2048]; > + char padding[SYNC_REGS_UNION_SIZE_BYTES]; > } s; > }; > > -- Thanks, David / dhildenb