> -/* definition of registers in kvm_run */ > +#define KVM_SYNC_X86_REGS (1UL << 0) > +#define KVM_SYNC_X86_SREGS (1UL << 1) > +#define KVM_SYNC_X86_EVENTS (1UL << 2) > +#define KVM_SYNC_X86_NUM_FIELDS 3 NUM_FIELDS is unused AFAICS, and KVM_SYNC_X86_VALID_FIELDS should be sufficient for our purpose. > + > +#define KVM_SYNC_X86_VALID_FIELDS \ > + (KVM_SYNC_X86_REGS| \ > + KVM_SYNC_X86_SREGS| \ > + KVM_SYNC_X86_EVENTS) > + > +/* kvm_sync_regs struct included by kvm_run struct */ > struct kvm_sync_regs { > + /* Members of this structure are potentially malicious. > + * Care must be taken by code reading, esp. interpreting, > + * data fields from them inside KVM to prevent TOCTOU and > + * double-fetch types of vulnerabilities. > + */ > + struct kvm_regs regs; > + struct kvm_sregs sregs; > + struct kvm_vcpu_events events; Will all of these be aligned in a way so we don't need any reserved fields in between? sizeof(struct kvm_sync_regs) == sizeof(struct kvm_regs) + ... > }; > > #define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 0204b2b8a293..ef60e3cb7ef8 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -100,6 +100,8 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu); > static void process_nmi(struct kvm_vcpu *vcpu); > static void enter_smm(struct kvm_vcpu *vcpu); > static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); > +static int sync_regs_store_to_kvmrun(struct kvm_vcpu *vcpu); > +static int sync_regs_load_from_kvmrun(struct kvm_vcpu *vcpu); > > struct kvm_x86_ops *kvm_x86_ops __read_mostly; > EXPORT_SYMBOL_GPL(kvm_x86_ops); > @@ -2743,6 +2745,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) > case KVM_CAP_IMMEDIATE_EXIT: > r = 1; > break; > + case KVM_CAP_SYNC_REGS: > + r = KVM_SYNC_X86_VALID_FIELDS; > + break; > case KVM_CAP_ADJUST_CLOCK: > r = KVM_CLOCK_TSC_STABLE; > break; > @@ -7351,6 +7356,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu 
*vcpu, struct kvm_run *kvm_run) > goto out; > } > > + if (vcpu->run->kvm_dirty_regs) { > + r = sync_regs_load_from_kvmrun(vcpu); > + if (r != 0) > + goto out; > + } > + > /* re-sync apic's tpr */ > if (!lapic_in_kernel(vcpu)) { > if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) { > @@ -7375,6 +7386,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) > > out: > kvm_put_guest_fpu(vcpu); > + if (vcpu->run->kvm_valid_regs) > + r = sync_regs_store_to_kvmrun(vcpu); You would end up overwriting an error return value. Not good. > post_kvm_run_save(vcpu); > kvm_sigset_deactivate(vcpu); > > @@ -7382,10 +7395,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) > return r; > } > > -int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) > +static void __kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, > + struct kvm_regs *regs) Wonder if this would be any better if simply named e.g. __get_regs(). This would maybe allow avoiding line breaks in sync_regs_*. 
> { > - vcpu_load(vcpu); > - > if (vcpu->arch.emulate_regs_need_sync_to_vcpu) { > /* > * We are here if userspace calls get_regs() in the middle of > @@ -7418,15 +7430,19 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) > > regs->rip = kvm_rip_read(vcpu); > regs->rflags = kvm_get_rflags(vcpu); > +} > > +int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) > +{ > + vcpu_load(vcpu); > + __kvm_arch_vcpu_ioctl_get_regs(vcpu, regs); > vcpu_put(vcpu); > return 0; > } > > -int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) > +static void __kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, > + struct kvm_regs *regs) > { > - vcpu_load(vcpu); > - > vcpu->arch.emulate_regs_need_sync_from_vcpu = true; > vcpu->arch.emulate_regs_need_sync_to_vcpu = false; > > @@ -7455,7 +7471,12 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) > vcpu->arch.exception.pending = false; > > kvm_make_request(KVM_REQ_EVENT, vcpu); > +} > > +int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) > +{ > + vcpu_load(vcpu); > + __kvm_arch_vcpu_ioctl_set_regs(vcpu, regs); > vcpu_put(vcpu); > return 0; > } > @@ -7470,13 +7491,11 @@ void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) > } > EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits); > > -int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, > - struct kvm_sregs *sregs) > +static void __kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, > + struct kvm_sregs *sregs) > { > struct desc_ptr dt; > > - vcpu_load(vcpu); > - > kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS); > kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS); > kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES); > @@ -7507,7 +7526,13 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, > if (vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft) > set_bit(vcpu->arch.interrupt.nr, > (unsigned long *)sregs->interrupt_bitmap); > +} > > 
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, > + struct kvm_sregs *sregs) > +{ > + vcpu_load(vcpu); > + __kvm_arch_vcpu_ioctl_get_sregs(vcpu, sregs); > vcpu_put(vcpu); > return 0; > } > @@ -7579,8 +7604,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, > } > EXPORT_SYMBOL_GPL(kvm_task_switch); > > -int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, > - struct kvm_sregs *sregs) > +static int __kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, > + struct kvm_sregs *sregs) > { > struct msr_data apic_base_msr; > int mmu_reset_needed = 0; > @@ -7588,8 +7613,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, > struct desc_ptr dt; > int ret = -EINVAL; > > - vcpu_load(vcpu); > - > if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && > (sregs->cr4 & X86_CR4_OSXSAVE)) > goto out; > @@ -7662,9 +7685,18 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, > vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; > > kvm_make_request(KVM_REQ_EVENT, vcpu); > - unrelated change > ret = 0; > out: > + return ret; > +} > + > +int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, > + struct kvm_sregs *sregs) > +{ > + int ret; > + > + vcpu_load(vcpu); > + ret = __kvm_arch_vcpu_ioctl_set_sregs(vcpu, sregs); > vcpu_put(vcpu); > return ret; > } > @@ -7791,6 +7823,52 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) > return 0; > } > > +static int sync_regs_store_to_kvmrun(struct kvm_vcpu *vcpu) Not sure if they can simply be named sync_regs/store_regs like on s390x. > +{ > + BUILD_BUG_ON(sizeof(struct kvm_sync_regs) > SYNC_REGS_SIZE_BYTES); -- Thanks, David / dhildenb