From: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Extend the fpstate reallocation mechanism to cover the guest FPU. Unlike native tasks, which have reallocation triggered from the #NM handler, guest fpstate reallocation is requested by KVM when it detects the guest's intention to use a dynamically enabled XSAVE feature. Since KVM currently swaps host/guest fpstate when exiting to the userspace VMM (see fpu_swap_kvm_fpstate()), handle fpstate reallocation at this point as well. The implication: KVM must break out of the vcpu_run() loop and exit to the userspace VMM, instead of immediately returning to the guest, when the fpstate requires reallocation. In this case KVM should set guest_fpu::realloc_request in the related VM exit handlers to mark the features that need reallocation. Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Signed-off-by: Jing Liu <jing2.liu@xxxxxxxxx> Signed-off-by: Yang Zhong <yang.zhong@xxxxxxxxx> --- arch/x86/kernel/fpu/core.c | 26 +++++++++++++++++++--- arch/x86/kernel/fpu/xstate.c | 43 ++++++++++++++++++++++++++++++------ arch/x86/kernel/fpu/xstate.h | 2 ++ 3 files changed, 61 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index fae44fa27cdb..7a0436a0cb2c 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -261,11 +261,31 @@ void fpu_free_guest_fpstate(struct fpu_guest *gfpu) } EXPORT_SYMBOL_GPL(fpu_free_guest_fpstate); +static int fpu_guest_realloc_fpstate(struct fpu_guest *guest_fpu, + bool enter_guest) +{ + /* + * Reallocation requests can only be handled when + * switching from guest to host mode. 
+ */ + if (WARN_ON_ONCE(enter_guest || !IS_ENABLED(CONFIG_X86_64))) { + guest_fpu->realloc_request = 0; + return -EUNATCH; + } + return xfd_enable_guest_features(guest_fpu); +} + int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest) { - struct fpstate *guest_fps = guest_fpu->fpstate; + struct fpstate *guest_fps, *cur_fps; struct fpu *fpu = &current->thread.fpu; - struct fpstate *cur_fps = fpu->fpstate; + int ret = 0; + + if (unlikely(guest_fpu->realloc_request)) + ret = fpu_guest_realloc_fpstate(guest_fpu, enter_guest); + + guest_fps = guest_fpu->fpstate; + cur_fps = fpu->fpstate; fpregs_lock(); if (!cur_fps->is_confidential && !test_thread_flag(TIF_NEED_FPU_LOAD)) @@ -298,7 +318,7 @@ int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest) fpregs_mark_activate(); fpregs_unlock(); - return 0; + return ret; } EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpstate); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 9856d579aa6e..fe3d8ed3db0e 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1529,6 +1529,7 @@ static struct fpstate *fpu_install_fpstate(struct fpu *fpu, * of that task * @ksize: The required size for the kernel buffer * @usize: The required size for user space buffers + * @guest_fpu: Pointer to a guest FPU container. NULL for host allocations * * Note vs. vmalloc(): If the task with a vzalloc()-allocated buffer * terminates quickly, vfree()-induced IPIs may be a concern, but tasks @@ -1537,7 +1538,7 @@ static struct fpstate *fpu_install_fpstate(struct fpu *fpu, * Returns: 0 on success, -ENOMEM on allocation error. 
*/ static int fpstate_realloc(u64 xfeatures, unsigned int ksize, - unsigned int usize) + unsigned int usize, struct fpu_guest *guest_fpu) { struct fpu *fpu = &current->thread.fpu; struct fpstate *curfps, *newfps = NULL; @@ -1553,6 +1554,12 @@ static int fpstate_realloc(u64 xfeatures, unsigned int ksize, newfps->user_size = usize; newfps->is_valloc = true; + if (guest_fpu) { + newfps->is_guest = true; + newfps->is_confidential = curfps->is_confidential; + guest_fpu->user_xfeatures |= xfeatures; + } + fpregs_lock(); /* * Ensure that the current state is in the registers before @@ -1566,12 +1573,14 @@ static int fpstate_realloc(u64 xfeatures, unsigned int ksize, newfps->user_xfeatures = curfps->user_xfeatures | xfeatures; newfps->xfd = curfps->xfd & ~xfeatures; + if (guest_fpu) + guest_fpu->fpstate = newfps; + curfps = fpu_install_fpstate(fpu, newfps); /* Do the final updates within the locked region */ xstate_init_xcomp_bv(&newfps->regs.xsave, newfps->xfeatures); xfd_update_state(newfps); - fpregs_unlock(); vfree(curfps); @@ -1682,9 +1691,10 @@ static int xstate_request_perm(unsigned long idx, bool guest) return ret; } -int xfd_enable_feature(u64 xfd_err) +static int __xfd_enable_feature(u64 xfd_err, struct fpu_guest *guest_fpu) { u64 xfd_event = xfd_err & XFEATURE_MASK_USER_DYNAMIC; + struct fpu_state_perm *perm; unsigned int ksize, usize; struct fpu *fpu; @@ -1697,14 +1707,16 @@ int xfd_enable_feature(u64 xfd_err) spin_lock_irq(&current->sighand->siglock); /* If not permitted let it die */ - if ((xstate_get_host_group_perm() & xfd_event) != xfd_event) { + if ((xstate_get_group_perm(!!guest_fpu) & xfd_event) != xfd_event) { spin_unlock_irq(&current->sighand->siglock); return -EPERM; } fpu = &current->group_leader->thread.fpu; - ksize = fpu->perm.__state_size; - usize = fpu->perm.__user_state_size; + perm = guest_fpu ? &fpu->guest_perm : &fpu->perm; + ksize = perm->__state_size; + usize = perm->__user_state_size; + /* * The feature is permitted. State size is sufficient. 
Dropping * the lock is safe here even if more features are added from @@ -1717,10 +1729,27 @@ int xfd_enable_feature(u64 xfd_err) * Try to allocate a new fpstate. If that fails there is no way * out. */ - if (fpstate_realloc(xfd_event, ksize, usize)) + if (fpstate_realloc(xfd_event, ksize, usize, guest_fpu)) return -EFAULT; return 0; } + +int xfd_enable_feature(u64 xfd_err) +{ + return __xfd_enable_feature(xfd_err, NULL); +} + +int xfd_enable_guest_features(struct fpu_guest *guest_fpu) +{ + u64 xfd_err = guest_fpu->realloc_request & XFEATURE_MASK_USER_SUPPORTED; + + guest_fpu->realloc_request = 0; + + if (!xfd_err) + return 0; + return __xfd_enable_feature(xfd_err, guest_fpu); +} + #else /* CONFIG_X86_64 */ static inline int xstate_request_perm(unsigned long idx, bool guest) { diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index 98a472775c97..3254e2b5f17f 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -55,6 +55,8 @@ extern void fpu__init_system_xstate(unsigned int legacy_size); extern void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr); +extern int xfd_enable_guest_features(struct fpu_guest *guest_fpu); + static inline u64 xfeatures_mask_supervisor(void) { return fpu_kernel_cfg.max_features & XFEATURE_MASK_SUPERVISOR_SUPPORTED;