On Tue, Dec 07 2021 at 19:03, Yang Zhong wrote:
> +
> +		/*
> +		 * Update IA32_XFD to the guest value so #NM can be
> +		 * raised properly in the guest. Instead of directly
> +		 * writing the MSR, call a helper to avoid breaking
> +		 * per-cpu cached value in fpu core.
> +		 */
> +		fpregs_lock();
> +		current->thread.fpu.fpstate->xfd = data;
> +		xfd_update_state(current->thread.fpu.fpstate);
> +		fpregs_unlock();
> +		break;

Now looking at the actual callsite the previous patch really should be
something like the below. Why?

It preserves the inline which allows the compiler to generate better
code in the other hot paths and it keeps the FPU internals to the core
code.

Hmm?

Thanks,

        tglx
---
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -125,8 +125,10 @@ DECLARE_PER_CPU(struct fpu *, fpu_fpregs
 /* Process cleanup */
 #ifdef CONFIG_X86_64
 extern void fpstate_free(struct fpu *fpu);
+extern void fpu_update_xfd_state(u64 xfd);
 #else
 static inline void fpstate_free(struct fpu *fpu) { }
+static inline void fpu_update_xfd_state(u64 xfd) { }
 #endif
 
 /* fpstate-related functions which are exported to KVM */
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -322,6 +322,19 @@ int fpu_swap_kvm_fpstate(struct fpu_gues
 }
 EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpstate);
 
+#ifdef CONFIG_X86_64
+void fpu_update_xfd_state(u64 xfd)
+{
+	struct fpstate *fps = current->thread.fpu.fpstate;
+
+	fpregs_lock();
+	fps->xfd = xfd;
+	xfd_update_state(fps);
+	fpregs_unlock();
+}
+EXPORT_SYMBOL_GPL(fpu_update_xfd_state);
+#endif
+
 void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf,
 				    unsigned int size, u32 pkru)
 {