There is no reason to clone FPU in arch_dup_task_struct(). Quite the contrary it prevents optimizations. Move it to copy_thread() Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx> --- arch/x86/kernel/process.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) --- diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 1d9463e3096b..d2227c55e683 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -87,7 +87,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) #ifdef CONFIG_VM86 dst->thread.vm86 = NULL; #endif - return fpu_clone(dst); + return 0; } /* @@ -154,6 +154,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg, frame->flags = X86_EFLAGS_FIXED; #endif + fpu_clone(p); + /* Kernel thread ? */ if (unlikely(p->flags & PF_KTHREAD)) { p->thread.pkru = pkru_get_init_value();