There is no reason to clone the FPU in arch_dup_task_struct(). Quite the contrary, it prevents optimizations. Move it to copy_thread(). Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx> --- arch/x86/kernel/process.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -87,7 +87,7 @@ int arch_dup_task_struct(struct task_str #ifdef CONFIG_VM86 dst->thread.vm86 = NULL; #endif - return fpu_clone(dst); + return 0; } /* @@ -154,6 +154,8 @@ int copy_thread(unsigned long clone_flag frame->flags = X86_EFLAGS_FIXED; #endif + fpu_clone(p); + /* Kernel thread ? */ if (unlikely(p->flags & PF_KTHREAD)) { p->thread.pkru = pkru_get_init_value();