> > thus we need to:
> >
> >   1) *always* save guest FRED RSP0 in vmx_prepare_switch_to_host().
> >
> >   2) *always* restore guest FRED RSP0 in vmx_prepare_switch_to_guest(),
> >      because sometimes context switches happen but the CPU does NOT
> >      return to user mode, thus the user return framework detects no change.
> >
> > So it essentially becomes the same as what the original patch does.
> >
> > I guess it's probably not worth the change, what do you think?
>
> One idea would be to have the kernel write MSR_IA32_FRED_RSP0 on return to
> userspace instead of on context switch.  As is, amusingly, there's no need to
> restore the host value if a context switch occurs as the kernel will have
> written the new task's value.  RSP0 only needs to be manually restored if the
> kernel returns to userspace for the vCPU task.  Using a TI flag to track if
> RSP0 needs to be loaded would avoid a fair number of WRMSRs in both KVM and
> the kernel.

I also thought about it (while in the FRED ERETU code path); however, we will
need performance data first.  Even though an extra check (ti_work &
_TIF_NEED_RSP0_LOAD) seems negligible, both return to user and context switch
are hot paths, and I assume return to user is extremely hot.  So let's revisit
it at a later time?

> diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
> index ce8f50192ae3..76724cc42869 100644
> --- a/arch/x86/include/asm/entry-common.h
> +++ b/arch/x86/include/asm/entry-common.h
> @@ -57,6 +57,11 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
>  	if (unlikely(ti_work & _TIF_NEED_FPU_LOAD))
>  		switch_fpu_return();
>
> +	if (cpu_feature_enabled(X86_FEATURE_FRED) &&
> +	    (ti_work & _TIF_NEED_RSP0_LOAD))
> +		wrmsrns(MSR_IA32_FRED_RSP0,
> +			(unsigned long)task_stack_page(current) + THREAD_SIZE);
> +
>  #ifdef CONFIG_COMPAT
>  	/*
>  	 * Compat syscalls set TS_COMPAT.  Make sure we clear it before
> diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
> index c3bd0c0758c9..1674d98a8850 100644
> --- a/arch/x86/include/asm/switch_to.h
> +++ b/arch/x86/include/asm/switch_to.h
> @@ -71,8 +71,7 @@ static inline void update_task_stack(struct task_struct *task)
>  	this_cpu_write(cpu_tss_rw.x86_tss.sp1, task->thread.sp0);
>  #else
>  	if (cpu_feature_enabled(X86_FEATURE_FRED)) {
> -		/* WRMSRNS is a baseline feature for FRED. */
> -		wrmsrns(MSR_IA32_FRED_RSP0, (unsigned long)task_stack_page(task) + THREAD_SIZE);
> +		set_thread_flag(TIF_NEED_RSP0_LOAD);
>  	} else if (cpu_feature_enabled(X86_FEATURE_XENPV)) {
>  		/* Xen PV enters the kernel on the thread stack. */
>  		load_sp0(task_top_of_stack(task));
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 5c6bb26463e8..cb7e3bcb001f 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -1338,15 +1338,9 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
>
>  		wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
>
> -		if (guest_can_use(vcpu, X86_FEATURE_FRED)) {
> -			/*
> -			 * MSR_IA32_FRED_RSP0 is top of task stack, which never changes.
> -			 * Thus it should be initialized only once.
> -			 */
> -			if (unlikely(vmx->msr_host_fred_rsp0 == 0))
> -				vmx->msr_host_fred_rsp0 = read_msr(MSR_IA32_FRED_RSP0);
> -			wrmsrl(MSR_IA32_FRED_RSP0, vmx->msr_guest_fred_rsp0);
> -		}
> +		if (cpu_feature_enabled(X86_FEATURE_FRED) &&
> +		    guest_can_use(vcpu, X86_FEATURE_FRED))
> +			wrmsrns(MSR_IA32_FRED_RSP0, vmx->msr_guest_fred_rsp0);

This check seems unnecessary; I guess you added it to skip the following code
when FRED is not enabled on the host?

>  #else
>  	savesegment(fs, fs_sel);
>  	savesegment(gs, gs_sel);
> @@ -1392,9 +1386,10 @@ static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
>  #ifdef CONFIG_X86_64
>  	wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
>
> -	if (guest_can_use(&vmx->vcpu, X86_FEATURE_FRED)) {
> +	if (cpu_feature_enabled(X86_FEATURE_FRED) &&
> +	    guest_can_use(&vmx->vcpu, X86_FEATURE_FRED)) {
>  		vmx->msr_guest_fred_rsp0 = read_msr(MSR_IA32_FRED_RSP0);
> -		wrmsrl(MSR_IA32_FRED_RSP0, vmx->msr_host_fred_rsp0);
> +		set_thread_flag(TIF_NEED_RSP0_LOAD);
>  	}
>  #endif
>  	load_fixmap_gdt(raw_smp_processor_id());
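
One more note on the sketch above: it relies on a new TIF_NEED_RSP0_LOAD bit
that isn't shown in the diff.  Purely for illustration (the bit number below is
a placeholder and would have to be a currently unused bit), its definition in
arch/x86/include/asm/thread_info.h would presumably mirror TIF_NEED_FPU_LOAD:

	#define TIF_NEED_RSP0_LOAD	18	/* load FRED RSP0 on exit to user mode; bit number is a placeholder */

	#define _TIF_NEED_RSP0_LOAD	(1 << TIF_NEED_RSP0_LOAD)

Like _TIF_NEED_FPU_LOAD, it presumably wouldn't need to be part of
EXIT_TO_USER_MODE_WORK, since arch_exit_to_user_mode_prepare() already sees the
full ti_work mask.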