From: Hou Wenlong <houwenlong.hwl@xxxxxxxxxxxx>

Invoke pvm_early_setup() after idt_setup_early_handler() to enable early
kernel event delivery. Also, modify cpu_init_exception_handling() to call
pvm_setup_event_handling() in order to enable event delivery for the
current CPU. Additionally, for the syscall event, change MSR_LSTAR to the
PVM-specific entry.

Signed-off-by: Hou Wenlong <houwenlong.hwl@xxxxxxxxxxxx>
Signed-off-by: Lai Jiangshan <jiangshan.ljs@xxxxxxxxxxxx>
---
 arch/x86/entry/entry_64.S       |  9 ++++++--
 arch/x86/include/asm/pvm_para.h |  5 +++++
 arch/x86/kernel/cpu/common.c    | 11 ++++++++++
 arch/x86/kernel/head64.c        |  3 +++
 arch/x86/kernel/idt.c           |  2 ++
 arch/x86/kernel/pvm.c           | 37 +++++++++++++++++++++++++++++++++
 6 files changed, 65 insertions(+), 2 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 5b25ea4a16ae..fe12605b3c05 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -124,10 +124,12 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
	 * a completely clean 64-bit userspace context. If we're not,
	 * go to the slow exit path.
	 * In the Xen PV case we must use iret anyway.
+	 * In the PVM guest case we must use the eretu synthetic instruction.
	 */

-	ALTERNATIVE "testb %al, %al; jz swapgs_restore_regs_and_return_to_usermode", \
-		"jmp	swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
+	ALTERNATIVE_2 "testb %al, %al; jz swapgs_restore_regs_and_return_to_usermode", \
+		"jmp	swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV, \
+		"jmp	swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_KVM_PVM_GUEST

	/*
	 * We win! This label is here just for ease of understanding
@@ -597,6 +599,9 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
 #ifdef CONFIG_XEN_PV
	ALTERNATIVE "", "jmp xenpv_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
 #endif
+#ifdef CONFIG_PVM_GUEST
+	ALTERNATIVE "", "jmp pvm_restore_regs_and_return_to_usermode", X86_FEATURE_KVM_PVM_GUEST
+#endif

	POP_REGS pop_rdi=0

diff --git a/arch/x86/include/asm/pvm_para.h b/arch/x86/include/asm/pvm_para.h
index 72c74545dba6..f5d40a57c423 100644
--- a/arch/x86/include/asm/pvm_para.h
+++ b/arch/x86/include/asm/pvm_para.h
@@ -15,6 +15,7 @@ typedef void (*idtentry_t)(struct pt_regs *regs);
 void __init pvm_early_setup(void);
 void __init pvm_setup_early_traps(void);
 void __init pvm_install_sysvec(unsigned int sysvec, idtentry_t handler);
+void pvm_setup_event_handling(void);
 bool __init pvm_kernel_layout_relocate(void);

 static inline void pvm_cpuid(unsigned int *eax, unsigned int *ebx,
@@ -79,6 +80,10 @@ static inline void pvm_install_sysvec(unsigned int sysvec, idtentry_t handler)
 {
 }

+static inline void pvm_setup_event_handling(void)
+{
+}
+
 static inline bool pvm_kernel_layout_relocate(void)
 {
	return false;
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 45f214e41a9a..89874559dbc2 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -66,6 +66,7 @@
 #include <asm/set_memory.h>
 #include <asm/traps.h>
 #include <asm/sev.h>
+#include <asm/pvm_para.h>

 #include "cpu.h"

@@ -2066,7 +2067,15 @@ static void wrmsrl_cstar(unsigned long val)
 void syscall_init(void)
 {
	wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
+
+#ifdef CONFIG_PVM_GUEST
+	if (boot_cpu_has(X86_FEATURE_KVM_PVM_GUEST))
+		wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64_pvm);
+	else
+		wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
+#else
	wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
+#endif

	if (ia32_enabled()) {
		wrmsrl_cstar((unsigned long)entry_SYSCALL_compat);
@@ -2217,6 +2226,8 @@ void cpu_init_exception_handling(void)

	/* Finally load the IDT */
	load_current_idt();
+
+	pvm_setup_event_handling();
 }

 /*
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index d0e8d648bd38..17cd11dd1f03 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -42,6 +42,7 @@
 #include <asm/sev.h>
 #include <asm/tdx.h>
 #include <asm/init.h>
+#include <asm/pvm_para.h>

 /*
  * Manage page tables very early on.
@@ -286,6 +287,8 @@ asmlinkage __visible void __init __noreturn x86_64_start_kernel(char * real_mode

	idt_setup_early_handler();

+	pvm_early_setup();
+
	/* Needed before cc_platform_has() can be used for TDX */
	tdx_early_init();

diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index 660b601f1d6c..0dc3ded6da01 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -12,6 +12,7 @@
 #include <asm/hw_irq.h>
 #include <asm/ia32.h>
 #include <asm/idtentry.h>
+#include <asm/pvm_para.h>

 #define DPL0		0x0
 #define DPL3		0x3
@@ -259,6 +260,7 @@ void __init idt_setup_early_pf(void)
 {
	idt_setup_from_table(idt_table, early_pf_idts,
			     ARRAY_SIZE(early_pf_idts), true);
+	pvm_setup_early_traps();
 }
 #endif

diff --git a/arch/x86/kernel/pvm.c b/arch/x86/kernel/pvm.c
index 352d74394c4a..c38e46a96ad3 100644
--- a/arch/x86/kernel/pvm.c
+++ b/arch/x86/kernel/pvm.c
@@ -286,12 +286,49 @@ __visible noinstr void pvm_event(struct pt_regs *regs)
		common_interrupt(regs, vector);
 }

+extern void pvm_early_kernel_event_entry(void);
+
+/*
+ * Reserve a fixed-size area in the current stack during an event from
+ * supervisor mode. This is for the int3 handler to emulate a call instruction.
+ */
+#define PVM_SUPERVISOR_REDZONE_SIZE	(2*8UL)
+
 void __init pvm_early_setup(void)
 {
	if (!pvm_range_end)
		return;

	setup_force_cpu_cap(X86_FEATURE_KVM_PVM_GUEST);
+
+	wrmsrl(MSR_PVM_VCPU_STRUCT, __pa(this_cpu_ptr(&pvm_vcpu_struct)));
+	wrmsrl(MSR_PVM_EVENT_ENTRY, (unsigned long)(void *)pvm_early_kernel_event_entry - 256);
+	wrmsrl(MSR_PVM_SUPERVISOR_REDZONE, PVM_SUPERVISOR_REDZONE_SIZE);
+	wrmsrl(MSR_PVM_RETS_RIP, (unsigned long)(void *)pvm_rets_rip);
+}
+
+void pvm_setup_event_handling(void)
+{
+	if (boot_cpu_has(X86_FEATURE_KVM_PVM_GUEST)) {
+		u64 xpa = slow_virt_to_phys(this_cpu_ptr(&pvm_vcpu_struct));
+
+		wrmsrl(MSR_PVM_VCPU_STRUCT, xpa);
+		wrmsrl(MSR_PVM_EVENT_ENTRY, (unsigned long)(void *)pvm_user_event_entry);
+		wrmsrl(MSR_PVM_SUPERVISOR_REDZONE, PVM_SUPERVISOR_REDZONE_SIZE);
+		wrmsrl(MSR_PVM_RETU_RIP, (unsigned long)(void *)pvm_retu_rip);
+		wrmsrl(MSR_PVM_RETS_RIP, (unsigned long)(void *)pvm_rets_rip);
+
+		/*
+		 * The PVM spec requires the hypervisor-maintained
+		 * MSR_KERNEL_GS_BASE to be the same as the kernel GSBASE for
+		 * event delivery for user mode. wrmsrl(MSR_KERNEL_GS_BASE)
+		 * accesses only the user GSBASE in the PVCS via
+		 * pvm_write_msr() without involving the hypervisor, so use
+		 * PVM_HC_WRMSR instead.
+		 */
+		pvm_hypercall2(PVM_HC_WRMSR, MSR_KERNEL_GS_BASE,
+			       cpu_kernelmode_gs_base(smp_processor_id()));
+	}
 }

 #define TB_SHIFT	40
-- 
2.19.1.6.gb485710b