From: Tianyu Lan <ltykernel@xxxxxxxxx> Sent: Monday, April 3, 2023 10:44 AM > The patch subject prefix of "x86/hyperv/sev:" doesn't make sense. There's no pathname like that in the kernel code. I think it should just be "x86/sev:". > Enable #HV exception to handle interrupt requests from hypervisor. > > Co-developed-by: Lendacky Thomas <thomas.lendacky@xxxxxxx> > Co-developed-by: Kalra Ashish <ashish.kalra@xxxxxxx> > Signed-off-by: Tianyu Lan <tiala@xxxxxxxxxxxxx> > --- > Change since RFC V3: > * Check NMI event when irq is disabled. > * Remove redundant variable > --- > arch/x86/include/asm/mem_encrypt.h | 2 + > arch/x86/include/uapi/asm/svm.h | 4 + > arch/x86/kernel/sev.c | 314 ++++++++++++++++++++++++----- > arch/x86/kernel/traps.c | 2 + > 4 files changed, 266 insertions(+), 56 deletions(-) > > diff --git a/arch/x86/include/asm/mem_encrypt.h > b/arch/x86/include/asm/mem_encrypt.h > index b7126701574c..9299caeca69f 100644 > --- a/arch/x86/include/asm/mem_encrypt.h > +++ b/arch/x86/include/asm/mem_encrypt.h > @@ -50,6 +50,7 @@ void __init early_set_mem_enc_dec_hypercall(unsigned long > vaddr, int npages, > void __init mem_encrypt_free_decrypted_mem(void); > > void __init sev_es_init_vc_handling(void); > +void __init sev_snp_init_hv_handling(void); > > #define __bss_decrypted __section(".bss..decrypted") > > @@ -73,6 +74,7 @@ static inline void __init sme_encrypt_kernel(struct boot_params > *bp) { } > static inline void __init sme_enable(struct boot_params *bp) { } > > static inline void sev_es_init_vc_handling(void) { } > +static inline void sev_snp_init_hv_handling(void) { } > > static inline int __init > early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0; } > diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h > index 80e1df482337..828d624a38cf 100644 > --- a/arch/x86/include/uapi/asm/svm.h > +++ b/arch/x86/include/uapi/asm/svm.h > @@ -115,6 +115,10 @@ > #define SVM_VMGEXIT_AP_CREATE_ON_INIT 0 > #define SVM_VMGEXIT_AP_CREATE 1 > #define SVM_VMGEXIT_AP_DESTROY 2 > +#define SVM_VMGEXIT_HV_DOORBELL_PAGE 0x80000014 > +#define SVM_VMGEXIT_GET_PREFERRED_HV_DOORBELL_PAGE 0 > +#define SVM_VMGEXIT_SET_HV_DOORBELL_PAGE 1 > +#define SVM_VMGEXIT_QUERY_HV_DOORBELL_PAGE 2 > #define SVM_VMGEXIT_HV_FEATURES 0x8000fffd > #define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe > #define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \ > diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c > index 6445f5356c45..7fcb3b548215 100644 > --- a/arch/x86/kernel/sev.c > +++ b/arch/x86/kernel/sev.c > @@ -122,6 +122,152 @@ struct sev_config { > > static struct sev_config sev_cfg __read_mostly; > > +static noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state); > +static noinstr void __sev_put_ghcb(struct ghcb_state *state); > +static int vmgexit_hv_doorbell_page(struct ghcb *ghcb, u64 op, u64 pa); > +static void sev_snp_setup_hv_doorbell_page(struct ghcb *ghcb); > + > +union hv_pending_events { > + u16 events; > + struct { > + u8 vector; > + u8 nmi : 1; > + u8 mc : 1; > + u8 reserved1 : 5; > + u8 no_further_signal : 1; > + }; > +}; > + > +struct sev_hv_doorbell_page { > + union hv_pending_events pending_events; > + u8 no_eoi_required; > + u8 reserved2[61]; > + u8 padding[4032]; > +}; > + > +struct sev_snp_runtime_data { > + struct sev_hv_doorbell_page hv_doorbell_page; > +}; > + > +static DEFINE_PER_CPU(struct sev_snp_runtime_data*, snp_runtime_data); > + > +static inline u64 sev_es_rd_ghcb_msr(void) > +{ > + return __rdmsr(MSR_AMD64_SEV_ES_GHCB); > +} > + > +static __always_inline void sev_es_wr_ghcb_msr(u64 val) > +{ > + u32 low, high; > + > + low = (u32)(val); > + high = (u32)(val >> 32); > + > + native_wrmsr(MSR_AMD64_SEV_ES_GHCB, low, high); > +} > + > +struct sev_hv_doorbell_page *sev_snp_current_doorbell_page(void) > +{ > + return &this_cpu_read(snp_runtime_data)->hv_doorbell_page; > +} > + > +static u8 sev_hv_pending(void) > +{ > + return sev_snp_current_doorbell_page()->pending_events.events; > +} > + > +#define sev_hv_pending_nmi \ > + sev_snp_current_doorbell_page()->pending_events.nmi > + > +static void hv_doorbell_apic_eoi_write(u32 reg, u32 val) > +{ > + if (xchg(&sev_snp_current_doorbell_page()->no_eoi_required, 0) & 0x1) > + return; > + > + BUG_ON(reg != APIC_EOI); > + apic->write(reg, val); > +} > + > +static void do_exc_hv(struct pt_regs *regs) > +{ > + union hv_pending_events pending_events; > + > + while (sev_hv_pending()) { > + pending_events.events = xchg( > + &sev_snp_current_doorbell_page()->pending_events.events, > + 0); > + > + if (pending_events.nmi) > + exc_nmi(regs); > + > +#ifdef CONFIG_X86_MCE > + if (pending_events.mc) > + exc_machine_check(regs); > +#endif > + > + if (!pending_events.vector) > + return; > + > + if (pending_events.vector < FIRST_EXTERNAL_VECTOR) { > + /* Exception vectors */ > + WARN(1, "exception shouldn't happen\n"); > + } else if (pending_events.vector == FIRST_EXTERNAL_VECTOR) { > + sysvec_irq_move_cleanup(regs); > + } else if (pending_events.vector == IA32_SYSCALL_VECTOR) { > + WARN(1, "syscall shouldn't happen\n"); > + } else if (pending_events.vector >= FIRST_SYSTEM_VECTOR) { > + switch (pending_events.vector) { > +#if IS_ENABLED(CONFIG_HYPERV) > + case HYPERV_STIMER0_VECTOR: > + sysvec_hyperv_stimer0(regs); > + break; > + case HYPERVISOR_CALLBACK_VECTOR: > + sysvec_hyperv_callback(regs); > + break; > +#endif > +#ifdef CONFIG_SMP > + case RESCHEDULE_VECTOR: > + sysvec_reschedule_ipi(regs); > + break; > + case IRQ_MOVE_CLEANUP_VECTOR: > + sysvec_irq_move_cleanup(regs); > + break; > + case REBOOT_VECTOR: > + sysvec_reboot(regs); > + break; > + case CALL_FUNCTION_SINGLE_VECTOR: > + sysvec_call_function_single(regs); > + break; > + case CALL_FUNCTION_VECTOR: > + sysvec_call_function(regs); > + break; > +#endif > +#ifdef CONFIG_X86_LOCAL_APIC > + case ERROR_APIC_VECTOR: > + sysvec_error_interrupt(regs); > + break; > + case SPURIOUS_APIC_VECTOR: > + sysvec_spurious_apic_interrupt(regs); > + break; > + case LOCAL_TIMER_VECTOR: > + sysvec_apic_timer_interrupt(regs); > + break; > + case X86_PLATFORM_IPI_VECTOR: > + sysvec_x86_platform_ipi(regs); > + break; > +#endif > + case 0x0: > + break; > + default: > + panic("Unexpected vector %d\n", vector); > + unreachable(); > + } > + } else { > + common_interrupt(regs, pending_events.vector); > + } > + } > +} > + > static __always_inline bool on_vc_stack(struct pt_regs *regs) > { > unsigned long sp = regs->sp; > @@ -179,18 +325,19 @@ void noinstr __sev_es_ist_enter(struct pt_regs *regs) > this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], new_ist); > } > > -static void do_exc_hv(struct pt_regs *regs) > -{ > - /* Handle #HV exception. */ > -} > - > void check_hv_pending(struct pt_regs *regs) > { > if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) > return; > > - if ((regs->flags & X86_EFLAGS_IF) == 0) > + /* Handle NMI when irq is disabled. */ > + if ((regs->flags & X86_EFLAGS_IF) == 0) { > + if (sev_hv_pending_nmi) { > + exc_nmi(regs); > + sev_hv_pending_nmi = 0; > + } > return; > + } > > do_exc_hv(regs); > } > @@ -231,68 +378,35 @@ void noinstr __sev_es_ist_exit(void) > this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist); > } > > -/* > - * Nothing shall interrupt this code path while holding the per-CPU > - * GHCB. The backup GHCB is only for NMIs interrupting this path. > - * > - * Callers must disable local interrupts around it. > - */ > -static noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state) > +static bool sev_restricted_injection_enabled(void) > +{ > + return sev_status & MSR_AMD64_SNP_RESTRICTED_INJ; > +} > + > +void __init sev_snp_init_hv_handling(void) > { > struct sev_es_runtime_data *data; > + struct ghcb_state state; > struct ghcb *ghcb; > + unsigned long flags; > > WARN_ON(!irqs_disabled()); > + if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP) || > !sev_restricted_injection_enabled()) > + return; > > data = this_cpu_read(runtime_data); > - ghcb = &data->ghcb_page; > - > - if (unlikely(data->ghcb_active)) { > - /* GHCB is already in use - save its contents */ > - > - if (unlikely(data->backup_ghcb_active)) { > - /* > - * Backup-GHCB is also already in use. There is no way > - * to continue here so just kill the machine. To make > - * panic() work, mark GHCBs inactive so that messages > - * can be printed out. > - */ > - data->ghcb_active = false; > - data->backup_ghcb_active = false; > - > - instrumentation_begin(); > - panic("Unable to handle #VC exception! GHCB and Backup > GHCB are already in use"); > - instrumentation_end(); > - } > - > - /* Mark backup_ghcb active before writing to it */ > - data->backup_ghcb_active = true; > > - state->ghcb = &data->backup_ghcb; > + local_irq_save(flags); > > - /* Backup GHCB content */ > - *state->ghcb = *ghcb; > - } else { > - state->ghcb = NULL; > - data->ghcb_active = true; > - } > + ghcb = __sev_get_ghcb(&state); > > - return ghcb; > -} > + sev_snp_setup_hv_doorbell_page(ghcb); > > -static inline u64 sev_es_rd_ghcb_msr(void) > -{ > - return __rdmsr(MSR_AMD64_SEV_ES_GHCB); > -} > - > -static __always_inline void sev_es_wr_ghcb_msr(u64 val) > -{ > - u32 low, high; > + __sev_put_ghcb(&state); > > - low = (u32)(val); > - high = (u32)(val >> 32); > + apic_set_eoi_write(hv_doorbell_apic_eoi_write); > > - native_wrmsr(MSR_AMD64_SEV_ES_GHCB, low, high); > + local_irq_restore(flags); > } > > static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt, > @@ -553,6 +667,69 @@ static enum es_result vc_slow_virt_to_phys(struct ghcb > *ghcb, struct es_em_ctxt > /* Include code shared with pre-decompression boot stage */ > #include "sev-shared.c" > > +/* > + * Nothing shall interrupt this code path while holding the per-CPU > + * GHCB. The backup GHCB is only for NMIs interrupting this path. > + * > + * Callers must disable local interrupts around it. > + */ > +static noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state) > +{ > + struct sev_es_runtime_data *data; > + struct ghcb *ghcb; > + > + WARN_ON(!irqs_disabled()); > + > + data = this_cpu_read(runtime_data); > + ghcb = &data->ghcb_page; > + > + if (unlikely(data->ghcb_active)) { > + /* GHCB is already in use - save its contents */ > + > + if (unlikely(data->backup_ghcb_active)) { > + /* > + * Backup-GHCB is also already in use. There is no way > + * to continue here so just kill the machine. To make > + * panic() work, mark GHCBs inactive so that messages > + * can be printed out. > + */ > + data->ghcb_active = false; > + data->backup_ghcb_active = false; > + > + instrumentation_begin(); > + panic("Unable to handle #VC exception! GHCB and Backup > GHCB are already in use"); > + instrumentation_end(); > + } > + > + /* Mark backup_ghcb active before writing to it */ > + data->backup_ghcb_active = true; > + > + state->ghcb = &data->backup_ghcb; > + > + /* Backup GHCB content */ > + *state->ghcb = *ghcb; > + } else { > + state->ghcb = NULL; > + data->ghcb_active = true; > + } > + > + return ghcb; > +} > + > +static void sev_snp_setup_hv_doorbell_page(struct ghcb *ghcb) > +{ > + u64 pa; > + enum es_result ret; > + > + pa = __pa(sev_snp_current_doorbell_page()); > + vc_ghcb_invalidate(ghcb); > + ret = vmgexit_hv_doorbell_page(ghcb, > + SVM_VMGEXIT_SET_HV_DOORBELL_PAGE, > + pa); > + if (ret != ES_OK) > + panic("SEV-SNP: failed to set up #HV doorbell page"); > +} > + > static noinstr void __sev_put_ghcb(struct ghcb_state *state) > { > struct sev_es_runtime_data *data; > @@ -1281,6 +1458,7 @@ static void snp_register_per_cpu_ghcb(void) > ghcb = &data->ghcb_page; > > snp_register_ghcb_early(__pa(ghcb)); > + sev_snp_setup_hv_doorbell_page(ghcb); > } > > void setup_ghcb(void) > @@ -1320,6 +1498,11 @@ void setup_ghcb(void) > snp_register_ghcb_early(__pa(&boot_ghcb_page)); > } > > +int vmgexit_hv_doorbell_page(struct ghcb *ghcb, u64 op, u64 pa) > +{ > + return sev_es_ghcb_hv_call(ghcb, NULL, SVM_VMGEXIT_HV_DOORBELL_PAGE, > op, pa); > +} > + > #ifdef CONFIG_HOTPLUG_CPU > static void sev_es_ap_hlt_loop(void) > { > @@ -1393,6 +1576,7 @@ static void __init alloc_runtime_data(int cpu) > static void __init init_ghcb(int cpu) > { > struct sev_es_runtime_data *data; > + struct sev_snp_runtime_data *snp_data; > int err; > > data = per_cpu(runtime_data, cpu); > @@ -1404,6 +1588,19 @@ static void __init init_ghcb(int cpu) > > memset(&data->ghcb_page, 0, sizeof(data->ghcb_page)); > > + snp_data = memblock_alloc(sizeof(*snp_data), PAGE_SIZE); > + if (!snp_data) > + panic("Can't allocate SEV-SNP runtime data"); > + > + err = early_set_memory_decrypted((unsigned long)&snp_data- > >hv_doorbell_page, > + sizeof(snp_data->hv_doorbell_page)); > + if (err) > + panic("Can't map #HV doorbell pages unencrypted"); > + > + memset(&snp_data->hv_doorbell_page, 0, sizeof(snp_data- > >hv_doorbell_page)); > + > + per_cpu(snp_runtime_data, cpu) = snp_data; > + > data->ghcb_active = false; > data->backup_ghcb_active = false; > } > @@ -2044,7 +2241,12 @@ DEFINE_IDTENTRY_VC_USER(exc_vmm_communication) > > static bool hv_raw_handle_exception(struct pt_regs *regs) > { > - return false; > + /* Clear the no_further_signal bit */ > + sev_snp_current_doorbell_page()->pending_events.events &= 0x7fff; > + > + check_hv_pending(regs); > + > + return true; > } > > static __always_inline bool on_hv_fallback_stack(struct pt_regs *regs) > diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c > index d29debec8134..1aa6cab2394b 100644 > --- a/arch/x86/kernel/traps.c > +++ b/arch/x86/kernel/traps.c > @@ -1503,5 +1503,7 @@ void __init trap_init(void) > cpu_init_exception_handling(); > /* Setup traps as cpu_init() might #GP */ > idt_setup_traps(); > + sev_snp_init_hv_handling(); > + > cpu_init(); > } > -- > 2.25.1