On Sat, Dec 30, 2023 at 10:19:49AM -0600, Michael Roth wrote: > From: Ashish Kalra <ashish.kalra@xxxxxxx> > > Add a kdump safe version of sev_firmware_shutdown() registered as a > crash_kexec_post_notifier, which is invoked during panic/crash to do > SEV/SNP shutdown. This is required for transitioning all IOMMU pages > to reclaim/hypervisor state, otherwise re-init of IOMMU pages during > crashdump kernel boot fails and panics the crashdump kernel. This > panic notifier runs in atomic context, hence it ensures not to > acquire any locks/mutexes and polls for PSP command completion > instead of depending on PSP command completion interrupt. > > Signed-off-by: Ashish Kalra <ashish.kalra@xxxxxxx> > [mdr: remove use of "we" in comments] > Signed-off-by: Michael Roth <michael.roth@xxxxxxx> Cleanups on top, see if the below works too. Especially: * I've zapped the WBINVD before the TMR pages are freed because __sev_snp_shutdown_locked() will WBINVD anyway. * The mutex_is_locked() check in snp_shutdown_on_panic() is silly because the panic notifier runs on one CPU anyway. Thx. 
--- diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 435ba9bc4510..27323203e593 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -227,6 +227,7 @@ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct sn void snp_accept_memory(phys_addr_t start, phys_addr_t end); u64 snp_get_unsupported_features(u64 status); u64 sev_get_status(void); +void kdump_sev_callback(void); #else static inline void sev_es_ist_enter(struct pt_regs *regs) { } static inline void sev_es_ist_exit(void) { } @@ -255,6 +256,7 @@ static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *in static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { } static inline u64 snp_get_unsupported_features(u64 status) { return 0; } static inline u64 sev_get_status(void) { return 0; } +static inline void kdump_sev_callback(void) { } #endif #ifdef CONFIG_KVM_AMD_SEV diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 23ede774d31b..64ae3a1e5c30 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -40,6 +40,7 @@ #include <asm/intel_pt.h> #include <asm/crash.h> #include <asm/cmdline.h> +#include <asm/sev.h> /* Used while preparing memory map entries for second kernel */ struct crash_memmap_data { @@ -59,12 +60,7 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs) */ cpu_emergency_stop_pt(); - /* - * for SNP do wbinvd() on remote CPUs to - * safely do SNP_SHUTDOWN on the local CPU. 
- */ - if (cpu_feature_enabled(X86_FEATURE_SEV_SNP)) - wbinvd(); + kdump_sev_callback(); disable_local_APIC(); } diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index c67285824e82..dbb2cc6b5666 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -2262,3 +2262,13 @@ static int __init snp_init_platform_device(void) return 0; } device_initcall(snp_init_platform_device); + +void kdump_sev_callback(void) +{ + /* + * Do wbinvd() on remote CPUs when SNP is enabled in order to + * safely do SNP_SHUTDOWN on the local CPU. + */ + if (cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + wbinvd(); +} diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index 598878e760bc..c342e5e54e45 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -161,7 +161,6 @@ static int sev_wait_cmd_ioc(struct sev_device *sev, udelay(10); } - return -ETIMEDOUT; } @@ -1654,7 +1653,7 @@ static int sev_update_firmware(struct device *dev) return ret; } -static int __sev_snp_shutdown_locked(int *error, bool in_panic) +static int __sev_snp_shutdown_locked(int *error, bool panic) { struct sev_device *sev = psp_master->sev_data; struct sev_data_snp_shutdown_ex data; @@ -1673,7 +1672,7 @@ static int __sev_snp_shutdown_locked(int *error, bool in_panic) * In that case, a wbinvd() is done on remote CPUs via the NMI * callback, so only a local wbinvd() is needed here. */ - if (!in_panic) + if (!panic) wbinvd_on_all_cpus(); else wbinvd(); @@ -2199,26 +2198,13 @@ int sev_dev_init(struct psp_device *psp) return ret; } -static void __sev_firmware_shutdown(struct sev_device *sev, bool in_panic) +static void __sev_firmware_shutdown(struct sev_device *sev, bool panic) { int error; __sev_platform_shutdown_locked(NULL); if (sev_es_tmr) { - /* - * The TMR area was encrypted, flush it from the cache - * - * If invoked during panic handling, local interrupts are - * disabled and all CPUs are stopped, so wbinvd_on_all_cpus() - * can't be used. 
In that case, wbinvd() is done on remote CPUs - * via the NMI callback, so a local wbinvd() is sufficient here. - */ - if (!in_panic) - wbinvd_on_all_cpus(); - else - wbinvd(); - __snp_free_firmware_pages(virt_to_page(sev_es_tmr), get_order(sev_es_tmr_size), true); @@ -2237,7 +2223,7 @@ static void __sev_firmware_shutdown(struct sev_device *sev, bool in_panic) snp_range_list = NULL; } - __sev_snp_shutdown_locked(&error, in_panic); + __sev_snp_shutdown_locked(&error, panic); } static void sev_firmware_shutdown(struct sev_device *sev) @@ -2262,26 +2248,18 @@ void sev_dev_destroy(struct psp_device *psp) psp_clear_sev_irq_handler(psp); } -static int sev_snp_shutdown_on_panic(struct notifier_block *nb, - unsigned long reason, void *arg) +static int snp_shutdown_on_panic(struct notifier_block *nb, + unsigned long reason, void *arg) { struct sev_device *sev = psp_master->sev_data; - /* - * Panic callbacks are executed with all other CPUs stopped, - * so don't wait for sev_cmd_mutex to be released since it - * would block here forever. - */ - if (mutex_is_locked(&sev_cmd_mutex)) - return NOTIFY_DONE; - __sev_firmware_shutdown(sev, true); return NOTIFY_DONE; } -static struct notifier_block sev_snp_panic_notifier = { - .notifier_call = sev_snp_shutdown_on_panic, +static struct notifier_block snp_panic_notifier = { + .notifier_call = snp_shutdown_on_panic, }; int sev_issue_cmd_external_user(struct file *filep, unsigned int cmd, @@ -2322,7 +2300,7 @@ void sev_pci_init(void) "-SNP" : "", sev->api_major, sev->api_minor, sev->build); atomic_notifier_chain_register(&panic_notifier_list, - &sev_snp_panic_notifier); + &snp_panic_notifier); return; err: @@ -2339,5 +2317,5 @@ void sev_pci_exit(void) sev_firmware_shutdown(sev); atomic_notifier_chain_unregister(&panic_notifier_list, - &sev_snp_panic_notifier); + &snp_panic_notifier); } -- Regards/Gruss, Boris. https://people.kernel.org/tglx/notes-about-netiquette