On Sun, Jan 21, 2024 at 12:49:00PM +0100, Borislav Petkov wrote: > On Sat, Dec 30, 2023 at 10:19:49AM -0600, Michael Roth wrote: > > From: Ashish Kalra <ashish.kalra@xxxxxxx> > > > > Add a kdump safe version of sev_firmware_shutdown() registered as a > > crash_kexec_post_notifier, which is invoked during panic/crash to do > > SEV/SNP shutdown. This is required for transitioning all IOMMU pages > > to reclaim/hypervisor state, otherwise re-init of IOMMU pages during > > crashdump kernel boot fails and panics the crashdump kernel. This > > panic notifier runs in atomic context, hence it ensures not to > > acquire any locks/mutexes and polls for PSP command completion > > instead of depending on PSP command completion interrupt. > > > > Signed-off-by: Ashish Kalra <ashish.kalra@xxxxxxx> > > [mdr: remove use of "we" in comments] > > Signed-off-by: Michael Roth <michael.roth@xxxxxxx> > > Cleanups on top, see if the below works too. Especially: > > * I've zapped the WBINVD before the TMR pages are freed because > __sev_snp_shutdown_locked() will WBINVD anyway. As Ashish mentioned, the wbinvd_on_all_cpus() is still needed for general TMR handling even outside of panic/SNP, but I think you're right about the panic==true case, where the handling is SNP-specific and so the wbinvd() that gets called later during SNP shutdown will take care of it, so I've adjusted things accordingly. > > * The mutex_is_locked() check in snp_shutdown_on_panic() is silly > because the panic notifier runs on one CPU anyway. Based on discussion with Ashish, I've updated the comments in v2 regarding this to make it a little clearer why it might still be a good idea to keep that check in: it avoids weird unexpected behavior if we try to issue PSP commands while another command, issued by another task before the panic, is still in flight. But I squashed in all the other changes here as-is. -Mike > > Thx. 
> > --- > > diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h > index 435ba9bc4510..27323203e593 100644 > --- a/arch/x86/include/asm/sev.h > +++ b/arch/x86/include/asm/sev.h > @@ -227,6 +227,7 @@ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct sn > void snp_accept_memory(phys_addr_t start, phys_addr_t end); > u64 snp_get_unsupported_features(u64 status); > u64 sev_get_status(void); > +void kdump_sev_callback(void); > #else > static inline void sev_es_ist_enter(struct pt_regs *regs) { } > static inline void sev_es_ist_exit(void) { } > @@ -255,6 +256,7 @@ static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *in > static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { } > static inline u64 snp_get_unsupported_features(u64 status) { return 0; } > static inline u64 sev_get_status(void) { return 0; } > +static inline void kdump_sev_callback(void) { } > #endif > > #ifdef CONFIG_KVM_AMD_SEV > diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c > index 23ede774d31b..64ae3a1e5c30 100644 > --- a/arch/x86/kernel/crash.c > +++ b/arch/x86/kernel/crash.c > @@ -40,6 +40,7 @@ > #include <asm/intel_pt.h> > #include <asm/crash.h> > #include <asm/cmdline.h> > +#include <asm/sev.h> > > /* Used while preparing memory map entries for second kernel */ > struct crash_memmap_data { > @@ -59,12 +60,7 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs) > */ > cpu_emergency_stop_pt(); > > - /* > - * for SNP do wbinvd() on remote CPUs to > - * safely do SNP_SHUTDOWN on the local CPU. 
> - */ > - if (cpu_feature_enabled(X86_FEATURE_SEV_SNP)) > - wbinvd(); > + kdump_sev_callback(); > > disable_local_APIC(); > } > diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c > index c67285824e82..dbb2cc6b5666 100644 > --- a/arch/x86/kernel/sev.c > +++ b/arch/x86/kernel/sev.c > @@ -2262,3 +2262,13 @@ static int __init snp_init_platform_device(void) > return 0; > } > device_initcall(snp_init_platform_device); > + > +void kdump_sev_callback(void) > +{ > + /* > + * Do wbinvd() on remote CPUs when SNP is enabled in order to > + * safely do SNP_SHUTDOWN on the local CPU. > + */ > + if (cpu_feature_enabled(X86_FEATURE_SEV_SNP)) > + wbinvd(); > +} > diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c > index 598878e760bc..c342e5e54e45 100644 > --- a/drivers/crypto/ccp/sev-dev.c > +++ b/drivers/crypto/ccp/sev-dev.c > @@ -161,7 +161,6 @@ static int sev_wait_cmd_ioc(struct sev_device *sev, > > udelay(10); > } > - > return -ETIMEDOUT; > } > > @@ -1654,7 +1653,7 @@ static int sev_update_firmware(struct device *dev) > return ret; > } > > -static int __sev_snp_shutdown_locked(int *error, bool in_panic) > +static int __sev_snp_shutdown_locked(int *error, bool panic) > { > struct sev_device *sev = psp_master->sev_data; > struct sev_data_snp_shutdown_ex data; > @@ -1673,7 +1672,7 @@ static int __sev_snp_shutdown_locked(int *error, bool in_panic) > * In that case, a wbinvd() is done on remote CPUs via the NMI > * callback, so only a local wbinvd() is needed here. 
> */ > - if (!in_panic) > + if (!panic) > wbinvd_on_all_cpus(); > else > wbinvd(); > @@ -2199,26 +2198,13 @@ int sev_dev_init(struct psp_device *psp) > return ret; > } > > -static void __sev_firmware_shutdown(struct sev_device *sev, bool in_panic) > +static void __sev_firmware_shutdown(struct sev_device *sev, bool panic) > { > int error; > > __sev_platform_shutdown_locked(NULL); > > if (sev_es_tmr) { > - /* > - * The TMR area was encrypted, flush it from the cache > - * > - * If invoked during panic handling, local interrupts are > - * disabled and all CPUs are stopped, so wbinvd_on_all_cpus() > - * can't be used. In that case, wbinvd() is done on remote CPUs > - * via the NMI callback, so a local wbinvd() is sufficient here. > - */ > - if (!in_panic) > - wbinvd_on_all_cpus(); > - else > - wbinvd(); > - > __snp_free_firmware_pages(virt_to_page(sev_es_tmr), > get_order(sev_es_tmr_size), > true); > @@ -2237,7 +2223,7 @@ static void __sev_firmware_shutdown(struct sev_device *sev, bool in_panic) > snp_range_list = NULL; > } > > - __sev_snp_shutdown_locked(&error, in_panic); > + __sev_snp_shutdown_locked(&error, panic); > } > > static void sev_firmware_shutdown(struct sev_device *sev) > @@ -2262,26 +2248,18 @@ void sev_dev_destroy(struct psp_device *psp) > psp_clear_sev_irq_handler(psp); > } > > -static int sev_snp_shutdown_on_panic(struct notifier_block *nb, > - unsigned long reason, void *arg) > +static int snp_shutdown_on_panic(struct notifier_block *nb, > + unsigned long reason, void *arg) > { > struct sev_device *sev = psp_master->sev_data; > > - /* > - * Panic callbacks are executed with all other CPUs stopped, > - * so don't wait for sev_cmd_mutex to be released since it > - * would block here forever. 
> - */ > - if (mutex_is_locked(&sev_cmd_mutex)) > - return NOTIFY_DONE; > - > __sev_firmware_shutdown(sev, true); > > return NOTIFY_DONE; > } > > -static struct notifier_block sev_snp_panic_notifier = { > - .notifier_call = sev_snp_shutdown_on_panic, > +static struct notifier_block snp_panic_notifier = { > + .notifier_call = snp_shutdown_on_panic, > }; > > int sev_issue_cmd_external_user(struct file *filep, unsigned int cmd, > @@ -2322,7 +2300,7 @@ void sev_pci_init(void) > "-SNP" : "", sev->api_major, sev->api_minor, sev->build); > > atomic_notifier_chain_register(&panic_notifier_list, > - &sev_snp_panic_notifier); > + &snp_panic_notifier); > return; > > err: > @@ -2339,5 +2317,5 @@ void sev_pci_exit(void) > sev_firmware_shutdown(sev); > > atomic_notifier_chain_unregister(&panic_notifier_list, > - &sev_snp_panic_notifier); > + &snp_panic_notifier); > } > > -- > Regards/Gruss, > Boris. > > https://people.kernel.org/tglx/notes-about-netiquette >