On Thu, Nov 23, 2023 at 09:38:13AM +0000, Huang, Kai wrote:
>
> > diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
> > index 171d86fe71ef..602b5d3982ff 100644
> > --- a/arch/x86/kernel/acpi/boot.c
> > +++ b/arch/x86/kernel/acpi/boot.c
> > @@ -22,6 +22,7 @@
> >  #include <linux/efi-bgrt.h>
> >  #include <linux/serial_core.h>
> >  #include <linux/pgtable.h>
> > +#include <linux/sched/hotplug.h>
> >
> >  #include <asm/e820/api.h>
> >  #include <asm/irqdomain.h>
> > @@ -33,6 +34,7 @@
> >  #include <asm/smp.h>
> >  #include <asm/i8259.h>
> >  #include <asm/setup.h>
> > +#include <asm/init.h>
>
> The above two are leftovers I believe?
>
> [...]
>

Right.

> > +
> > +static atomic_t waiting_for_crash_ipi;
> > +
> > +void asm_acpi_mp_play_dead(u64 reset_vector, u64 pgd_pa);
> > +
> > +static void acpi_mp_play_dead(void)
> > +{
> > +        play_dead_common();
> > +        asm_acpi_mp_play_dead(acpi_mp_reset_vector_paddr,
> > +                              acpi_mp_pgd);
> > +}
> > +
> > +static void acpi_mp_cpu_die(unsigned int cpu)
> > +{
> > +        u32 apicid = per_cpu(x86_cpu_to_apicid, cpu);
> > +        unsigned long timeout;
> > +
> > +        /*
> > +         * Use TEST mailbox command to prove that BIOS got control over
> > +         * the CPU before declaring it dead.
> > +         *
> > +         * BIOS has to clear 'command' field of the mailbox.
> > +         */
> > +        acpi_mp_wake_mailbox->apic_id = apicid;
> > +        smp_store_release(&acpi_mp_wake_mailbox->command,
> > +                          ACPI_MP_WAKE_COMMAND_TEST);
> > +
> > +        /* Don't wait longer than a second. */
> > +        timeout = USEC_PER_SEC;
> > +        while (READ_ONCE(acpi_mp_wake_mailbox->command) && timeout--)
> > +                udelay(1);
> > +}
> > +
> > +static void acpi_mp_stop_other_cpus(int wait)
> > +{
> > +        smp_shutdown_nonboot_cpus(smp_processor_id());
> > +}
> > +
> > +static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
> > +{
> > +        local_irq_disable();
> > +
> > +        crash_save_cpu(regs, raw_smp_processor_id());
> > +
> > +        cpu_emergency_stop_pt();
> > +
> > +        disable_local_APIC();
> > +
> > +        /*
> > +         * Prepare the CPU for reboot _after_ invoking the callback so that the
> > +         * callback can safely use virtualization instructions, e.g. VMCLEAR.
> > +         */
> > +        cpu_emergency_disable_virtualization();
> > +
> > +        atomic_dec(&waiting_for_crash_ipi);
> > +
> > +        asm_acpi_mp_play_dead(acpi_mp_reset_vector_paddr,
> > +                              acpi_mp_pgd);
> > +
> > +        return NMI_HANDLED;
> > +}
> > +
> > +static void acpi_mp_crash_stop_other_cpus(void)
> > +{
> > +        unsigned long timeout;
> > +
> > +        /* The kernel is broken so disable interrupts */
> > +        local_irq_disable();
> > +
> > +
> > +        atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
> > +
> > +        /* Would it be better to replace the trap vector here? */
> > +        if (register_nmi_handler(NMI_LOCAL, crash_nmi_callback,
> > +                                 NMI_FLAG_FIRST, "crash"))
> > +                return;         /* Return what? */
> > +
> > +        apic_send_IPI_allbutself(NMI_VECTOR);
> > +
> > +        /* Don't wait longer than a second. */
> > +        timeout = USEC_PER_SEC;
> > +        while (atomic_read(&waiting_for_crash_ipi) && timeout--)
> > +                udelay(1);
> > +}
> > +
> >
>
> [...]
>
> > +        smp_ops.stop_other_cpus = acpi_mp_stop_other_cpus;
> > +        smp_ops.crash_stop_other_cpus = acpi_mp_crash_stop_other_cpus;
> > +
> >
>
> The above acpi_mp_crash_stop_other_cpus() and crash_nmi_callback() etc are kinda
> duplicated code with the normal crash kexec() in reboot.c.
>
> I am not expert here but spent some time looking into the code, and it appears
> the main reason preventing us from reusing that code should be TDX guest doesn't
> play nicely with mwait/halt staff when putting the cpu to offline.
>
> I am thinking if we skip/replace them with the asm_acpi_mp_play_dead(), we
> should be able to just reuse the existing smp_ops.stop_other_cpus() and
> smp_ops.crash_stop_other_cpus()?

Okay, fair enough. See fixup below.

> > +
> >  int __init acpi_parse_mp_wake(union acpi_subtable_headers *header,
> >                                const unsigned long end)
> >  {
> >          struct acpi_madt_multiproc_wakeup *mp_wake;
> >
> >          mp_wake = (struct acpi_madt_multiproc_wakeup *)header;
> > -        if (BAD_MADT_ENTRY(mp_wake, end))
> > +        if (!mp_wake)
> > +                return -EINVAL;
>
> I think you can keep the BAD_MADT_ENTRY() check as a standard check, and ...

No. BAD_MADT_ENTRY() will fail if the struct version is V0 because the size
will be smaller than sizeof(struct acpi_madt_multiproc_wakeup).

diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 4fab2ed454f3..3c8efba86d5c 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -38,6 +38,7 @@ struct smp_ops {
 	int (*cpu_disable)(void);
 	void (*cpu_die)(unsigned int cpu);
 	void (*play_dead)(void);
+	void (*crash_play_dead)(void);
 
 	void (*send_call_func_ipi)(const struct cpumask *mask);
 	void (*send_call_func_single_ipi)(int cpu);
diff --git a/arch/x86/kernel/acpi/madt_wakeup.c b/arch/x86/kernel/acpi/madt_wakeup.c
index 782fe8fd533c..a801f773f9f1 100644
--- a/arch/x86/kernel/acpi/madt_wakeup.c
+++ b/arch/x86/kernel/acpi/madt_wakeup.c
@@ -25,6 +25,12 @@ static u64 acpi_mp_reset_vector_paddr __ro_after_init;
 
 void asm_acpi_mp_play_dead(u64 reset_vector, u64 pgd_pa);
 
+static void crash_acpi_mp_play_dead(void)
+{
+	asm_acpi_mp_play_dead(acpi_mp_reset_vector_paddr,
+			      acpi_mp_pgd);
+}
+
 static void acpi_mp_play_dead(void)
 {
 	play_dead_common();
@@ -58,57 +64,6 @@ static void acpi_mp_stop_other_cpus(int wait)
 	smp_shutdown_nonboot_cpus(smp_processor_id());
 }
 
-#ifdef CONFIG_KEXEC_CORE
-static atomic_t waiting_for_crash_ipi;
-
-static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
-{
-	local_irq_disable();
-
-	crash_save_cpu(regs, raw_smp_processor_id());
-
-	cpu_emergency_stop_pt();
-
-	disable_local_APIC();
-
-	/*
-	 * Prepare the CPU for reboot _after_ invoking the callback so that the
-	 * callback can safely use virtualization instructions, e.g. VMCLEAR.
-	 */
-	cpu_emergency_disable_virtualization();
-
-	atomic_dec(&waiting_for_crash_ipi);
-
-	asm_acpi_mp_play_dead(acpi_mp_reset_vector_paddr,
-			      acpi_mp_pgd);
-
-	return NMI_HANDLED;
-}
-
-static void acpi_mp_crash_stop_other_cpus(void)
-{
-	unsigned long timeout;
-
-	/* The kernel is broken so disable interrupts */
-	local_irq_disable();
-
-
-	atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
-
-	/* Would it be better to replace the trap vector here? */
-	if (register_nmi_handler(NMI_LOCAL, crash_nmi_callback,
-				 NMI_FLAG_FIRST, "crash"))
-		return;		/* Return what? */
-
-	apic_send_IPI_allbutself(NMI_VECTOR);
-
-	/* Don't wait longer than a second. */
-	timeout = USEC_PER_SEC;
-	while (atomic_read(&waiting_for_crash_ipi) && timeout--)
-		udelay(1);
-}
-#endif
-
 /* The argument is required to match type of x86_mapping_info::alloc_pgt_page */
 static void __init *alloc_pgt_page(void *dummy)
 {
@@ -277,11 +232,9 @@ static int __init acpi_mp_setup_reset(u64 reset_vector)
 	}
 
 	smp_ops.play_dead = acpi_mp_play_dead;
+	smp_ops.crash_play_dead = crash_acpi_mp_play_dead;
 	smp_ops.cpu_die = acpi_mp_cpu_die;
 	smp_ops.stop_other_cpus = acpi_mp_stop_other_cpus;
-#ifdef CONFIG_KEXEC_CORE
-	smp_ops.crash_stop_other_cpus = acpi_mp_crash_stop_other_cpus;
-#endif
 
 	acpi_mp_reset_vector_paddr = reset_vector;
 	acpi_mp_pgd = __pa(pgd);
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index c81afffaa954..99e6ab552da0 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -878,10 +878,14 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
 	cpu_emergency_disable_virtualization();
 
 	atomic_dec(&waiting_for_crash_ipi);
-	/* Assume hlt works */
-	halt();
-	for (;;)
-		cpu_relax();
+
+	if (smp_ops.crash_play_dead) {
+		smp_ops.crash_play_dead();
+	} else {
+		halt();
+		for (;;)
+			cpu_relax();
+	}
 
 	return NMI_HANDLED;
 }

-- 
  Kiryl Shutsemau / Kirill A. Shutemov
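P.S. To make the BAD_MADT_ENTRY() point above concrete, the open-coded sanity
check amounts to something like the sketch below. The helper name and the size
constant are illustrative only, not the actual fixup or kernel definitions;
the idea is just that the minimum acceptable length is the V0 entry size (which
has no reset vector), not sizeof(struct acpi_madt_multiproc_wakeup):

#include <linux/acpi.h>
#include <linux/init.h>

/* Illustrative: a V0 entry (header + mailbox version + reserved + mailbox address). */
#define MP_WAKE_ENTRY_SIZE_V0	16

static bool __init mp_wake_entry_is_sane(struct acpi_madt_multiproc_wakeup *mp_wake,
					 unsigned long end)
{
	/* The entry must not run past the end of the MADT. */
	if ((unsigned long)mp_wake + mp_wake->header.length > end)
		return false;

	/*
	 * Unlike BAD_MADT_ENTRY(), only require the V0 size here: a V0 entry
	 * is valid even though it is smaller than the current C struct, which
	 * also describes the V1 reset vector field.
	 */
	if (mp_wake->header.length < MP_WAKE_ENTRY_SIZE_V0)
		return false;

	return true;
}

The real check can of course be stricter for V1+ entries, but sizeof() of the
current struct cannot be used as the lower bound.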