On Mon, Oct 21, 2024 at 5:17 PM Zhang Rui <rui.zhang@xxxxxxxxx> wrote: > > On some systems, the same CPU (with the same APIC ID) is assigned a > different logical CPU id after commit ec9aedb2aa1a ("x86/acpi: Ignore > invalid x2APIC entries"). > > This means that Linux enumerates the CPUs in a different order, which > violates ACPI specification[1] that states: > > "OSPM should initialize processors in the order that they appear in > the MADT" > > The problematic commit parses all LAPIC entries before any x2APIC > entries, aiming to ignore x2APIC entries with APIC ID < 255 when valid > LAPIC entries exist. However, it disrupts the CPU enumeration order on > systems where x2APIC entries precede LAPIC entries in the MADT. > > Fix the problem by separately checking LAPIC entries before parsing any > LAPIC or x2APIC entries. > > 1. https://uefi.org/specs/ACPI/6.5/05_ACPI_Software_Programming_Model.html#madt-processor-local-apic-sapic-structure-entry-order > > Cc: stable@xxxxxxxxxxxxxxx > Reported-by: Jim Mattson <jmattson@xxxxxxxxxx> > Closes: https://lore.kernel.org/all/20241010213136.668672-1-jmattson@xxxxxxxxxx/ > Fixes: ec9aedb2aa1a ("x86/acpi: Ignore invalid x2APIC entries") > Signed-off-by: Zhang Rui <rui.zhang@xxxxxxxxx> > Reviewed-by: Jim Mattson <jmattson@xxxxxxxxxx> > Tested-by: Jim Mattson <jmattson@xxxxxxxxxx> > --- > arch/x86/kernel/acpi/boot.c | 50 +++++++++++++++++++++++++++++++++---- > 1 file changed, 45 insertions(+), 5 deletions(-) > > diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c > index 4efecac49863..c70b86f1f295 100644 > --- a/arch/x86/kernel/acpi/boot.c > +++ b/arch/x86/kernel/acpi/boot.c > @@ -226,6 +226,28 @@ acpi_parse_x2apic(union acpi_subtable_headers *header, const unsigned long end) > return 0; > } > > +static int __init > +acpi_check_lapic(union acpi_subtable_headers *header, const unsigned long end) > +{ > + struct acpi_madt_local_apic *processor = NULL; > + > + processor = (struct acpi_madt_local_apic *)header; > + > + if (BAD_MADT_ENTRY(processor, end)) > + return -EINVAL; > + > + /* Ignore invalid ID */ > + if (processor->id == 0xff) > + return 0; > + > + /* Ignore processors that can not be onlined */ > + if (!acpi_is_processor_usable(processor->lapic_flags)) > + return 0; > + > + has_lapic_cpus = true; > + return 0; > +} > + > static int __init > acpi_parse_lapic(union acpi_subtable_headers * header, const unsigned long end) > { > @@ -257,7 +279,6 @@ acpi_parse_lapic(union acpi_subtable_headers * header, const unsigned long end) > processor->processor_id, /* ACPI ID */ > processor->lapic_flags & ACPI_MADT_ENABLED); > > - has_lapic_cpus = true; > return 0; > } > > @@ -1029,6 +1050,8 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void) > static int __init acpi_parse_madt_lapic_entries(void) > { > int count, x2count = 0; > + struct acpi_subtable_proc madt_proc[2]; > + int ret; > > if (!boot_cpu_has(X86_FEATURE_APIC)) > return -ENODEV; > @@ -1037,10 +1060,27 @@ static int __init acpi_parse_madt_lapic_entries(void) > acpi_parse_sapic, MAX_LOCAL_APIC); > > if (!count) { > - count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, > - acpi_parse_lapic, MAX_LOCAL_APIC); > - x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC, > - acpi_parse_x2apic, MAX_LOCAL_APIC); > + /* Check if there are valid LAPIC entries */ > + acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, acpi_check_lapic, MAX_LOCAL_APIC); > + > + /* > + * Enumerate the APIC IDs in the order that they appear in the > + * MADT, no matter LAPIC entry or x2APIC entry is used. > + */ > + memset(madt_proc, 0, sizeof(madt_proc)); > + madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_APIC; > + madt_proc[0].handler = acpi_parse_lapic; > + madt_proc[1].id = ACPI_MADT_TYPE_LOCAL_X2APIC; > + madt_proc[1].handler = acpi_parse_x2apic; > + ret = acpi_table_parse_entries_array(ACPI_SIG_MADT, > + sizeof(struct acpi_table_madt), > + madt_proc, ARRAY_SIZE(madt_proc), MAX_LOCAL_APIC); > + if (ret < 0) { > + pr_err("Error parsing LAPIC/X2APIC entries\n"); > + return ret; > + } > + count = madt_proc[0].count; > + x2count = madt_proc[1].count; > } > if (!count && !x2count) { > pr_err("No LAPIC entries present\n"); > -- > 2.34.1 > Does anyone have any thoughts on this patch?