On Friday, January 6, 2023 11:04:49 PM CET Bjorn Helgaas wrote: > On Fri, Jan 06, 2023 at 09:37:06PM +0000, Luck, Tony wrote: > > > Thanks, Tony! Something is wrong with the EFI MMIO removal > > > (obviously), but I don't see what it is. Could you try the patch > > > below (replacement for previous one, with more debug)? > > > > Swapped out old patch for this new one. Booted with efi-debug > > > > New dmesg attached. > > Thanks! Ah, off-by-one error because e820__mapped_all() expects > "[start, end)" which means is_acpi_reserved() and is_efi_mmio() must > use the same, and I was thinking "[start, end]" like a struct > resource. > > The below should work better. > > commit 696ac9286d3d ("x86/pci: Treat EfiMemoryMappedIO as reservation of ECAM space") > parent 1b929c02afd3 > Author: Bjorn Helgaas <bhelgaas@xxxxxxxxxx> > Date: Thu Jan 5 16:02:58 2023 -0600 > > x86/pci: Treat EfiMemoryMappedIO as reservation of ECAM space > > Normally we reject ECAM space unless it is reported as reserved in the E820 > table or via a PNP0C02 _CRS method (PCI Firmware, r3.3, sec 4.1.2). This > means extended config space (offsets 0x100-0xfff) may not be accessible. > > Some firmware doesn't report ECAM space via PNP0C02 _CRS methods, but does > mention it as an EfiMemoryMappedIO region via EFI GetMemoryMap(), which is > normally converted to an E820 entry by a bootloader or EFI stub. > > 07eab0901ede ("efi/x86: Remove EfiMemoryMappedIO from E820 map"), removes > E820 entries that correspond to EfiMemoryMappedIO regions because some > other firmware uses EfiMemoryMappedIO for PCI host bridge windows, and the > E820 entries prevent Linux from allocating BAR space for hot-added devices. > > Allow use of ECAM for extended config space when the region is covered by > an EfiMemoryMappedIO region, even if it's not included in E820 or PNP0C02 > _CRS. > > Fixes: 07eab0901ede ("efi/x86: Remove EfiMemoryMappedIO from E820 map") > Link: https://lore.kernel.org/r/ac2693d8-8ba3-72e0-5b66-b3ae008d539d@xxxxxxxxxxxxxxx This prints quite a few debug-level messages to dmesg. I guess you'll post an update with fewer of them? In any case, please feel free to add Acked-by: Rafael J. Wysocki <rafael@xxxxxxxxxx> to it. > > diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c > index 758cbfe55daa..5c6cadd60fef 100644 > --- a/arch/x86/pci/mmconfig-shared.c > +++ b/arch/x86/pci/mmconfig-shared.c > @@ -12,6 +12,7 @@ > */ > > #include <linux/acpi.h> > +#include <linux/efi.h> > #include <linux/pci.h> > #include <linux/init.h> > #include <linux/bitmap.h> > @@ -442,6 +443,34 @@ static bool is_acpi_reserved(u64 start, u64 end, enum e820_type not_used) > return mcfg_res.flags; > } > > +static bool is_efi_mmio(u64 start, u64 end, enum e820_type not_used) > +{ > + efi_memory_desc_t *md; > + u64 size, mmio_start, mmio_end; > + > + end--; /* caller supplies start, end = start + size */ > + pr_info("is_efi_mmio %#lx-%#lx\n", > + (unsigned long) start, (unsigned long) end); > + for_each_efi_memory_desc(md) { > + if (md->type == EFI_MEMORY_MAPPED_IO) { > + size = md->num_pages << EFI_PAGE_SHIFT; > + mmio_start = md->phys_addr; > + mmio_end = mmio_start + size - 1; > + > + pr_info(" efi_mmio %#lx-%#lx\n", > + (unsigned long) mmio_start, > + (unsigned long) mmio_end); > + if (mmio_start <= start && end <= mmio_end) { > + pr_info("is_efi_mmio true\n"); > + return true; > + } > + } > + } > + > + pr_info("is_efi_mmio false\n"); > + return false; > +} > + > typedef bool (*check_reserved_t)(u64 start, u64 end, enum e820_type type); > > static bool __ref is_mmconf_reserved(check_reserved_t is_reserved, > @@ -452,23 +481,24 @@ static bool __ref is_mmconf_reserved(check_reserved_t is_reserved, > u64 size = resource_size(&cfg->res); > u64 old_size = size; > int num_buses; > - char *method = with_e820 ? "E820" : "ACPI motherboard resources"; > + char *method = with_e820 ? "E820" : "ACPI motherboard resources or EFI"; > > + pr_info("is_mmconf_reserved %ps [bus %02x-%02x] %pR\n", > + is_reserved, cfg->start_bus, cfg->end_bus, &cfg->res); > while (!is_reserved(addr, addr + size, E820_TYPE_RESERVED)) { > + pr_info(" %#lx-%#lx (size %#lx) not reserved\n", > + (unsigned long) addr, (unsigned long) (addr + size - 1), > + (unsigned long) size); > size >>= 1; > + pr_info(" size reduced to %#lx\n", (unsigned long) size); > if (size < (16UL<<20)) > break; > } > > - if (size < (16UL<<20) && size != old_size) > + if (size < (16UL<<20) && size != old_size) { > + pr_info("is_mmconf_reserved %ps false\n", is_reserved); > return false; > - > - if (dev) > - dev_info(dev, "MMCONFIG at %pR reserved in %s\n", > - &cfg->res, method); > - else > - pr_info(PREFIX "MMCONFIG at %pR reserved in %s\n", > - &cfg->res, method); > + } > > if (old_size != size) { > /* update end_bus */ > @@ -487,30 +517,42 @@ static bool __ref is_mmconf_reserved(check_reserved_t is_reserved, > &cfg->res, (unsigned long) cfg->address); > else > pr_info(PREFIX > - "MMCONFIG for %04x [bus%02x-%02x] " > + "MMCONFIG for %04x [bus %02x-%02x] " > "at %pR (base %#lx) (size reduced!)\n", > cfg->segment, cfg->start_bus, cfg->end_bus, > &cfg->res, (unsigned long) cfg->address); > } > > + if (dev) > + dev_info(dev, "MMCONFIG at %pR reserved in %s\n", > + &cfg->res, method); > + else > + pr_info(PREFIX "MMCONFIG at %pR reserved in %s\n", > + &cfg->res, method); > + > return true; > } > > static bool __ref > pci_mmcfg_check_reserved(struct device *dev, struct pci_mmcfg_region *cfg, int early) > { > + pr_info("pci_mmcfg_check_reserved([bus %02x-%02x] %pR, %s)\n", > + cfg->start_bus, cfg->end_bus, &cfg->res, > + early ? "early" : "late"); > if (!early && !acpi_disabled) { > if (is_mmconf_reserved(is_acpi_reserved, cfg, dev, 0)) > return true; > + if (is_mmconf_reserved(is_efi_mmio, cfg, dev, 0)) > + return true; > > if (dev) > dev_info(dev, FW_INFO > - "MMCONFIG at %pR not reserved in " > + "MMCONFIG at %pR not reserved in EFI or " > "ACPI motherboard resources\n", > &cfg->res); > else > pr_info(FW_INFO PREFIX > - "MMCONFIG at %pR not reserved in " > + "MMCONFIG at %pR not reserved in EFI or " > "ACPI motherboard resources\n", > &cfg->res); > } > @@ -536,6 +578,7 @@ static void __init pci_mmcfg_reject_broken(int early) > { > struct pci_mmcfg_region *cfg; > > + pr_info("pci_mmcfg_reject_broken(%s)\n", early ? "early" : "late"); > list_for_each_entry(cfg, &pci_mmcfg_list, list) { > if (pci_mmcfg_check_reserved(NULL, cfg, early) == 0) { > pr_info(PREFIX "not using MMCONFIG\n"); > @@ -570,6 +613,7 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header) > unsigned long i; > int entries; > > + pr_info("pci_parse_mcfg\n"); > if (!header) > return -EINVAL; > > @@ -661,6 +705,7 @@ static int __initdata known_bridge; > > void __init pci_mmcfg_early_init(void) > { > + pr_info("pci_mmcfg_early_init\n"); > if (pci_probe & PCI_PROBE_MMCONF) { > if (pci_mmcfg_check_hostbridge()) > known_bridge = 1; > @@ -674,6 +719,7 @@ void __init pci_mmcfg_early_init(void) > > void __init pci_mmcfg_late_init(void) > { > + pr_info("pci_mmcfg_late_init\n"); > /* MMCONFIG disabled */ > if ((pci_probe & PCI_PROBE_MMCONF) == 0) > return; > @@ -725,6 +771,8 @@ int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end, > struct resource *tmp = NULL; > struct pci_mmcfg_region *cfg; > > + dev_info(dev, "pci_mmconfig_insert %02x-%02x addr %#lx\n", > + start, end, (unsigned long)addr); > if (!(pci_probe & PCI_PROBE_MMCONF) || pci_mmcfg_arch_init_failed) > return -ENODEV; > > @@ -788,6 +836,7 @@ int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end, > > mutex_unlock(&pci_mmcfg_lock); > > + dev_info(dev, "pci_mmconfig_insert returns %d\n", rc); > return rc; > } > >