On Thu, Jun 11, 2009 at 02:23:11PM -0700, Jesse Barnes wrote: > --- a/arch/x86/pci/fixup.c > +++ b/arch/x86/pci/fixup.c > @@ -521,3 +521,13 @@ static void sb600_disable_hpet_bar(struct pci_dev *dev) > } > } > DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_ATI, 0x4385, sb600_disable_hpet_bar); > +/* Intel Moorestown graphics controller has new capabilities but the status > + * register does not indicate that in bit 4. > + */ > +static void __devinit pci_mrst_gfx_controller_cap(struct pci_dev *dev) > +{ > + /* force new cap flag */ > + pci_write_config_byte(dev, PCI_STATUS, PCI_STATUS_CAP_LIST); > +} > +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MRST_GFX, > + pci_mrst_gfx_controller_cap); As this is an "unreleased" chip, can't this be fixed up properly before it ships? > diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c > index 25a1f8e..1726d55 100644 > --- a/arch/x86/pci/init.c > +++ b/arch/x86/pci/init.c > @@ -14,6 +14,10 @@ static __init int pci_arch_init(void) > > if (!(pci_probe & PCI_PROBE_NOEARLY)) > pci_mmcfg_early_init(); > +#ifdef CONFIG_PCI_MMCONFIG > + pci_mmcfg_late_init(); > + pci_mrst_init(); > +#endif Is the #ifdef really needed here? We don't do it for pci_mmcfg_early_init(). > + if (!platform_has(X86_PLATFORM_FEATURE_BIOS)) { > + DBG(KERN_DEBUG "PCI: No BIOS, skip IRQ fixup\n"); > + return; > + } That's funny to see :) > +#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SFI) > + /* For platforms only have IOAPIC, the PCI irq line is 1:1 mapped to > + * IOAPIC RTE entries, so we just enable RTE for the device. > + */ > + if (platform_has(X86_PLATFORM_FEATURE_IOAPIC) && > + !platform_has(X86_PLATFORM_FEATURE_8259) && > + !platform_has(X86_PLATFORM_FEATURE_ACPI) && > + !platform_has(X86_PLATFORM_FEATURE_BIOS) && > + pin) { > + ioapic = mp_sfi_find_ioapic(dev->irq); > + io_apic_set_pci_routing(ioapic, > + dev->irq, /* IOAPIC pin */ > + dev->irq, /* IRQ line */ > + 1, /* level trigger */ > + 1); /* polarity active low */ > + } > +#endif Does there really need to be a #ifdef here? Wouldn't the first platform_has() catch things? And what is CONFIG_SFI? > diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c > index 8766b0e..4ff971a 100644 > --- a/arch/x86/pci/mmconfig-shared.c > +++ b/arch/x86/pci/mmconfig-shared.c > @@ -17,6 +17,7 @@ > #include <linux/sort.h> > #include <asm/e820.h> > #include <asm/pci_x86.h> > +#include <linux/sfi.h> Is sfi.h in some other tree? > - if (!acpi_disabled || pci_mmcfg_config_num || mcp55_checked) > +#ifdef CONFIG_ACPI > + > + if (!acpi_disabled) > + return NULL; > +#endif Is the #ifdef really needed? What happens if you run a CONFIG_ACPI enabled kernel on this platform? Does it not boot properly? Same #ifdef questions for other additions to this file, why is it needed? > +#ifdef CONFIG_SFI > + sfi_table_parse(SFI_SIG_MCFG, sfi_parse_mcfg); > +#else > acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg); > +#endif Is #config needed? What happens if SFI and ACPI are enabled in the .config? > +#ifndef CONFIG_SFI > pci_mmcfg_reject_broken(early); > +#endif #ifdef needed? > +++ b/arch/x86/pci/mrst.c > @@ -0,0 +1,236 @@ > +/* > + * Moorestown PCI support > + * Copyright (c) 2008 Intel Corporation > + * Jesse Barnes <jesse.barnes@xxxxxxxxx> > + * > + * Moorestown has an interesting PCI implementation: > + * - configuration space is memory mapped (as defined by MCFG) > + * - Lincroft devices also have a real, type 1 configuration space > + * - Early Lincroft silicon has a type 1 access bug that will cause > + * a hang if non-existent devices are accessed > + * - some devices have the "fixed BAR" capability, which means > + * they can't be relocated or modified; check for that during > + * BAR sizing > + * > + * So, we use the MCFG space for all reads and writes, but also send > + * Lincroft writes to type 1 space. But only read/write if the device > + * actually exists, otherwise return all 1s for reads and bit bucket > + * the writes. > + * > + * Here we assume that type 1 and MCFG based access has already been set up, > + * such that raw_pci_ops is type 1 and raw_pci_ext_ops is MCFG. > + */ > + > +#include <linux/sched.h> > +#include <linux/pci.h> > +#include <linux/ioport.h> > +#include <linux/init.h> > +#include <linux/dmi.h> > + > +#include <asm/acpi.h> > +#include <asm/segment.h> > +#include <asm/io.h> > +#include <asm/smp.h> > +#include <asm/pci_x86.h> > +/* > + * Yuck, we really need to add PCIe capability functions to the core > + */ > +#define PCIE_CAP_OFFSET 0x100 > + > +/* Fixed BAR fields */ > +#define PCIE_VNDR_CAP_ID_FIXED_BAR 0x00 /* Fixed BAR (TBD) */ > +#define PCI_FIXED_BAR_0_SIZE 0x04 > +#define PCI_FIXED_BAR_1_SIZE 0x08 > +#define PCI_FIXED_BAR_2_SIZE 0x0c > +#define PCI_FIXED_BAR_3_SIZE 0x10 > +#define PCI_FIXED_BAR_4_SIZE 0x14 > +#define PCI_FIXED_BAR_5_SIZE 0x1c > + > +int fixed_bar_cap(struct pci_bus *bus, unsigned int devfn) > +{ > + int pos; > + u32 pcie_cap = 0, cap_data; > + > + pos = PCIE_CAP_OFFSET; > + while (pos) { > + if (raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number, > + devfn, pos, 4, &pcie_cap)) > + return 0; > + > + if (pcie_cap == 0xffffffff) > + return 0; > + > + if (PCI_EXT_CAP_ID(pcie_cap) == PCI_EXT_CAP_ID_VNDR) { > + raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number, > + devfn, pos + 4, 4, &cap_data); > + if ((cap_data & 0xffff) == PCIE_VNDR_CAP_ID_FIXED_BAR) > + return pos; > + } > + > + pos = pcie_cap >> 20; > + } > + > + return 0; > +} > + > +static int pci_device_update_fixed(struct pci_bus *bus, unsigned int devfn, > + int reg, int len, u32 val, int offset) > +{ > + u32 size; > + unsigned int domain, busnum; > + int bar = (reg - PCI_BASE_ADDRESS_0) >> 2; > + > + domain = pci_domain_nr(bus); > + busnum = bus->number; > + > + if (val == ~0 && len == 4) { > + unsigned long decode; > + > + raw_pci_ext_ops->read(domain, busnum, devfn, > + offset + 8 + (bar * 4), 4, &size); > + > + /* Turn the size into a decode pattern for the sizing code */ > + if (size) { > + decode = size - 1; > + decode |= decode >> 1; > + decode |= decode >> 2; > + decode |= decode >> 4; > + decode |= decode >> 8; > + decode |= decode >> 16; > + decode++; > + decode = ~(decode - 1); > + } else { > + decode = ~0; > + } > + > + /* > + * If val is all ones, the core code is trying to size the reg, > + * so update the mmconfig space with the real size. > + * > + * Note: this assumes the fixed size we got is a power of two. > + */ > + return raw_pci_ext_ops->write(domain, busnum, devfn, reg, 4, > + decode); > + } > + > + /* This is some other kind of BAR write, so just do it. */ > + return raw_pci_ext_ops->write(domain, busnum, devfn, reg, len, val); > +} > + > +/** > + * type1_access_ok - check whether to use type 1 > + * @bus: bus number > + * @devfn: device & function in question > + * > + * If the bus is on a Lincroft chip and it exists, or is not on a Lincroft at > + * all, the we can go ahead with any reads & writes. If it's on a Lincroft, > + * but doesn't exist, avoid the access altogether to keep the chip from > + * hanging. > + */ > +static bool type1_access_ok(unsigned int bus, unsigned int devfn, int reg) > +{ > + /* This is a workaround for A0 LNC bug where PCI status register does > + * not have new CAP bit set. can not be written by SW either. > + * > + * PCI header type in real LNC indicates a single function device, this > + * will prevent probing other devices under the same function in PCI > + * shim. Therefore, use the header type in shim instead. > + */ > + if (reg >= 0x100 || reg == PCI_STATUS || reg == PCI_HEADER_TYPE) > + return 0; > + if (bus == 0 && (devfn == PCI_DEVFN(2, 0) || devfn == PCI_DEVFN(0, 0))) > + return 1; > + return 0; /* langwell on others */ > +} > + > +static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, > + int size, u32 *value) > +{ > + if (type1_access_ok(bus->number, devfn, where)) > + return raw_pci_ops->read(pci_domain_nr(bus), bus->number, devfn, > + where, size, value); > + return raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number, > + devfn, where, size, value); > +} > + > +static int pci_write(struct pci_bus *bus, unsigned int devfn, int where, > + int size, u32 value) > +{ > + int offset; > + > + /* On MRST, there is no PCI ROM BAR, this will cause a subsequent read > + * to ROM BAR return 0 then being ignored. > + */ > + if (where == PCI_ROM_ADDRESS) > + return 0; > + /* > + * Devices with fixed BARs need special handling: > + * - BAR sizing code will save, write ~0, read size, restore > + * - so writes to fixed BARs need special handling > + * - other writes to fixed BAR devices should go through mmconfig > + */ > + offset = fixed_bar_cap(bus, devfn); > + if (offset && > + (where >= PCI_BASE_ADDRESS_0 && where <= PCI_BASE_ADDRESS_5)) { > + return pci_device_update_fixed(bus, devfn, where, size, value, > + offset); > + } > + > + /* > + * On Moorestown update both real & mmconfig space > + * Note: assumes raw_pci_ext_ops is mmconfig if we're using Lincroft > + * Note 2: early Lincroft silicon can't handle type 1 accesses to > + * non-existent devices, so just eat the write in that case. > + */ > + if (type1_access_ok(bus->number, devfn, where)) > + return raw_pci_ops->write(pci_domain_nr(bus), bus->number, > + devfn, where, size, value); > + return raw_pci_ext_ops->write(pci_domain_nr(bus), bus->number, devfn, > + where, size, value); > +} > + > +struct pci_ops pci_mrst_ops = { > + .read = pci_read, > + .write = pci_write, > +}; > +#ifdef CONFIG_MRST Why would you want the other portions of this file to be enabled/built in, if CONFIG_MRST is not set? Could you just not build the file in if the config option isn't enabled? > +/** > + * pci_mrst_init - figure out if this platform should use pci_mrst_ops > + * > + * Moorestown has an interesting PCI implementation (see above). This > + * function checks whether the current platform should use MRST-style PCI > + * config space ops or not, and sets pci_root_ops accordingly. > + */ > +void pci_mrst_init(void) > +{ > + printk(KERN_INFO "Moorestown platform detected, using MRST PCI ops\n"); > + pci_root_ops = pci_mrst_ops; > +} > +#else > +void pci_mrst_init(void) { } > +#endif That looks like something that should go into a .h file. Other than the minor structuring stuff, it looks almost sane :) Nice job to Intel for doing the implementation this way, it could have been so much worse. thanks, greg k-h -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html