On Thu, Jun 11, 2009 at 02:23:11PM -0700, Jesse Barnes wrote: > +++ b/arch/x86/pci/Makefile > @@ -13,5 +13,5 @@ obj-$(CONFIG_X86_VISWS) += visws.o > > obj-$(CONFIG_X86_NUMAQ) += numaq_32.o > > -obj-y += common.o early.o > +obj-y += common.o early.o mrst.o We don't want a CONFIG_PCI_MOORESTOWN, like we have for CONFIG_PCI_OLPC? > +++ b/arch/x86/pci/init.c > @@ -14,6 +14,10 @@ static __init int pci_arch_init(void) > > if (!(pci_probe & PCI_PROBE_NOEARLY)) > pci_mmcfg_early_init(); > +#ifdef CONFIG_PCI_MMCONFIG > + pci_mmcfg_late_init(); > + pci_mrst_init(); > +#endif We should have empty stubs for these functions for the !MMCONFIG case. > +++ b/arch/x86/pci/irq.c > pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); > +#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SFI) > + /* For platforms only have IOAPIC, the PCI irq line is 1:1 mapped to > + * IOAPIC RTE entries, so we just enable RTE for the device. > + */ The comment is a little garbled. Is the assumption here that all SFI platforms have an IOAPIC? > + if (platform_has(X86_PLATFORM_FEATURE_IOAPIC) && > + !platform_has(X86_PLATFORM_FEATURE_8259) && > + !platform_has(X86_PLATFORM_FEATURE_ACPI) && > + !platform_has(X86_PLATFORM_FEATURE_BIOS) && > + pin) { > + ioapic = mp_sfi_find_ioapic(dev->irq); > + io_apic_set_pci_routing(ioapic, > + dev->irq, /* IOAPIC pin */ > + dev->irq, /* IRQ line */ > + 1, /* level trigger */ > + 1); /* polarity active low */ > + } > +#endif > if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) { > char *msg = ""; > > diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c > index 8766b0e..4ff971a 100644 > --- a/arch/x86/pci/mmconfig-shared.c > +++ b/arch/x86/pci/mmconfig-shared.c > @@ -17,6 +17,7 @@ > #include <linux/sort.h> > #include <asm/e820.h> > #include <asm/pci_x86.h> > +#include <linux/sfi.h> > > /* aperture is up to 256MB but BIOS may reserve less */ > #define MMCONFIG_APER_MIN (2 * 1024*1024) > @@ -186,7 +187,12 @@ static const char __init *pci_mmcfg_nvidia_mcp55(void) > /* > * do check if amd fam10h already took over > */ > - if (!acpi_disabled || pci_mmcfg_config_num || mcp55_checked) > +#ifdef CONFIG_ACPI > + > + if (!acpi_disabled) > + return NULL; > +#endif > + if (pci_mmcfg_config_num || mcp55_checked) > return NULL; acpi_disabled definitely should be 0 if CONFIG_ACPI isn't set. > > - if (!known_bridge) > + if (!known_bridge) { > +#ifdef CONFIG_SFI > + sfi_table_parse(SFI_SIG_MCFG, sfi_parse_mcfg); > +#else > acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg); > +#endif > + } SFI and ACPI are mutually exclusive? > +#ifndef CONFIG_SFI > pci_mmcfg_reject_broken(early); > +#endif > > if ((pci_mmcfg_config_num == 0) || > (pci_mmcfg_config == NULL) || > diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c > index 8b2d561..ee9373a 100644 > --- a/arch/x86/pci/mmconfig_32.c > +++ b/arch/x86/pci/mmconfig_32.c > @@ -12,6 +12,7 @@ > #include <linux/pci.h> > #include <linux/init.h> > #include <linux/acpi.h> > +#include <linux/sfi.h> > #include <asm/e820.h> > #include <asm/pci_x86.h> > That's the only change to this file? Looks stray. > diff --git a/arch/x86/pci/mrst.c b/arch/x86/pci/mrst.c > new file mode 100644 > index 0000000..1d69381 > --- /dev/null > +++ b/arch/x86/pci/mrst.c > @@ -0,0 +1,236 @@ > +/* > + * Moorestown PCI support > + * Copyright (c) 2008 Intel Corporation > + * Jesse Barnes <jesse.barnes@xxxxxxxxx> > + * > + * Moorestown has an interesting PCI implementation: > + * - configuration space is memory mapped (as defined by MCFG) > + * - Lincroft devices also have a real, type 1 configuration space > + * - Early Lincroft silicon has a type 1 access bug that will cause > + * a hang if non-existent devices are accessed > + * - some devices have the "fixed BAR" capability, which means > + * they can't be relocated or modified; check for that during > + * BAR sizing > + * > + * So, we use the MCFG space for all reads and writes, but also send > + * Lincroft writes to type 1 space. But only read/write if the device > + * actually exists, otherwise return all 1s for reads and bit bucket > + * the writes. > + * > + * Here we assume that type 1 and MCFG based access has already been set up, > + * such that raw_pci_ops is type 1 and raw_pci_ext_ops is MCFG. How about using pci_direct_conf1 explicitly? We can make pci_mmcfg non-static too. > + */ > + > +#include <linux/sched.h> > +#include <linux/pci.h> > +#include <linux/ioport.h> > +#include <linux/init.h> > +#include <linux/dmi.h> > + > +#include <asm/acpi.h> > +#include <asm/segment.h> > +#include <asm/io.h> > +#include <asm/smp.h> > +#include <asm/pci_x86.h> > +/* > + * Yuck, we really need to add PCIe capability functions to the core > + */ > +#define PCIE_CAP_OFFSET 0x100 We've used PCI_CFG_SPACE_SIZE for this value before now ... > +/* Fixed BAR fields */ > +#define PCIE_VNDR_CAP_ID_FIXED_BAR 0x00 /* Fixed BAR (TBD) */ > +#define PCI_FIXED_BAR_0_SIZE 0x04 > +#define PCI_FIXED_BAR_1_SIZE 0x08 > +#define PCI_FIXED_BAR_2_SIZE 0x0c > +#define PCI_FIXED_BAR_3_SIZE 0x10 > +#define PCI_FIXED_BAR_4_SIZE 0x14 > +#define PCI_FIXED_BAR_5_SIZE 0x1c > + > +int fixed_bar_cap(struct pci_bus *bus, unsigned int devfn) > +{ > + int pos; > + u32 pcie_cap = 0, cap_data; > + > + pos = PCIE_CAP_OFFSET; > + while (pos) { > + if (raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number, > + devfn, pos, 4, &pcie_cap)) > + return 0; > + > + if (pcie_cap == 0xffffffff) > + return 0; > + > + if (PCI_EXT_CAP_ID(pcie_cap) == PCI_EXT_CAP_ID_VNDR) { > + raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number, > + devfn, pos + 4, 4, &cap_data); > + if ((cap_data & 0xffff) == PCIE_VNDR_CAP_ID_FIXED_BAR) > + return pos; > + } > + > + pos = pcie_cap >> 20; > + } > + > + return 0; > +} Do we need to find this before we can call pci_find_ext_capability()? Can we create pci_bus_find_ext_capability() analagous to pci_bus_find_capability()? I hate to see this kind of functionality hidden in an arch file somewhere. > +static int pci_device_update_fixed(struct pci_bus *bus, unsigned int devfn, > + int reg, int len, u32 val, int offset) > +{ > + u32 size; > + unsigned int domain, busnum; > + int bar = (reg - PCI_BASE_ADDRESS_0) >> 2; > + > + domain = pci_domain_nr(bus); > + busnum = bus->number; > + > + if (val == ~0 && len == 4) { > + unsigned long decode; > + > + raw_pci_ext_ops->read(domain, busnum, devfn, > + offset + 8 + (bar * 4), 4, &size); > + > + /* Turn the size into a decode pattern for the sizing code */ > + if (size) { > + decode = size - 1; > + decode |= decode >> 1; > + decode |= decode >> 2; > + decode |= decode >> 4; > + decode |= decode >> 8; > + decode |= decode >> 16; > + decode++; > + decode = ~(decode - 1); > + } else { > + decode = ~0; > + } > + > + /* > + * If val is all ones, the core code is trying to size the reg, > + * so update the mmconfig space with the real size. > + * > + * Note: this assumes the fixed size we got is a power of two. > + */ > + return raw_pci_ext_ops->write(domain, busnum, devfn, reg, 4, > + decode); > + } > + > + /* This is some other kind of BAR write, so just do it. */ > + return raw_pci_ext_ops->write(domain, busnum, devfn, reg, len, val); > +} What if we made __pci_read_base() a __weak function, and you overrode it that way? > +/** > + * type1_access_ok - check whether to use type 1 > + * @bus: bus number > + * @devfn: device & function in question > + * > + * If the bus is on a Lincroft chip and it exists, or is not on a Lincroft at > + * all, the we can go ahead with any reads & writes. If it's on a Lincroft, > + * but doesn't exist, avoid the access altogether to keep the chip from > + * hanging. > + */ > +static bool type1_access_ok(unsigned int bus, unsigned int devfn, int reg) > +{ > + /* This is a workaround for A0 LNC bug where PCI status register does > + * not have new CAP bit set. can not be written by SW either. > + * > + * PCI header type in real LNC indicates a single function device, this > + * will prevent probing other devices under the same function in PCI > + * shim. Therefore, use the header type in shim instead. > + */ > + if (reg >= 0x100 || reg == PCI_STATUS || reg == PCI_HEADER_TYPE) > + return 0; > + if (bus == 0 && (devfn == PCI_DEVFN(2, 0) || devfn == PCI_DEVFN(0, 0))) > + return 1; > + return 0; /* langwell on others */ > +} > + > +static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, > + int size, u32 *value) > +{ > + if (type1_access_ok(bus->number, devfn, where)) > + return raw_pci_ops->read(pci_domain_nr(bus), bus->number, devfn, > + where, size, value); > + return raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number, > + devfn, where, size, value); > +} > + > +static int pci_write(struct pci_bus *bus, unsigned int devfn, int where, > + int size, u32 value) > +{ > + int offset; > + > + /* On MRST, there is no PCI ROM BAR, this will cause a subsequent read > + * to ROM BAR return 0 then being ignored. > + */ > + if (where == PCI_ROM_ADDRESS) > + return 0; > + /* > + * Devices with fixed BARs need special handling: > + * - BAR sizing code will save, write ~0, read size, restore > + * - so writes to fixed BARs need special handling > + * - other writes to fixed BAR devices should go through mmconfig > + */ > + offset = fixed_bar_cap(bus, devfn); > + if (offset && > + (where >= PCI_BASE_ADDRESS_0 && where <= PCI_BASE_ADDRESS_5)) { > + return pci_device_update_fixed(bus, devfn, where, size, value, > + offset); > + } > + > + /* > + * On Moorestown update both real & mmconfig space > + * Note: assumes raw_pci_ext_ops is mmconfig if we're using Lincroft > + * Note 2: early Lincroft silicon can't handle type 1 accesses to > + * non-existent devices, so just eat the write in that case. > + */ > + if (type1_access_ok(bus->number, devfn, where)) > + return raw_pci_ops->write(pci_domain_nr(bus), bus->number, > + devfn, where, size, value); > + return raw_pci_ext_ops->write(pci_domain_nr(bus), bus->number, devfn, > + where, size, value); > +} > + > +struct pci_ops pci_mrst_ops = { > + .read = pci_read, > + .write = pci_write, > +}; > +#ifdef CONFIG_MRST > +/** > + * pci_mrst_init - figure out if this platform should use pci_mrst_ops > + * > + * Moorestown has an interesting PCI implementation (see above). This > + * function checks whether the current platform should use MRST-style PCI > + * config space ops or not, and sets pci_root_ops accordingly. > + */ > +void pci_mrst_init(void) > +{ > + printk(KERN_INFO "Moorestown platform detected, using MRST PCI ops\n"); > + pci_root_ops = pci_mrst_ops; > +} > +#else > +void pci_mrst_init(void) { } > +#endif > + > +#ifdef CONFIG_MRST > +/* > + * Langwell devices reside at fixed offsets, don't try to move them. > + */ > +static void __devinit pci_fixed_bar_fixup(struct pci_dev *dev) > +{ > + unsigned long offset; > + u32 size; > + int i; > + /* Fixup the BAR sizes for fixed BAR devices and make them unmoveable */ > + offset = fixed_bar_cap(dev->bus, dev->devfn); > + if (!offset || PCI_DEVFN(2, 0) == dev->devfn || > + PCI_DEVFN(2, 2) == dev->devfn) > + return; > + > + for (i = 0; i < PCI_ROM_RESOURCE; i++) { > + pci_read_config_dword(dev, offset + 8 + (i * 4), &size); > + dev->resource[i].end = dev->resource[i].start + size - 1; > + dev->resource[i].flags |= IORESOURCE_PCI_FIXED; > + } > +} > +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_fixed_bar_fixup); > +#endif > + > diff --git a/include/linux/pci.h b/include/linux/pci.h > index 72698d8..fe4d18d 100644 > --- a/include/linux/pci.h > +++ b/include/linux/pci.h > @@ -708,6 +708,7 @@ int pci_execute_reset_function(struct pci_dev *dev); > void pci_update_resource(struct pci_dev *dev, int resno); > int __must_check pci_assign_resource(struct pci_dev *dev, int i); > int pci_select_bars(struct pci_dev *dev, unsigned long flags); > +int fixed_bar_cap(struct pci_bus *bus, unsigned int devfn); > > /* ROM control related routines */ > int pci_enable_rom(struct pci_dev *pdev); > diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h > index 0f71812..1a6e34a 100644 > --- a/include/linux/pci_ids.h > +++ b/include/linux/pci_ids.h > @@ -2371,6 +2371,8 @@ > #define PCI_DEVICE_ID_INTEL_82375 0x0482 > #define PCI_DEVICE_ID_INTEL_82424 0x0483 > #define PCI_DEVICE_ID_INTEL_82378 0x0484 > +#define PCI_DEVICE_ID_INTEL_MRST_SD0 0x0807 > +#define PCI_DEVICE_ID_INTEL_MRST_SD1 0x0808 > #define PCI_DEVICE_ID_INTEL_I960 0x0960 > #define PCI_DEVICE_ID_INTEL_I960RM 0x0962 > #define PCI_DEVICE_ID_INTEL_8257X_SOL 0x1062 > @@ -2539,6 +2541,7 @@ > #define PCI_DEVICE_ID_INTEL_PCH_LPC_MAX 0x3b1f > #define PCI_DEVICE_ID_INTEL_PCH_SMBUS 0x3b30 > #define PCI_DEVICE_ID_INTEL_IOAT_SNB 0x402f > +#define PCI_DEVICE_ID_INTEL_MRST_GFX 0x4102 > #define PCI_DEVICE_ID_INTEL_5100_16 0x65f0 > #define PCI_DEVICE_ID_INTEL_5100_21 0x65f5 > #define PCI_DEVICE_ID_INTEL_5100_22 0x65f6 > diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h > index 616bf8b..4b412ae 100644 > --- a/include/linux/pci_regs.h > +++ b/include/linux/pci_regs.h > @@ -503,6 +503,7 @@ > #define PCI_EXT_CAP_ID_PWR 4 > #define PCI_EXT_CAP_ID_ARI 14 > #define PCI_EXT_CAP_ID_SRIOV 16 > +#define PCI_EXT_CAP_ID_VNDR 11 I think these should be kept in order by value ... -- Matthew Wilcox Intel Open Source Technology Centre "Bill, look, we understand that you're interested in selling us this operating system, but compare it to ours. We can't possibly take such a retrograde step." -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html