On Thu, Oct 20, 2016 at 03:10:29PM +0200, Alexander Gordeev wrote: > Unlike x86, other architectures using generic ECAM PCI host > do not have a luxury of PCI bus initialized by a BIOS and > ready to use at start. Thus, we need allocate and assign > resources to all devices, much like an architecture's > firmware would do. > > There is no any sort of resource management for memory and > io spaces, since only ones-per-BAR allocations are expected > and no deallocations at all. > > Cc: Thomas Huth <thuth@xxxxxxxxxx> > Cc: Andrew Jones <drjones@xxxxxxxxxx> > Cc: Peter Xu <peterx@xxxxxxxxxx> > Reviewed-by: Andrew Jones <drjones@xxxxxxxxxx> > Signed-off-by: Alexander Gordeev <agordeev@xxxxxxxxxx> > --- > lib/asm-generic/pci-host-bridge.h | 26 ++++ > lib/pci-host-generic.c | 295 ++++++++++++++++++++++++++++++++++++++ > lib/pci-host-generic.h | 46 ++++++ > lib/pci.c | 4 +- > lib/pci.h | 3 + > 5 files changed, 372 insertions(+), 2 deletions(-) > create mode 100644 lib/asm-generic/pci-host-bridge.h > create mode 100644 lib/pci-host-generic.c > create mode 100644 lib/pci-host-generic.h > > diff --git a/lib/asm-generic/pci-host-bridge.h b/lib/asm-generic/pci-host-bridge.h > new file mode 100644 > index 000000000000..ac32b85198e9 > --- /dev/null > +++ b/lib/asm-generic/pci-host-bridge.h > @@ -0,0 +1,26 @@ > +#ifndef _ASM_PCI_HOST_BRIDGE_H_ > +#define _ASM_PCI_HOST_BRIDGE_H_ > +/* > + * Copyright (C) 2016, Red Hat Inc, Alexander Gordeev <agordeev@xxxxxxxxxx> > + * > + * This work is licensed under the terms of the GNU LGPL, version 2. > + */ > +#include "libcflat.h" > + > +phys_addr_t pci_host_bridge_get_paddr(uint64_t addr); > + > +static inline > +phys_addr_t pci_translate_addr(pcidevaddr_t dev __unused, uint64_t addr) > +{ > + /* > + * Assume we only have single PCI host bridge in a system. > + */ > + return pci_host_bridge_get_paddr(addr); > +} > + > +uint8_t pci_config_readb(pcidevaddr_t dev, uint8_t reg); > +uint16_t pci_config_readw(pcidevaddr_t dev, uint8_t reg); > +uint32_t pci_config_readl(pcidevaddr_t dev, uint8_t reg); > +void pci_config_writel(pcidevaddr_t dev, uint8_t reg, uint32_t val); > + > +#endif > diff --git a/lib/pci-host-generic.c b/lib/pci-host-generic.c > new file mode 100644 > index 000000000000..96f4b4b37d34 > --- /dev/null > +++ b/lib/pci-host-generic.c > @@ -0,0 +1,295 @@ > +/* > + * Generic PCI host controller as described in PCI Bus Binding to Open Firmware > + * > + * Copyright (C) 2016, Red Hat Inc, Alexander Gordeev <agordeev@xxxxxxxxxx> > + * > + * This work is licensed under the terms of the GNU LGPL, version 2. > + */ > +#include "libcflat.h" > +#include "devicetree.h" > +#include "alloc.h" > +#include "pci.h" > +#include "asm/pci.h" > +#include "asm/io.h" > +#include "pci-host-generic.h" > +#include <linux/pci_regs.h> > + > +static struct pci_host_bridge *pci_host_bridge; > + > +static int of_flags_to_pci_type(u32 of_flags) > +{ > + static int type_map[] = { > + [1] = PCI_BASE_ADDRESS_SPACE_IO, > + [2] = PCI_BASE_ADDRESS_MEM_TYPE_32, > + [3] = PCI_BASE_ADDRESS_MEM_TYPE_64 > + }; > + int idx = (of_flags >> 24) & 0x03; > + int res; > + > + assert(idx > 0); > + res = type_map[idx]; > + > + if (of_flags & 0x40000000) > + res |= PCI_BASE_ADDRESS_MEM_PREFETCH; > + > + return res; > +} > + > +static int pci_bar_type(u32 bar) > +{ > + if (bar & PCI_BASE_ADDRESS_SPACE) > + return PCI_BASE_ADDRESS_SPACE_IO; > + else > + return bar & (PCI_BASE_ADDRESS_MEM_TYPE_MASK | > + PCI_BASE_ADDRESS_MEM_PREFETCH); > +} > + > +/* > + * Probe DT for a generic PCI host controller > + * See kernel Documentation/devicetree/bindings/pci/host-generic-pci.txt > + * and function gen_pci_probe() in drivers/pci/host/pci-host-generic.c > + */ > +static struct pci_host_bridge *pci_dt_probe(void) > +{ > + struct pci_host_bridge *host; > + const void *fdt = dt_fdt(); > + const struct fdt_property *prop; > + struct dt_pbus_reg base; > + struct dt_device dt_dev; > + struct dt_bus dt_bus; > + struct pci_addr_space *as; > + fdt32_t *data; > + u32 bus, bus_max; > + u32 nac, nsc, nac_root, nsc_root; > + int nr_range_cells, nr_addr_spaces; > + int ret, node, len, i; > + > + if (!dt_available()) { > + printf("No device tree found\n"); > + return NULL; > + } > + > + dt_bus_init_defaults(&dt_bus); > + dt_device_init(&dt_dev, &dt_bus, NULL); > + > + node = fdt_path_offset(fdt, "/"); > + assert(node >= 0); > + > + ret = dt_get_nr_cells(node, &nac_root, &nsc_root); > + assert(ret == 0); > + assert(nac_root == 1 || nac_root == 2); > + > + node = fdt_node_offset_by_compatible(fdt, node, > + "pci-host-ecam-generic"); > + if (node == -FDT_ERR_NOTFOUND) { > + printf("No PCIe ECAM compatible controller found\n"); > + return NULL; > + } > + assert(node >= 0); > + > + prop = fdt_get_property(fdt, node, "device_type", &len); > + assert(prop && len == 4 && !strcmp((char *)prop->data, "pci")); > + > + dt_device_bind_node(&dt_dev, node); > + ret = dt_pbus_get_base(&dt_dev, &base); > + assert(ret == 0); > + > + prop = fdt_get_property(fdt, node, "bus-range", &len); > + if (prop == NULL) { > + assert(len == -FDT_ERR_NOTFOUND); > + bus = 0x00; > + bus_max = 0xff; > + } else { > + data = (fdt32_t *)prop->data; > + bus = fdt32_to_cpu(data[0]); > + bus_max = fdt32_to_cpu(data[1]); > + assert(bus <= bus_max); > + } > + assert(bus_max < base.size / (1 << PCI_ECAM_BUS_SHIFT)); > + > + ret = dt_get_nr_cells(node, &nac, &nsc); > + assert(ret == 0); > + assert(nac == 3 && nsc == 2); > + > + prop = fdt_get_property(fdt, node, "ranges", &len); > + assert(prop != NULL); > + > + nr_range_cells = nac + nsc + nac_root; > + nr_addr_spaces = (len / 4) / nr_range_cells; > + assert(nr_addr_spaces); > + > + host = malloc(sizeof(*host) + > + sizeof(host->addr_space[0]) * nr_addr_spaces); > + assert(host != NULL); > + > + host->start = base.addr; > + host->size = base.size; > + host->bus = bus; > + host->bus_max = bus_max; > + host->nr_addr_spaces = nr_addr_spaces; > + > + data = (fdt32_t *)prop->data; > + as = &host->addr_space[0]; > + > + for (i = 0; i < nr_addr_spaces; i++) { > + /* > + * The PCI binding encodes the PCI address with three > + * cells as follows: > + * > + * phys.hi cell: npt000ss bbbbbbbb dddddfff rrrrrrrr > + * phys.mid cell: hhhhhhhh hhhhhhhh hhhhhhhh hhhhhhhh > + * phys.lo cell: llllllll llllllll llllllll llllllll > + * > + * PCI device bus address and flags are encoded into phys.high > + * PCI 64 bit address is encoded into phys.mid and phys.low > + */ > + as->type = of_flags_to_pci_type(fdt32_to_cpu(data[0])); > + as->pci_start = ((u64)fdt32_to_cpu(data[1]) << 32) | > + fdt32_to_cpu(data[2]); > + > + if (nr_range_cells == 6) { > + as->start = fdt32_to_cpu(data[3]); > + as->size = ((u64)fdt32_to_cpu(data[4]) << 32) | > + fdt32_to_cpu(data[5]); > + } else { > + as->start = ((u64)fdt32_to_cpu(data[3]) << 32) | > + fdt32_to_cpu(data[4]); > + as->size = ((u64)fdt32_to_cpu(data[5]) << 32) | > + fdt32_to_cpu(data[6]); > + } > + > + data += nr_range_cells; > + as++; > + } > + > + return host; > +} > + > +static bool pci_alloc_resource(u64 *addr, u32 bar, u64 size) > +{ > + struct pci_host_bridge *host = pci_host_bridge; > + struct pci_addr_space *as = &host->addr_space[0]; > + u32 mask; > + int i; > + > + for (i = 0; i < host->nr_addr_spaces; i++) { > + if (as->type == pci_bar_type(bar)) > + break; > + as++; > + } > + if (i >= host->nr_addr_spaces) { > + printf("No PCI resource matching a device found\n"); > + return false; So this is the new code with v8, and it needs more work. First, this print message doesn't explain the problem. It should point out which device and which bar failed to have resources allocated and why. For the which, I think it should print the device number and function number in the lspci way %02x.%02x.%1x (bus.dev.fn) followed by the vendor-id and device-id, %04x:%04x. For the bar you'll want to refactor pci_dev_print to pull a pci_bar_print out of it that you can use here. But... what about the why? Is it really necessary to fail this allocation? How does Linux handle it with the same guest config? So I went a head and improved the print message in order to try to understand, and I think I do. The virtio-net-pci device that gets added by default wanted a MEM64/p region. We fail because of the '/p'. I don't know PCI, but shouldn't we be masking off that PCI_BASE_ADDRESS_MEM_PREFETCH bit in this check? Finally, I'd set *addr to some invalid address, zero? when returning false here. > + } > + > + mask = pci_bar_mask(bar); > + size = ALIGN(size, ~mask + 1); > + assert(as->allocated + size <= as->size); > + > + *addr = as->pci_start + as->allocated; > + as->allocated += size; > + > + return true; > +} > + > +bool pci_probe(void) > +{ > + pcidevaddr_t dev; > + u8 header; > + u32 cmd; > + int i; > + > + assert(!pci_host_bridge); > + pci_host_bridge = pci_dt_probe(); > + if (!pci_host_bridge) > + return false; > + > + for (dev = 0; dev < 256; dev++) { > + if (!pci_dev_exists(dev)) > + continue; > + > + /* We are only interested in normal PCI devices */ > + header = pci_config_readb(dev, PCI_HEADER_TYPE); > + if ((header & PCI_HEADER_TYPE_MASK) != PCI_HEADER_TYPE_NORMAL) > + continue; > + > + cmd = PCI_COMMAND_SERR; > + > + for (i = 0; i < 6; i++) { > + u64 addr, size; > + u32 bar; > + > + size = pci_bar_size(dev, i); > + if (!size) > + continue; > + > + bar = pci_bar_get(dev, i); > + if (pci_alloc_resource(&addr, bar, size)) { > + pci_bar_set_addr(dev, i, addr); > + > + if (pci_bar_is_memory(dev, i)) > + cmd |= PCI_COMMAND_MEMORY; > + else > + cmd |= PCI_COMMAND_IO; > + } > + > + if (pci_bar_is64(dev, i)) > + i++; > + } > + > + pci_config_writel(dev, PCI_COMMAND, cmd); Is it safe to still do this when we fail to allocate resources? > + } > + > + return true; > +} > + > +/* > + * This function is to be called from pci_translate_addr() to provide > + * mapping between this host bridge's PCI busses address and CPU physical > + * address. > + */ > +phys_addr_t pci_host_bridge_get_paddr(u64 pci_addr) > +{ > + struct pci_host_bridge *host = pci_host_bridge; > + struct pci_addr_space *as = &host->addr_space[0]; > + int i; > + > + for (i = 0; i < host->nr_addr_spaces; i++) { > + if (pci_addr >= as->pci_start && > + pci_addr < as->pci_start + as->size) > + return as->start + (pci_addr - as->pci_start); > + as++; > + } > + > + return 0; > +} > + > +static void __iomem *pci_get_dev_conf(struct pci_host_bridge *host, int devfn) > +{ > + return (void __iomem *)(unsigned long) > + host->start + (devfn << PCI_ECAM_DEVFN_SHIFT); > +} > + > +u8 pci_config_readb(pcidevaddr_t dev, u8 off) > +{ > + void __iomem *conf = pci_get_dev_conf(pci_host_bridge, dev); > + return readb(conf + off); > +} > + > +u16 pci_config_readw(pcidevaddr_t dev, u8 off) > +{ > + void __iomem *conf = pci_get_dev_conf(pci_host_bridge, dev); > + return readw(conf + off); > +} > + > +u32 pci_config_readl(pcidevaddr_t dev, u8 off) > +{ > + void __iomem *conf = pci_get_dev_conf(pci_host_bridge, dev); > + return readl(conf + off); > +} > + > +void pci_config_writel(pcidevaddr_t dev, u8 off, u32 val) > +{ > + void __iomem *conf = pci_get_dev_conf(pci_host_bridge, dev); > + writel(val, conf + off); > +} > diff --git a/lib/pci-host-generic.h b/lib/pci-host-generic.h > new file mode 100644 > index 000000000000..fd30e7c74ed8 > --- /dev/null > +++ b/lib/pci-host-generic.h > @@ -0,0 +1,46 @@ > +#ifndef PCI_HOST_GENERIC_H > +#define PCI_HOST_GENERIC_H > +/* > + * PCI host bridge supporting structures and constants > + * > + * Copyright (C) 2016, Red Hat Inc, Alexander Gordeev <agordeev@xxxxxxxxxx> > + * > + * This work is licensed under the terms of the GNU LGPL, version 2. > + */ > +#include "libcflat.h" > + > +struct pci_addr_space { > + phys_addr_t pci_start; > + phys_addr_t start; > + phys_addr_t size; > + phys_addr_t allocated; > + int type; > +}; > + > +struct pci_host_bridge { > + phys_addr_t start; > + phys_addr_t size; > + int bus; > + int bus_max; > + int nr_addr_spaces; > + struct pci_addr_space addr_space[]; > +}; > + > +/* > + * The following constants are derived from Linux, see this source: > + * > + * drivers/pci/host/pci-host-generic.c > + * struct gen_pci_cfg_bus_ops::bus_shift > + * int gen_pci_parse_map_cfg_windows(struct gen_pci *pci) > + * > + * Documentation/devicetree/bindings/pci/host-generic-pci.txt describes > + * ECAM Configuration Space is be memory-mapped by concatenating the various > + * components to form an offset: > + * > + * cfg_offset(bus, device, function, register) = > + * bus << 20 | device << 15 | function << 12 | register > + */ > +#define PCI_ECAM_BUS_SHIFT 20 > +#define PCI_ECAM_DEVFN_SHIFT 12 > + > +#endif > diff --git a/lib/pci.c b/lib/pci.c > index 462cddc8a806..2dbbba428144 100644 > --- a/lib/pci.c > +++ b/lib/pci.c > @@ -27,13 +27,13 @@ pcidevaddr_t pci_find_dev(uint16_t vendor_id, uint16_t device_id) > return PCIDEVADDR_INVALID; > } > > -static uint32_t pci_bar_mask(uint32_t bar) > +uint32_t pci_bar_mask(uint32_t bar) > { > return (bar & PCI_BASE_ADDRESS_SPACE_IO) ? > PCI_BASE_ADDRESS_IO_MASK : PCI_BASE_ADDRESS_MEM_MASK; > } > > -static uint32_t pci_bar_get(pcidevaddr_t dev, int bar_num) > +uint32_t pci_bar_get(pcidevaddr_t dev, int bar_num) > { > return pci_config_readl(dev, PCI_BASE_ADDRESS_0 + bar_num * 4); > } > diff --git a/lib/pci.h b/lib/pci.h > index fc0940adc299..7acbbdf2eb16 100644 > --- a/lib/pci.h > +++ b/lib/pci.h > @@ -15,6 +15,7 @@ enum { > PCIDEVADDR_INVALID = 0xffff, > }; > > +extern bool pci_probe(void); > extern void pci_print(void); > extern bool pci_dev_exists(pcidevaddr_t dev); > extern pcidevaddr_t pci_find_dev(uint16_t vendor_id, uint16_t device_id); > @@ -34,6 +35,8 @@ extern pcidevaddr_t pci_find_dev(uint16_t vendor_id, uint16_t device_id); > extern phys_addr_t pci_bar_get_addr(pcidevaddr_t dev, int bar_num); > extern void pci_bar_set_addr(pcidevaddr_t dev, int bar_num, phys_addr_t addr); > extern phys_addr_t pci_bar_size(pcidevaddr_t dev, int bar_num); > +extern uint32_t pci_bar_get(pcidevaddr_t dev, int bar_num); > +extern uint32_t pci_bar_mask(uint32_t bar); > extern bool pci_bar_is64(pcidevaddr_t dev, int bar_num); > extern bool pci_bar_is_memory(pcidevaddr_t dev, int bar_num); > extern bool pci_bar_is_valid(pcidevaddr_t dev, int bar_num); > -- > 1.8.3.1 Thanks, drew -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html