nit: I'd prefer $SUBJECT to have prefixes like 'arm/arm64: pci:' rather than pci/arm. On Mon, Apr 11, 2016 at 01:04:25PM +0200, Alexander Gordeev wrote: > Unlike x86, ARM and PPC kvm-unit-tests do not have a luxury > of PCI bus initialized by a BIOS and ready to use at start. > Thus, we need allocate and assign resources to all devices. > > There is no any sort of resource management for memory and > io spaces, since only one-per-BAR allocations are expected > between calls to pci_probe() and pci_shutdown(), and no > deallocations at all. > > Cc: Thomas Huth <thuth@xxxxxxxxxx> > Cc: Andrew Jones <drjones@xxxxxxxxxx> > Signed-off-by: Alexander Gordeev <agordeev@xxxxxxxxxx> > --- > arm/Makefile.common | 6 +- > arm/pci-test.c | 23 ++++ > lib/arm64/asm/pci.h | 29 +++++ > lib/pci-host-generic.c | 321 +++++++++++++++++++++++++++++++++++++++++++++++++ > lib/pci-host-generic.h | 57 +++++++++ > 5 files changed, 435 insertions(+), 1 deletion(-) > create mode 100644 arm/pci-test.c > create mode 100644 lib/arm64/asm/pci.h > create mode 100644 lib/pci-host-generic.c > create mode 100644 lib/pci-host-generic.h > > diff --git a/arm/Makefile.common b/arm/Makefile.common > index 9a2d61fc88a2..372d2ad186a2 100644 > --- a/arm/Makefile.common > +++ b/arm/Makefile.common > @@ -11,7 +11,8 @@ endif > > tests-common = \ > $(TEST_DIR)/selftest.flat \ > - $(TEST_DIR)/spinlock-test.flat > + $(TEST_DIR)/spinlock-test.flat \ > + $(TEST_DIR)/pci-test.flat > > all: test_cases > > @@ -30,6 +31,8 @@ include scripts/asm-offsets.mak > cflatobjs += lib/util.o > cflatobjs += lib/alloc.o > cflatobjs += lib/devicetree.o > +cflatobjs += lib/pci.o > +cflatobjs += lib/pci-host-generic.o > cflatobjs += lib/virtio.o > cflatobjs += lib/virtio-mmio.o > cflatobjs += lib/chr-testdev.o > @@ -73,3 +76,4 @@ $(TEST_DIR)/selftest.elf: $(cstart.o) $(TEST_DIR)/selftest.o > $(TEST_DIR)/spinlock-test.elf: $(cstart.o) $(TEST_DIR)/spinlock-test.o > > $(TEST_DIR)/selftest.o $(cstart.o): $(asm-offsets) > +$(TEST_DIR)/pci-test.elf: $(cstart.o) $(TEST_DIR)/pci-test.o > diff --git a/arm/pci-test.c b/arm/pci-test.c > new file mode 100644 > index 000000000000..e7a8a28f7c6d > --- /dev/null > +++ b/arm/pci-test.c > @@ -0,0 +1,23 @@ > +/* > + * PCI bus operation test > + * > + * Copyright (C) 2016, Red Hat Inc, Alexander Gordeev <agordeev@xxxxxxxxxx> > + * > + * This work is licensed under the terms of the GNU LGPL, version 2. > + */ > +#include <libcflat.h> > +#include <pci.h> > + > +int main(void) > +{ > + int ret; > + > + ret = pci_probe(); > + report("PCI bus probing", ret); > + > + pci_print(); > + > + pci_shutdown(); I still think having a pci_shutdown() is overkill for kvm-unit-tests, but if it's already implemented then why not. Maybe it could be useful in a pci-related unit test? > + > + return report_summary(); > +} > diff --git a/lib/arm64/asm/pci.h b/lib/arm64/asm/pci.h > new file mode 100644 > index 000000000000..1109315c0424 > --- /dev/null > +++ b/lib/arm64/asm/pci.h > @@ -0,0 +1,29 @@ > +#ifndef ASM_PCI_H > +#define ASM_PCI_H nit: I'd prefer _ASMARM64_PCI_H_ > +/* > + * Copyright (C) 2016, Red Hat Inc, Alexander Gordeev <agordeev@xxxxxxxxxx> > + * > + * This work is licensed under the terms of the GNU LGPL, version 2. > + */ > +#include "libcflat.h" > + > +uint8_t pci_config_readb(pcidevaddr_t dev, uint8_t reg); > +uint16_t pci_config_readw(pcidevaddr_t dev, uint8_t reg); > +uint32_t pci_config_readl(pcidevaddr_t dev, uint8_t reg); > +void pci_config_writel(pcidevaddr_t dev, uint8_t reg, uint32_t val); > + > +#define pci_xslate_addr pci_xlate_addr > +phys_addr_t pci_xlate_addr(pcidevaddr_t dev, uint64_t addr); > + > +#define pci_print_arch pci_print_arch > +void pci_print_arch(void); > + > +#define pci_probe pci_probe > +bool pci_probe(void); > + > +#define pci_shutdown pci_shutdown > +void pci_shutdown(void); > + > +#include <asm-generic/pci.h> > + > +#endif > diff --git a/lib/pci-host-generic.c b/lib/pci-host-generic.c > new file mode 100644 > index 000000000000..8ee1e35a92ca > --- /dev/null > +++ b/lib/pci-host-generic.c > @@ -0,0 +1,321 @@ > +/* > + * PCI bus operation test Probably the wrong description. It sure isn't talking about host bridges. > + * > + * Copyright (C) 2016, Red Hat Inc, Alexander Gordeev <agordeev@xxxxxxxxxx> > + * > + * This work is licensed under the terms of the GNU LGPL, version 2. > + */ > +#include "libcflat.h" > +#include "devicetree.h" > +#include "asm/io.h" > +#include "pci.h" > +#include "pci-host-generic.h" > +#include <linux/pci_regs.h> > + > +static struct pci_host_bridge *pci_host_bridge = NULL; No need for the explicit '= NULL'. > + > +static void __iomem *pci_get_dev_conf(struct pci_host_bridge *host, int dev) > +{ > + return (void __iomem *)host->start + dev * (1 << PCI_ECAM_DEVICE_SHIFT); Let's remove the multiplication: ... + (dev << PCI_ECAM_DEVICE_SHIFT) > +} > + > +static int of_flags_to_pci_type(u32 of_flags) > +{ > + static int type_map[] = { > + [1] = PCI_BASE_ADDRESS_SPACE_IO, > + [2] = PCI_BASE_ADDRESS_MEM_TYPE_32, > + [3] = PCI_BASE_ADDRESS_MEM_TYPE_64 > + }; > + int res = type_map[(of_flags >> 24) & 0x03]; > + > + if (of_flags & 0x40000000) > + res |= PCI_BASE_ADDRESS_MEM_PREFETCH; > + > + return res; > +} > + > +static void pci_host_addr_space_init(struct pci_addr_space as[], int nr_as, > + fdt32_t rcells[], int nr_rcells) This is a DT specific function so should be named as such. Actually I don't really think we need it anyway. There's only one caller, so why not just opencode it at the callsite? > +{ > + int i; > + > + /* > + * The PCI binding claims the numerical representation of a PCI > + * address consists of three cells, encoded as follows: > + * > + * phys.hi cell: npt000ss bbbbbbbb dddddfff rrrrrrrr > + * phys.mid cell: hhhhhhhh hhhhhhhh hhhhhhhh hhhhhhhh > + * phys.lo cell: llllllll llllllll llllllll llllllll > + * > + * PCI device bus address and flags are encoded into phys.high > + * PCI 64 bit address is encoded into phys.mid and phys.low > + */ > + > + for (i = 0; i < nr_as; i++, as++, rcells += nr_rcells) { > + as->type = of_flags_to_pci_type(fdt32_to_cpu(rcells[0])); > + as->pci_start = ((u64)fdt32_to_cpu(rcells[1]) << 32) | > + fdt32_to_cpu(rcells[2]); > + > + if (nr_rcells == 6) { > + as->start = fdt32_to_cpu(rcells[3]); > + as->size = ((u64)fdt32_to_cpu(rcells[4]) << 32) | > + fdt32_to_cpu(rcells[5]); > + } else { > + as->start = ((u64)fdt32_to_cpu(rcells[3]) << 32) | > + fdt32_to_cpu(rcells[4]); > + as->size = ((u64)fdt32_to_cpu(rcells[5]) << 32) | > + fdt32_to_cpu(rcells[6]); > + } > + } > +} > + > +/* > + * Probe DT for a generic PCI host controller > + * See kernel Documentation/devicetree/bindings/pci/host-generic-pci.txt > + * and function gen_pci_probe() in drivers/pci/host/pci-host-generic.c > + */ > +static struct pci_host_bridge *pci_dt_probe(void) > +{ > + struct pci_host_bridge *host; > + const void *fdt = dt_fdt(); > + const struct fdt_property *prop; > + struct dt_pbus_reg base; > + struct dt_device dt_dev; > + struct dt_bus dt_bus; > + u32 bus, bus_max; > + u32 nac, nsc, nac_root, nsc_root; > + u32 nr_range_cells, nr_addr_spaces; > + int ret, node, len; > + > + if (!dt_available()) { > + printf("No device tree found"); > + return NULL; > + } > + > + dt_bus_init_defaults(&dt_bus); > + dt_device_init(&dt_dev, &dt_bus, NULL); > + > + node = fdt_path_offset(fdt, "/"); > + assert(node >= 0); > + > + ret = dt_get_nr_cells(node, &nac_root, &nsc_root); > + assert(ret == 0); > + assert(nac_root == 1 || nac_root == 2); > + > + node = fdt_subnode_offset(fdt, node, "pcie"); > + if (node == -FDT_ERR_NOTFOUND) { > + printf("No PCIe controller found"); missing '\n' > + return NULL; > + } > + assert(node >= 0); > + > + prop = fdt_get_property(fdt, node, "device_type", &len); > + if (!prop || len != 4 || strcmp((char*)prop->data, "pci")) { > + printf("PCIe controller device_type is not \"pci\""); > + return NULL; Documentation/devicetree/bindings/pci/host-generic-pci.txt says that device_type must be "pci", so this is an assert() case, not a return NULL case. > + } > + > + ret = fdt_node_check_compatible(fdt, node, "pci-host-ecam-generic"); This check should be done before the device_type property check. Actually, we might as well skip the fdt_subnode_offset(fdt, node, "pcie") search, and just search by this compatibility string, i.e. use fdt_node_offset_by_compatible() instead. If it's not found, return NULL, if it is found, but device_type != pci, then assert. Not your fault. The code I gave you for this function had it just like this, but now on review, it looks like there's room for improvement :-) > + assert(ret >= 0); > + if (ret != 0) { > + printf("PCIe controller is not ECAM compatible"); > + return NULL; > + } > + > + dt_device_bind_node(&dt_dev, node); > + ret = dt_pbus_get_base(&dt_dev, &base); > + assert(ret == 0); > + > + prop = fdt_get_property(fdt, node, "bus-range", &len); > + if (prop == NULL) { > + assert(len == -FDT_ERR_NOTFOUND); > + bus = 0x00; > + bus_max = 0xff; > + } else { > + fdt32_t *data = (fdt32_t*)prop->data; > + bus = fdt32_to_cpu(data[0]); > + bus_max = fdt32_to_cpu(data[1]); > + assert(bus <= bus_max); > + } > + assert(bus_max < base.size / (1 << PCI_ECAM_BUS_SHIFT)); > + > + ret = dt_get_nr_cells(node, &nac, &nsc); > + assert(ret == 0); > + assert(nac == 3 && nsc == 2); > + > + prop = fdt_get_property(fdt, node, "ranges", &len); > + assert(prop != NULL); > + > + nr_range_cells = nac + nsc + nac_root; > + nr_addr_spaces = (len / 4) / nr_range_cells; > + > + host = malloc(sizeof(*host) + > + sizeof(host->addr_space[0]) * nr_addr_spaces); > + assert(host != NULL); > + > + host->start = base.addr; > + host->size = base.size; > + host->bus = bus; > + host->bus_max = bus_max; > + host->nr_addr_spaces = nr_addr_spaces; > + > + pci_host_addr_space_init(&host->addr_space[0], nr_addr_spaces, > + (fdt32_t*)prop->data, nr_range_cells); > + > + return host; > +} > + > +static u64 pci_alloc_res(struct pci_host_bridge *host, int type, u64 size) No reason to take 'host' as an argument. It's global and this function is not. > +{ > + struct pci_addr_space *as = &host->addr_space[0]; > + phys_addr_t addr, mask; > + int i; > + > + for (i = 0; i < host->nr_addr_spaces; i++, as++) { > + if (as->type == type) > + break; > + } > + assert(i < host->nr_addr_spaces); I think this failure may justify an error message. > + > + if (type & PCI_BASE_ADDRESS_SPACE_IO) > + mask = PCI_BASE_ADDRESS_IO_MASK; > + else > + mask = PCI_BASE_ADDRESS_MEM_MASK; > + > + size = ALIGN(size, mask); > + assert(as->free + size <= as->size); > + > + addr = as->pci_start + as->free; > + as->free += size; Hmm, the name 'free' made me think it was pointing at the start of free memory, but it's just an offset. Maybe a name like free_offset would be better? Or initialize it to as->pci_start and use it as a free pointer? Whatever though, not a big deal. > + > + return addr; > +} > + > +static void pci_bus_scan(struct pci_host_bridge *host) Same as above. No need for host to be a parameter. > +{ > + u32 cmd = PCI_COMMAND_SERR; > + int dev; > + int bar; > + > + for (dev = 0; dev < 256; dev++) { > + void __iomem *conf = pci_get_dev_conf(host, dev); > + > + /* We are only interested in normal PCI devices */ > + if (readb(conf + PCI_HEADER_TYPE) != PCI_HEADER_TYPE_NORMAL) > + continue; you need to initialize cmd to PCI_COMMAND_SERR here, not above > + > + for (bar = 0; bar < 6; bar++) { > + u64 addr, size; > + u32 type; > + > + size = pci_bar_size(dev, bar); > + if (!size) > + break; > + > + type = readl(conf + PCI_BASE_ADDRESS_0 + (bar * 4)); > + addr = pci_alloc_res(host, type, size); > + pci_bar_set(dev, bar, addr); > + > + if (pci_bar_is_memory(dev, bar)) > + cmd |= PCI_COMMAND_MEMORY; > + else > + cmd |= PCI_COMMAND_IO; > + > + if (pci_bar_is64(dev, bar)) > + bar++; > + } > + > + writel(cmd, conf + PCI_COMMAND); > + } > +} > + > +bool pci_probe(void) > +{ > + assert(!pci_host_bridge); > + pci_host_bridge = pci_dt_probe(); > + if (!pci_host_bridge) > + return false; > + > + pci_bus_scan(pci_host_bridge); > + > + return true; > +} > + > +void pci_shutdown() > +{ > + int dev; > + > + assert(pci_host_bridge); Instead of an assert I think if (!pci_host_bridge) return; is ok here. > + > + for (dev = 0; dev < 256; dev++) { > + void __iomem *conf = pci_get_dev_conf(pci_host_bridge, dev); > + if (readw(conf + PCI_VENDOR_ID) == (u16)~0 && > + readw(conf + PCI_DEVICE_ID) == (u16)~0) here's where that dev-valid function would be nice again > + continue; > + writel(PCI_COMMAND_INTX_DISABLE, conf + PCI_COMMAND); > + } > + > + free(pci_host_bridge); > + pci_host_bridge = NULL; > +} > + > +void pci_print_arch() There's nothing arch-specific about this function. It should just be named pci_host_bridge_print() > +{ > + struct pci_host_bridge *host; > + struct pci_addr_space *as; > + char desc[8]; > + int i; > + > + assert(pci_host_bridge); > + host = pci_host_bridge; > + as = &host->addr_space[0]; > + > + printf("PCIe start %" PRIx64 " size %" PRIx64 " " > + "bus %02x bus_max %02x #spaces %d\n\n", > + host->start, host->size, > + host->bus, host->bus_max, host->nr_addr_spaces); > + > + for (i = 0; i < host->nr_addr_spaces; i++, as++) { > + pci_type_desc(as->type, desc, sizeof(desc)); > + printf("%s address space:\n" > + "CPU start %" PRIx64 " " > + "PCI start %" PRIx64 " size %" PRIx64 "\n\n", > + desc, as->start, as->pci_start, as->size); > + } > +} > + > +phys_addr_t pci_xslate_addr(pcidevaddr_t __unused dev, u64 pci_addr) This is pci_host_bridge_get_paddr(), or some such, also not arch specific, but rather host bridge specific. > +{ > + struct pci_host_bridge *host = pci_host_bridge; > + struct pci_addr_space *as = &host->addr_space[0]; > + int i; > + > + for (i = 0; i < host->nr_addr_spaces; i++, as++) { > + if (pci_addr >= as->pci_start && > + pci_addr < as->pci_start + as->size) { > + return as->start + (pci_addr - as->pci_start); > + } > + } > + > + return 0; > +} > + > +#define PCI_OP_READ(type, op) \ > +type pci_config_##op(pcidevaddr_t dev, u8 off) \ > +{ \ > + void __iomem *conf = pci_get_dev_conf(pci_host_bridge, dev); \ > + return op(conf + off); \ > +} > + > +#define PCI_OP_WRITE(type, op) \ > +void pci_config_##op(pcidevaddr_t dev, u8 off, type val) \ > +{ \ > + void __iomem *conf = pci_get_dev_conf(pci_host_bridge, dev); \ > + op(val, conf + off); \ > +} > + > +PCI_OP_READ(u8, readb) > +PCI_OP_READ(u16, readw) > +PCI_OP_READ(u32, readl) > +PCI_OP_WRITE(u32, writel) OK, but I'm not a big fan of function generation like this. If there's tons to do, it makes sense, but for a handful, I prefer duplication. The reason I don't like it is that you can't grep/cscope for the function name. > + > diff --git a/lib/pci-host-generic.h b/lib/pci-host-generic.h > new file mode 100644 > index 000000000000..6b536c2de9fc > --- /dev/null > +++ b/lib/pci-host-generic.h > @@ -0,0 +1,57 @@ > +#ifndef PCI_HOST_GENERIC_H > +#define PCI_HOST_GENERIC_H > +/* > + * PCI host bridge supporting structures and constants > + * > + * Copyright (C) 2016, Red Hat Inc, Alexander Gordeev <agordeev@xxxxxxxxxx> > + * > + * This work is licensed under the terms of the GNU LGPL, version 2. > + */ > +#include "libcflat.h" > + > +struct pci_addr_space { > + phys_addr_t pci_start; > + phys_addr_t start; > + phys_addr_t size; > + phys_addr_t free; > + int type; > +}; > + > +struct pci_host_bridge { > + phys_addr_t start; > + phys_addr_t size; > + int bus; > + int bus_max; > + int nr_addr_spaces; > + struct pci_addr_space addr_space[]; > +}; > + > +/* > + * The following constants are derived from Linux, see this source: > + * > + * drivers/pci/host/pci-host-generic.c > + * struct gen_pci_cfg_bus_ops::bus_shift > + * int gen_pci_parse_map_cfg_windows(struct gen_pci *pci) > + * > + * Documentation/devicetree/bindings/pci/host-generic-pci.txt excerpt: > + * > + * Configuration Space is assumed to be memory-mapped (as opposed to being > + * accessed via an ioport) and laid out with a direct correspondence to the > + * geography of a PCI bus address by concatenating the various components to > + * form an offset. > + * > + * For CAM, this 24-bit offset is: > + * > + * cfg_offset(bus, device, function, register) = > + * bus << 16 | device << 11 | function << 8 | register > + * > + * Whilst ECAM extends this by 4 bits to accommodate 4k of function space: > + * > + * cfg_offset(bus, device, function, register) = > + * bus << 20 | device << 15 | function << 12 | register > + * > + */ I like documentation, but the above could probably be condensed. > +#define PCI_ECAM_BUS_SHIFT 20 > +#define PCI_ECAM_DEVICE_SHIFT 12 Hmm, so you've named the function shift PCI_ECAM_*DEVICE*_SHIFT? Linux's gen_pci_map_cfg_bus_ecam() shifts by 12, like you do in pci_get_dev_conf(), but it doesn't shift something called dev, like you do, it shifts something called 'devfn'. Thanks, drew > + > +#endif > -- > 1.8.3.1 > > -- > To unsubscribe from this list: send the line "unsubscribe kvm" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html