On 14/11/2018 15:28, Alistair Popple wrote:
> Hi Alexey,
> 
> On Tuesday, 13 November 2018 7:28:07 PM AEDT Alexey Kardashevskiy wrote:
>>  static struct npu *npdev_to_npu(struct pci_dev *npdev)
>>  {
>> -	struct pnv_phb *nphb;
>> +	struct pci_controller *hose = pci_bus_to_host(npdev->bus);
>> +	struct npu *npu;
>> 
>> -	nphb = pci_bus_to_host(npdev->bus)->private_data;
>> +	list_for_each_entry(npu, &npu2_devices, next)
> 
> This is called from the ATSD path which is (or at least has been) quite a
> performance critical path so searching through all the NPUs in a list may be
> problematic.
> 
> I guess currently it wont make any practical difference as we only ever have 2
> NPUs, but in future they may get divided into more logical NPUs. Would it be
> possible to store a back-pointer somewhere so we can avoid the lookup?

It is quite possible even now with iommu_group_get() + container_of() +
iommu_group_put(), I'll try that in respin.
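
Roughly along these lines (an untested sketch only; it assumes the NVLink
bridge devices end up in an IOMMU group whose iommu data is the NPU's
iommu_table_group, and that struct npu embeds that as a table_group member,
neither of which is shown in the hunks above):

static struct npu *npdev_to_npu(struct pci_dev *npdev)
{
	struct iommu_group *group = iommu_group_get(&npdev->dev);
	struct iommu_table_group *table_group;

	if (!group)
		return NULL;

	/*
	 * Assumed here: the group's iommu data is the NPU's
	 * iommu_table_group, so container_of() gives the owning
	 * struct npu without walking npu2_devices.
	 */
	table_group = iommu_group_get_iommudata(group);
	iommu_group_put(group);
	if (!table_group)
		return NULL;

	return container_of(table_group, struct npu, table_group);
}

If that works out, the list walk disappears from the ATSD path entirely.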

> 
>> +	if (hose == npu->hose)
>> +		return npu;
>> 
>> -	return &nphb->npu;
>> +	WARN_ON_ONCE(1);
>> +	return NULL;
>>  }
>> 
>>  /* Maximum number of nvlinks per npu */
>> @@ -505,6 +531,9 @@ static void acquire_atsd_reg(struct npu_context *npu_context,
>>  				continue;
>> 
>>  			npu = npdev_to_npu(npdev);
>> +			if (!npu)
>> +				continue;
>> +
>>  			mmio_atsd_reg[i].npu = npu;
>>  			mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu);
>>  			while (mmio_atsd_reg[i].reg < 0) {
>> @@ -701,6 +730,8 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
>> 
>>  	nphb = pci_bus_to_host(npdev->bus)->private_data;
>>  	npu = npdev_to_npu(npdev);
>> +	if (!npu)
>> +		return ERR_PTR(-ENODEV);
>> 
>>  	/*
>>  	 * Setup the NPU context table for a particular GPU. These need to be
>> @@ -821,6 +852,8 @@ void pnv_npu2_destroy_context(struct npu_context *npu_context,
>> 
>>  	nphb = pci_bus_to_host(npdev->bus)->private_data;
>>  	npu = npdev_to_npu(npdev);
>> +	if (!npu)
>> +		return;
>>  	nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0);
>>  	if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
>>  			&nvlink_index)))
>> @@ -898,9 +931,15 @@ int pnv_npu2_init(struct pnv_phb *phb)
>>  	struct pci_dev *gpdev;
>>  	static int npu_index;
>>  	uint64_t rc = 0;
>> +	struct pci_controller *hose = phb->hose;
>> +	struct npu *npu;
>> +	int ret;
>> 
>> -	phb->npu.nmmu_flush =
>> -		of_property_read_bool(phb->hose->dn, "ibm,nmmu-flush");
>> +	npu = kzalloc(sizeof(*npu), GFP_KERNEL);
>> +	if (!npu)
>> +		return -ENOMEM;
>> +
>> +	npu->nmmu_flush = of_property_read_bool(hose->dn, "ibm,nmmu-flush");
>>  	for_each_child_of_node(phb->hose->dn, dn) {
>>  		gpdev = pnv_pci_get_gpu_dev(get_pci_dev(dn));
>>  		if (gpdev) {
>> @@ -914,18 +953,31 @@ int pnv_npu2_init(struct pnv_phb *phb)
>>  		}
>>  	}
>> 
>> -	for (i = 0; !of_property_read_u64_index(phb->hose->dn, "ibm,mmio-atsd",
>> +	for (i = 0; !of_property_read_u64_index(hose->dn, "ibm,mmio-atsd",
>>  			i, &mmio_atsd); i++)
>> -		phb->npu.mmio_atsd_regs[i] = ioremap(mmio_atsd, 32);
>> +		npu->mmio_atsd_regs[i] = ioremap(mmio_atsd, 32);
>> 
>> -	pr_info("NPU%lld: Found %d MMIO ATSD registers", phb->opal_id, i);
>> -	phb->npu.mmio_atsd_count = i;
>> -	phb->npu.mmio_atsd_usage = 0;
>> +	pr_info("NPU%d: Found %d MMIO ATSD registers", hose->global_number, i);
>> +	npu->mmio_atsd_count = i;
>> +	npu->mmio_atsd_usage = 0;
>>  	npu_index++;
>> -	if (WARN_ON(npu_index >= NV_MAX_NPUS))
>> -		return -ENOSPC;
>> +	if (WARN_ON(npu_index >= NV_MAX_NPUS)) {
>> +		ret = -ENOSPC;
>> +		goto fail_exit;
>> +	}
>>  	max_npu2_index = npu_index;
>> -	phb->npu.index = npu_index;
>> +	npu->index = npu_index;
>> +	npu->hose = hose;
>> +
>> +	list_add(&npu->next, &npu2_devices);
> 
> Guess we don't need any locking here as the list gets setup once during boot
> long before loading the driver and is never modified right?

Correct.

> 
> - Alistair
> 
>>  	return 0;
>> +
>> +fail_exit:
>> +	for (i = 0; i < npu->mmio_atsd_count; ++i)
>> +		iounmap(npu->mmio_atsd_regs[i]);
>> +
>> +	kfree(npu);
>> +
>> +	return ret;
>>  }
> 
> 

-- 
Alexey