We are going to add a global list of NPUs in the system which is going to be yet another static symbol. Let's reorganise the code and put all static symbols into one struct for better tracking what is really needed for NPU (this might become a driver data some day). Signed-off-by: Alexey Kardashevskiy <aik@xxxxxxxxx> --- arch/powerpc/include/asm/pci.h | 1 + arch/powerpc/platforms/powernv/npu-dma.c | 77 ++++++++++++++++++------------- arch/powerpc/platforms/powernv/pci-ioda.c | 2 + 3 files changed, 47 insertions(+), 33 deletions(-) diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h index 2af9ded..1a96075 100644 --- a/arch/powerpc/include/asm/pci.h +++ b/arch/powerpc/include/asm/pci.h @@ -129,5 +129,6 @@ extern void pcibios_scan_phb(struct pci_controller *hose); extern struct pci_dev *pnv_pci_get_gpu_dev(struct pci_dev *npdev); extern struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index); +extern void pnv_npu2_devices_init(void); #endif /* __ASM_POWERPC_PCI_H */ diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index 13e5153..01402f9 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -22,20 +22,6 @@ #include "pci.h" /* - * spinlock to protect initialisation of an npu_context for a particular - * mm_struct. - */ -static DEFINE_SPINLOCK(npu_context_lock); - -/* - * When an address shootdown range exceeds this threshold we invalidate the - * entire TLB on the GPU for the given PID rather than each specific address in - * the range. - */ -static uint64_t atsd_threshold = 2 * 1024 * 1024; -static struct dentry *atsd_threshold_dentry; - -/* * Other types of TCE cache invalidation are not functional in the * hardware. */ @@ -392,6 +378,33 @@ struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe) /* * NPU2 ATS */ +static struct { + /* + * spinlock to protect initialisation of an npu_context for + * a particular mm_struct. + */ + spinlock_t context_lock; + + /* Maximum index of npu2 hosts in the system. Always < NV_MAX_NPUS */ + int max_index; + + /* + * When an address shootdown range exceeds this threshold we invalidate the + * entire TLB on the GPU for the given PID rather than each specific address in + * the range. + */ + uint64_t atsd_threshold; + struct dentry *atsd_threshold_dentry; + +} npu2_devices; + +void pnv_npu2_devices_init(void) +{ + memset(&npu2_devices, 0, sizeof(npu2_devices)); + spin_lock_init(&npu2_devices.context_lock); + npu2_devices.atsd_threshold = 2 * 1024 * 1024; +} + static struct npu *npdev_to_npu(struct pci_dev *npdev) { struct pnv_phb *nphb; @@ -404,9 +417,6 @@ static struct npu *npdev_to_npu(struct pci_dev *npdev) /* Maximum number of nvlinks per npu */ #define NV_MAX_LINKS 6 -/* Maximum index of npu2 hosts in the system. Always < NV_MAX_NPUS */ -static int max_npu2_index; - struct npu_context { struct mm_struct *mm; struct pci_dev *npdev[NV_MAX_NPUS][NV_MAX_LINKS]; @@ -472,7 +482,7 @@ static void mmio_invalidate_pid(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], int i; unsigned long launch; - for (i = 0; i <= max_npu2_index; i++) { + for (i = 0; i <= npu2_devices.max_index; i++) { if (mmio_atsd_reg[i].reg < 0) continue; @@ -503,7 +513,7 @@ static void mmio_invalidate_va(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], int i; unsigned long launch; - for (i = 0; i <= max_npu2_index; i++) { + for (i = 0; i <= npu2_devices.max_index; i++) { if (mmio_atsd_reg[i].reg < 0) continue; @@ -536,7 +546,7 @@ static void mmio_invalidate_wait( int i, reg; /* Wait for all invalidations to complete */ - for (i = 0; i <= max_npu2_index; i++) { + for (i = 0; i <= npu2_devices.max_index; i++) { if (mmio_atsd_reg[i].reg < 0) continue; @@ -559,7 +569,7 @@ static void acquire_atsd_reg(struct npu_context *npu_context, struct npu *npu; struct pci_dev *npdev; - for (i = 0; i <= max_npu2_index; i++) { + for (i = 0; i <= npu2_devices.max_index; i++) { mmio_atsd_reg[i].reg = -1; for (j = 0; j < NV_MAX_LINKS; j++) { /* @@ -593,7 +603,7 @@ static void release_atsd_reg(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]) { int i; - for (i = 0; i <= max_npu2_index; i++) { + for (i = 0; i <= npu2_devices.max_index; i++) { /* * We can't rely on npu_context->npdev[][] being the same here * as when acquire_atsd_reg() was called, hence we use the @@ -683,7 +693,7 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn, struct npu_context *npu_context = mn_to_npu_context(mn); unsigned long address; - if (end - start > atsd_threshold) { + if (end - start > npu2_devices.atsd_threshold) { /* * Just invalidate the entire PID if the address range is too * large. @@ -777,12 +787,12 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, * We store the npu pci device so we can more easily get at the * associated npus. */ - spin_lock(&npu_context_lock); + spin_lock(&npu2_devices.context_lock); npu_context = mm->context.npu_context; if (npu_context) { if (npu_context->release_cb != cb || npu_context->priv != priv) { - spin_unlock(&npu_context_lock); + spin_unlock(&npu2_devices.context_lock); opal_npu_destroy_context(nphb->opal_id, mm->context.id, PCI_DEVID(gpdev->bus->number, gpdev->devfn)); @@ -791,12 +801,12 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, WARN_ON(!kref_get_unless_zero(&npu_context->kref)); } - spin_unlock(&npu_context_lock); + spin_unlock(&npu2_devices.context_lock); if (!npu_context) { /* * We can set up these fields without holding the - * npu_context_lock as the npu_context hasn't been returned to + * npu2_devices.context_lock as the npu_context hasn't been returned to * the caller meaning it can't be destroyed. Parallel allocation * is protected against by mmap_sem. */ @@ -887,9 +897,9 @@ void pnv_npu2_destroy_context(struct npu_context *npu_context, WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], NULL); opal_npu_destroy_context(nphb->opal_id, npu_context->mm->context.id, PCI_DEVID(gpdev->bus->number, gpdev->devfn)); - spin_lock(&npu_context_lock); + spin_lock(&npu2_devices.context_lock); removed = kref_put(&npu_context->kref, pnv_npu2_release_context); - spin_unlock(&npu_context_lock); + spin_unlock(&npu2_devices.context_lock); /* * We need to do this outside of pnv_npu2_release_context so that it is @@ -958,9 +968,10 @@ int pnv_npu2_init(struct pnv_phb *phb) static int npu_index; uint64_t rc = 0; - if (!atsd_threshold_dentry) { - atsd_threshold_dentry = debugfs_create_x64("atsd_threshold", - 0600, powerpc_debugfs_root, &atsd_threshold); + if (!npu2_devices.atsd_threshold_dentry) { + npu2_devices.atsd_threshold_dentry = debugfs_create_x64( + "atsd_threshold", 0600, powerpc_debugfs_root, + &npu2_devices.atsd_threshold); } phb->npu.nmmu_flush = @@ -988,7 +999,7 @@ int pnv_npu2_init(struct pnv_phb *phb) npu_index++; if (WARN_ON(npu_index >= NV_MAX_NPUS)) return -ENOSPC; - max_npu2_index = npu_index; + npu2_devices.max_index = npu_index; phb->npu.index = npu_index; return 0; diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index e37b9cc..0cc81c0 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1279,6 +1279,8 @@ static void pnv_pci_ioda_setup_PEs(void) struct pci_bus *bus; struct pci_dev *pdev; + pnv_npu2_devices_init(); + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { phb = hose->private_data; if (phb->type == PNV_PHB_NPU_NVLINK) { -- 2.11.0