On Mon, Oct 15, 2018 at 08:32:58PM +1100, Alexey Kardashevskiy wrote: > We are going to add a global list of NPUs in the system which is going > to be yet another static symbol. Let's reorganise the code and put all > static symbols into one struct for better tracking what is really needed > for NPU (this might become a driver data some day). > > Signed-off-by: Alexey Kardashevskiy <aik@xxxxxxxxx> I'm not entirely convinced this is worthwhile, but maybe it'll become clearer with the later patches. There are some nits though. > --- > arch/powerpc/include/asm/pci.h | 1 + > arch/powerpc/platforms/powernv/npu-dma.c | 77 ++++++++++++++++++------------- > arch/powerpc/platforms/powernv/pci-ioda.c | 2 + > 3 files changed, 47 insertions(+), 33 deletions(-) > > diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h > index 2af9ded..1a96075 100644 > --- a/arch/powerpc/include/asm/pci.h > +++ b/arch/powerpc/include/asm/pci.h > @@ -129,5 +129,6 @@ extern void pcibios_scan_phb(struct pci_controller *hose); > > extern struct pci_dev *pnv_pci_get_gpu_dev(struct pci_dev *npdev); > extern struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index); > +extern void pnv_npu2_devices_init(void); > > #endif /* __ASM_POWERPC_PCI_H */ > diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c > index 13e5153..01402f9 100644 > --- a/arch/powerpc/platforms/powernv/npu-dma.c > +++ b/arch/powerpc/platforms/powernv/npu-dma.c > @@ -22,20 +22,6 @@ > #include "pci.h" > > /* > - * spinlock to protect initialisation of an npu_context for a particular > - * mm_struct. > - */ > -static DEFINE_SPINLOCK(npu_context_lock); > - > -/* > - * When an address shootdown range exceeds this threshold we invalidate the > - * entire TLB on the GPU for the given PID rather than each specific address in > - * the range. > - */ > -static uint64_t atsd_threshold = 2 * 1024 * 1024; > -static struct dentry *atsd_threshold_dentry; > - > -/* > * Other types of TCE cache invalidation are not functional in the > * hardware. > */ > @@ -392,6 +378,33 @@ struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe) > /* > * NPU2 ATS > */ > +static struct { > + /* > + * spinlock to protect initialisation of an npu_context for > + * a particular mm_struct. > + */ > + spinlock_t context_lock; > + > + /* Maximum index of npu2 hosts in the system. Always < NV_MAX_NPUS */ > + int max_index; > + > + /* > + * When an address shootdown range exceeds this threshold we invalidate the > + * entire TLB on the GPU for the given PID rather than each specific address in > + * the range. > + */ > + uint64_t atsd_threshold; > + struct dentry *atsd_threshold_dentry; > + > +} npu2_devices; Even as a structu, it should be possible to statically initialize this. > + > +void pnv_npu2_devices_init(void) > +{ > + memset(&npu2_devices, 0, sizeof(npu2_devices)); The memset() is unnecessary. The static structure lives in the BSS, which means it is already initialized to zeroes. > + spin_lock_init(&npu2_devices.context_lock); > + npu2_devices.atsd_threshold = 2 * 1024 * 1024; > +} > + > static struct npu *npdev_to_npu(struct pci_dev *npdev) > { > struct pnv_phb *nphb; > @@ -404,9 +417,6 @@ static struct npu *npdev_to_npu(struct pci_dev *npdev) > /* Maximum number of nvlinks per npu */ > #define NV_MAX_LINKS 6 > > -/* Maximum index of npu2 hosts in the system. Always < NV_MAX_NPUS */ > -static int max_npu2_index; > - > struct npu_context { > struct mm_struct *mm; > struct pci_dev *npdev[NV_MAX_NPUS][NV_MAX_LINKS]; > @@ -472,7 +482,7 @@ static void mmio_invalidate_pid(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], > int i; > unsigned long launch; > > - for (i = 0; i <= max_npu2_index; i++) { > + for (i = 0; i <= npu2_devices.max_index; i++) { > if (mmio_atsd_reg[i].reg < 0) > continue; > > @@ -503,7 +513,7 @@ static void mmio_invalidate_va(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], > int i; > unsigned long launch; > > - for (i = 0; i <= max_npu2_index; i++) { > + for (i = 0; i <= npu2_devices.max_index; i++) { > if (mmio_atsd_reg[i].reg < 0) > continue; > > @@ -536,7 +546,7 @@ static void mmio_invalidate_wait( > int i, reg; > > /* Wait for all invalidations to complete */ > - for (i = 0; i <= max_npu2_index; i++) { > + for (i = 0; i <= npu2_devices.max_index; i++) { > if (mmio_atsd_reg[i].reg < 0) > continue; > > @@ -559,7 +569,7 @@ static void acquire_atsd_reg(struct npu_context *npu_context, > struct npu *npu; > struct pci_dev *npdev; > > - for (i = 0; i <= max_npu2_index; i++) { > + for (i = 0; i <= npu2_devices.max_index; i++) { > mmio_atsd_reg[i].reg = -1; > for (j = 0; j < NV_MAX_LINKS; j++) { > /* > @@ -593,7 +603,7 @@ static void release_atsd_reg(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]) > { > int i; > > - for (i = 0; i <= max_npu2_index; i++) { > + for (i = 0; i <= npu2_devices.max_index; i++) { > /* > * We can't rely on npu_context->npdev[][] being the same here > * as when acquire_atsd_reg() was called, hence we use the > @@ -683,7 +693,7 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn, > struct npu_context *npu_context = mn_to_npu_context(mn); > unsigned long address; > > - if (end - start > atsd_threshold) { > + if (end - start > npu2_devices.atsd_threshold) { > /* > * Just invalidate the entire PID if the address range is too > * large. > @@ -777,12 +787,12 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, > * We store the npu pci device so we can more easily get at the > * associated npus. > */ > - spin_lock(&npu_context_lock); > + spin_lock(&npu2_devices.context_lock); > npu_context = mm->context.npu_context; > if (npu_context) { > if (npu_context->release_cb != cb || > npu_context->priv != priv) { > - spin_unlock(&npu_context_lock); > + spin_unlock(&npu2_devices.context_lock); > opal_npu_destroy_context(nphb->opal_id, mm->context.id, > PCI_DEVID(gpdev->bus->number, > gpdev->devfn)); > @@ -791,12 +801,12 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, > > WARN_ON(!kref_get_unless_zero(&npu_context->kref)); > } > - spin_unlock(&npu_context_lock); > + spin_unlock(&npu2_devices.context_lock); > > if (!npu_context) { > /* > * We can set up these fields without holding the > - * npu_context_lock as the npu_context hasn't been returned to > + * npu2_devices.context_lock as the npu_context hasn't been returned to > * the caller meaning it can't be destroyed. Parallel allocation > * is protected against by mmap_sem. > */ > @@ -887,9 +897,9 @@ void pnv_npu2_destroy_context(struct npu_context *npu_context, > WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], NULL); > opal_npu_destroy_context(nphb->opal_id, npu_context->mm->context.id, > PCI_DEVID(gpdev->bus->number, gpdev->devfn)); > - spin_lock(&npu_context_lock); > + spin_lock(&npu2_devices.context_lock); > removed = kref_put(&npu_context->kref, pnv_npu2_release_context); > - spin_unlock(&npu_context_lock); > + spin_unlock(&npu2_devices.context_lock); > > /* > * We need to do this outside of pnv_npu2_release_context so that it is > @@ -958,9 +968,10 @@ int pnv_npu2_init(struct pnv_phb *phb) > static int npu_index; > uint64_t rc = 0; > > - if (!atsd_threshold_dentry) { > - atsd_threshold_dentry = debugfs_create_x64("atsd_threshold", > - 0600, powerpc_debugfs_root, &atsd_threshold); > + if (!npu2_devices.atsd_threshold_dentry) { > + npu2_devices.atsd_threshold_dentry = debugfs_create_x64( > + "atsd_threshold", 0600, powerpc_debugfs_root, > + &npu2_devices.atsd_threshold); > } > > phb->npu.nmmu_flush = > @@ -988,7 +999,7 @@ int pnv_npu2_init(struct pnv_phb *phb) > npu_index++; > if (WARN_ON(npu_index >= NV_MAX_NPUS)) > return -ENOSPC; > - max_npu2_index = npu_index; > + npu2_devices.max_index = npu_index; > phb->npu.index = npu_index; > > return 0; > diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c > index e37b9cc..0cc81c0 100644 > --- a/arch/powerpc/platforms/powernv/pci-ioda.c > +++ b/arch/powerpc/platforms/powernv/pci-ioda.c > @@ -1279,6 +1279,8 @@ static void pnv_pci_ioda_setup_PEs(void) > struct pci_bus *bus; > struct pci_dev *pdev; > > + pnv_npu2_devices_init(); > + > list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { > phb = hose->private_data; > if (phb->type == PNV_PHB_NPU_NVLINK) { -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson
Attachment:
signature.asc
Description: PGP signature