Re: [PATCH kernel 2/5] powerpc/powernv/npu: Collect all static symbols under one struct

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Oct 15, 2018 at 08:32:58PM +1100, Alexey Kardashevskiy wrote:
> We are going to add a global list of NPUs in the system which is going
> to be yet another static symbol. Let's reorganise the code and put all
> static symbols into one struct for better tracking what is really needed
> for NPU (this might become a driver data some day).
> 
> Signed-off-by: Alexey Kardashevskiy <aik@xxxxxxxxx>

I'm not entirely convinced this is worthwhile, but maybe it'll become
clearer with the later patches.  There are some nits though.

> ---
>  arch/powerpc/include/asm/pci.h            |  1 +
>  arch/powerpc/platforms/powernv/npu-dma.c  | 77 ++++++++++++++++++-------------
>  arch/powerpc/platforms/powernv/pci-ioda.c |  2 +
>  3 files changed, 47 insertions(+), 33 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
> index 2af9ded..1a96075 100644
> --- a/arch/powerpc/include/asm/pci.h
> +++ b/arch/powerpc/include/asm/pci.h
> @@ -129,5 +129,6 @@ extern void pcibios_scan_phb(struct pci_controller *hose);
>  
>  extern struct pci_dev *pnv_pci_get_gpu_dev(struct pci_dev *npdev);
>  extern struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index);
> +extern void pnv_npu2_devices_init(void);
>  
>  #endif /* __ASM_POWERPC_PCI_H */
> diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
> index 13e5153..01402f9 100644
> --- a/arch/powerpc/platforms/powernv/npu-dma.c
> +++ b/arch/powerpc/platforms/powernv/npu-dma.c
> @@ -22,20 +22,6 @@
>  #include "pci.h"
>  
>  /*
> - * spinlock to protect initialisation of an npu_context for a particular
> - * mm_struct.
> - */
> -static DEFINE_SPINLOCK(npu_context_lock);
> -
> -/*
> - * When an address shootdown range exceeds this threshold we invalidate the
> - * entire TLB on the GPU for the given PID rather than each specific address in
> - * the range.
> - */
> -static uint64_t atsd_threshold = 2 * 1024 * 1024;
> -static struct dentry *atsd_threshold_dentry;
> -
> -/*
>   * Other types of TCE cache invalidation are not functional in the
>   * hardware.
>   */
> @@ -392,6 +378,33 @@ struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe)
>  /*
>   * NPU2 ATS
>   */
> +static struct {
> +	/*
> +	 * spinlock to protect initialisation of an npu_context for
> +	 * a particular mm_struct.
> +	 */
> +	spinlock_t context_lock;
> +
> +	/* Maximum index of npu2 hosts in the system. Always < NV_MAX_NPUS */
> +	int max_index;
> +
> +	/*
> +	 * When an address shootdown range exceeds this threshold we invalidate the
> +	 * entire TLB on the GPU for the given PID rather than each specific address in
> +	 * the range.
> +	 */
> +	uint64_t atsd_threshold;
> +	struct dentry *atsd_threshold_dentry;
> +
> +} npu2_devices;

Even as a structu, it should be possible to statically initialize this.

> +
> +void pnv_npu2_devices_init(void)
> +{
> +	memset(&npu2_devices, 0, sizeof(npu2_devices));

The memset() is unnecessary.  The static structure lives in the BSS,
which means it is already initialized to zeroes.

> +	spin_lock_init(&npu2_devices.context_lock);
> +	npu2_devices.atsd_threshold = 2 * 1024 * 1024;
> +}
> +
>  static struct npu *npdev_to_npu(struct pci_dev *npdev)
>  {
>  	struct pnv_phb *nphb;
> @@ -404,9 +417,6 @@ static struct npu *npdev_to_npu(struct pci_dev *npdev)
>  /* Maximum number of nvlinks per npu */
>  #define NV_MAX_LINKS 6
>  
> -/* Maximum index of npu2 hosts in the system. Always < NV_MAX_NPUS */
> -static int max_npu2_index;
> -
>  struct npu_context {
>  	struct mm_struct *mm;
>  	struct pci_dev *npdev[NV_MAX_NPUS][NV_MAX_LINKS];
> @@ -472,7 +482,7 @@ static void mmio_invalidate_pid(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS],
>  	int i;
>  	unsigned long launch;
>  
> -	for (i = 0; i <= max_npu2_index; i++) {
> +	for (i = 0; i <= npu2_devices.max_index; i++) {
>  		if (mmio_atsd_reg[i].reg < 0)
>  			continue;
>  
> @@ -503,7 +513,7 @@ static void mmio_invalidate_va(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS],
>  	int i;
>  	unsigned long launch;
>  
> -	for (i = 0; i <= max_npu2_index; i++) {
> +	for (i = 0; i <= npu2_devices.max_index; i++) {
>  		if (mmio_atsd_reg[i].reg < 0)
>  			continue;
>  
> @@ -536,7 +546,7 @@ static void mmio_invalidate_wait(
>  	int i, reg;
>  
>  	/* Wait for all invalidations to complete */
> -	for (i = 0; i <= max_npu2_index; i++) {
> +	for (i = 0; i <= npu2_devices.max_index; i++) {
>  		if (mmio_atsd_reg[i].reg < 0)
>  			continue;
>  
> @@ -559,7 +569,7 @@ static void acquire_atsd_reg(struct npu_context *npu_context,
>  	struct npu *npu;
>  	struct pci_dev *npdev;
>  
> -	for (i = 0; i <= max_npu2_index; i++) {
> +	for (i = 0; i <= npu2_devices.max_index; i++) {
>  		mmio_atsd_reg[i].reg = -1;
>  		for (j = 0; j < NV_MAX_LINKS; j++) {
>  			/*
> @@ -593,7 +603,7 @@ static void release_atsd_reg(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
>  {
>  	int i;
>  
> -	for (i = 0; i <= max_npu2_index; i++) {
> +	for (i = 0; i <= npu2_devices.max_index; i++) {
>  		/*
>  		 * We can't rely on npu_context->npdev[][] being the same here
>  		 * as when acquire_atsd_reg() was called, hence we use the
> @@ -683,7 +693,7 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
>  	struct npu_context *npu_context = mn_to_npu_context(mn);
>  	unsigned long address;
>  
> -	if (end - start > atsd_threshold) {
> +	if (end - start > npu2_devices.atsd_threshold) {
>  		/*
>  		 * Just invalidate the entire PID if the address range is too
>  		 * large.
> @@ -777,12 +787,12 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
>  	 * We store the npu pci device so we can more easily get at the
>  	 * associated npus.
>  	 */
> -	spin_lock(&npu_context_lock);
> +	spin_lock(&npu2_devices.context_lock);
>  	npu_context = mm->context.npu_context;
>  	if (npu_context) {
>  		if (npu_context->release_cb != cb ||
>  			npu_context->priv != priv) {
> -			spin_unlock(&npu_context_lock);
> +			spin_unlock(&npu2_devices.context_lock);
>  			opal_npu_destroy_context(nphb->opal_id, mm->context.id,
>  						PCI_DEVID(gpdev->bus->number,
>  							gpdev->devfn));
> @@ -791,12 +801,12 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
>  
>  		WARN_ON(!kref_get_unless_zero(&npu_context->kref));
>  	}
> -	spin_unlock(&npu_context_lock);
> +	spin_unlock(&npu2_devices.context_lock);
>  
>  	if (!npu_context) {
>  		/*
>  		 * We can set up these fields without holding the
> -		 * npu_context_lock as the npu_context hasn't been returned to
> +		 * npu2_devices.context_lock as the npu_context hasn't been returned to
>  		 * the caller meaning it can't be destroyed. Parallel allocation
>  		 * is protected against by mmap_sem.
>  		 */
> @@ -887,9 +897,9 @@ void pnv_npu2_destroy_context(struct npu_context *npu_context,
>  	WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], NULL);
>  	opal_npu_destroy_context(nphb->opal_id, npu_context->mm->context.id,
>  				PCI_DEVID(gpdev->bus->number, gpdev->devfn));
> -	spin_lock(&npu_context_lock);
> +	spin_lock(&npu2_devices.context_lock);
>  	removed = kref_put(&npu_context->kref, pnv_npu2_release_context);
> -	spin_unlock(&npu_context_lock);
> +	spin_unlock(&npu2_devices.context_lock);
>  
>  	/*
>  	 * We need to do this outside of pnv_npu2_release_context so that it is
> @@ -958,9 +968,10 @@ int pnv_npu2_init(struct pnv_phb *phb)
>  	static int npu_index;
>  	uint64_t rc = 0;
>  
> -	if (!atsd_threshold_dentry) {
> -		atsd_threshold_dentry = debugfs_create_x64("atsd_threshold",
> -				   0600, powerpc_debugfs_root, &atsd_threshold);
> +	if (!npu2_devices.atsd_threshold_dentry) {
> +		npu2_devices.atsd_threshold_dentry = debugfs_create_x64(
> +				"atsd_threshold", 0600, powerpc_debugfs_root,
> +				&npu2_devices.atsd_threshold);
>  	}
>  
>  	phb->npu.nmmu_flush =
> @@ -988,7 +999,7 @@ int pnv_npu2_init(struct pnv_phb *phb)
>  	npu_index++;
>  	if (WARN_ON(npu_index >= NV_MAX_NPUS))
>  		return -ENOSPC;
> -	max_npu2_index = npu_index;
> +	npu2_devices.max_index = npu_index;
>  	phb->npu.index = npu_index;
>  
>  	return 0;
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
> index e37b9cc..0cc81c0 100644
> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> @@ -1279,6 +1279,8 @@ static void pnv_pci_ioda_setup_PEs(void)
>  	struct pci_bus *bus;
>  	struct pci_dev *pdev;
>  
> +	pnv_npu2_devices_init();
> +
>  	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
>  		phb = hose->private_data;
>  		if (phb->type == PNV_PHB_NPU_NVLINK) {

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

Attachment: signature.asc
Description: PGP signature


[Index of Archives]     [KVM Development]     [KVM ARM]     [KVM ia64]     [Linux Virtualization]     [Linux USB Devel]     [Linux Video]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [Big List of Linux Books]

  Powered by Linux