Re: [PATCH v3 1/2] iommu/io-pgtable-arm: Add support for ARM_ADRENO_GPU_LPAE io-pgtable format

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Aug 07, 2019 at 04:21:39PM -0600, Jordan Crouse wrote:
> Add a new sub-format ARM_ADRENO_GPU_LPAE to set up TTBR0 and TTBR1 for
> use by the Adreno GPU. This will allow The GPU driver to map global
> buffers in the TTBR1 and leave the TTBR0 configured but unset and
> free to be changed dynamically by the GPU.

It would take a bit of code rework and un-static-ifying a few functions but I'm
wondering if it would be cleaner to add the Adreno GPU pagetable format in a new
file, such as io-pgtable-adreno.c. 

Jordan

> Signed-off-by: Jordan Crouse <jcrouse@xxxxxxxxxxxxxx>
> ---
> 
>  drivers/iommu/io-pgtable-arm.c | 214 ++++++++++++++++++++++++++++++++++++++---
>  drivers/iommu/io-pgtable.c     |   1 +
>  include/linux/io-pgtable.h     |   2 +
>  3 files changed, 202 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
> index 161a7d5..8eb0dbb 100644
> --- a/drivers/iommu/io-pgtable-arm.c
> +++ b/drivers/iommu/io-pgtable-arm.c
> @@ -112,13 +112,19 @@
>  #define ARM_32_LPAE_TCR_EAE		(1 << 31)
>  #define ARM_64_LPAE_S2_TCR_RES1		(1 << 31)
>  
> +#define ARM_LPAE_TCR_EPD0		(1 << 7)
>  #define ARM_LPAE_TCR_EPD1		(1 << 23)
>  
>  #define ARM_LPAE_TCR_TG0_4K		(0 << 14)
>  #define ARM_LPAE_TCR_TG0_64K		(1 << 14)
>  #define ARM_LPAE_TCR_TG0_16K		(2 << 14)
>  
> +#define ARM_LPAE_TCR_TG1_4K		(0 << 30)
> +#define ARM_LPAE_TCR_TG1_64K		(1 << 30)
> +#define ARM_LPAE_TCR_TG1_16K		(2 << 30)
> +
>  #define ARM_LPAE_TCR_SH0_SHIFT		12
> +#define ARM_LPAE_TCR_SH1_SHIFT		28
>  #define ARM_LPAE_TCR_SH0_MASK		0x3
>  #define ARM_LPAE_TCR_SH_NS		0
>  #define ARM_LPAE_TCR_SH_OS		2
> @@ -126,6 +132,8 @@
>  
>  #define ARM_LPAE_TCR_ORGN0_SHIFT	10
>  #define ARM_LPAE_TCR_IRGN0_SHIFT	8
> +#define ARM_LPAE_TCR_ORGN1_SHIFT	26
> +#define ARM_LPAE_TCR_IRGN1_SHIFT	24
>  #define ARM_LPAE_TCR_RGN_MASK		0x3
>  #define ARM_LPAE_TCR_RGN_NC		0
>  #define ARM_LPAE_TCR_RGN_WBWA		1
> @@ -136,6 +144,7 @@
>  #define ARM_LPAE_TCR_SL0_MASK		0x3
>  
>  #define ARM_LPAE_TCR_T0SZ_SHIFT		0
> +#define ARM_LPAE_TCR_T1SZ_SHIFT		16
>  #define ARM_LPAE_TCR_SZ_MASK		0xf
>  
>  #define ARM_LPAE_TCR_PS_SHIFT		16
> @@ -152,6 +161,14 @@
>  #define ARM_LPAE_TCR_PS_48_BIT		0x5ULL
>  #define ARM_LPAE_TCR_PS_52_BIT		0x6ULL
>  
> +#define ARM_LPAE_TCR_SEP_SHIFT		47
> +#define ARM_LPAE_TCR_SEP_31		(0x0ULL << ARM_LPAE_TCR_SEP_SHIFT)
> +#define ARM_LPAE_TCR_SEP_35		(0x1ULL << ARM_LPAE_TCR_SEP_SHIFT)
> +#define ARM_LPAE_TCR_SEP_39		(0x2ULL << ARM_LPAE_TCR_SEP_SHIFT)
> +#define ARM_LPAE_TCR_SEP_41		(0x3ULL << ARM_LPAE_TCR_SEP_SHIFT)
> +#define ARM_LPAE_TCR_SEP_43		(0x4ULL << ARM_LPAE_TCR_SEP_SHIFT)
> +#define ARM_LPAE_TCR_SEP_UPSTREAM	(0x7ULL << ARM_LPAE_TCR_SEP_SHIFT)
> +
>  #define ARM_LPAE_MAIR_ATTR_SHIFT(n)	((n) << 3)
>  #define ARM_LPAE_MAIR_ATTR_MASK		0xff
>  #define ARM_LPAE_MAIR_ATTR_DEVICE	0x04
> @@ -426,7 +443,8 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
>  	arm_lpae_iopte pte;
>  
>  	if (data->iop.fmt == ARM_64_LPAE_S1 ||
> -	    data->iop.fmt == ARM_32_LPAE_S1) {
> +	    data->iop.fmt == ARM_32_LPAE_S1 ||
> +	    data->iop.fmt == ARM_ADRENO_GPU_LPAE) {
>  		pte = ARM_LPAE_PTE_nG;
>  		if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ))
>  			pte |= ARM_LPAE_PTE_AP_RDONLY;
> @@ -497,6 +515,21 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
>  	return ret;
>  }
>  
> +static int arm_adreno_gpu_lpae_map(struct io_pgtable_ops *ops,
> +		unsigned long iova, phys_addr_t paddr, size_t size,
> +		int iommu_prot)
> +{
> +	struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
> +	unsigned long mask = 1UL << data->iop.cfg.ias;
> +
> +	/* This configuration expects all iova addresses to be in TTBR1 */
> +	if (WARN_ON(iova & mask))
> +		return -ERANGE;
> +
> +	/* Mask off the sign extended bits and map as usual */
> +	return arm_lpae_map(ops, iova & (mask - 1), paddr, size, iommu_prot);
> +}
> +
>  static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl,
>  				    arm_lpae_iopte *ptep)
>  {
> @@ -643,6 +676,22 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
>  	return __arm_lpae_unmap(data, iova, size, lvl + 1, ptep);
>  }
>  
> +static size_t arm_adreno_gpu_lpae_unmap(struct io_pgtable_ops *ops,
> +				   unsigned long iova, size_t size)
> +{
> +	struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
> +	arm_lpae_iopte *ptep = data->pgd;
> +	int lvl = ARM_LPAE_START_LVL(data);
> +	unsigned long mask = 1UL << data->iop.cfg.ias;
> +
> +	/* Make sure the sign extend bit is set in the iova */
> +	if (WARN_ON(!(iova & mask)))
> +		return 0;
> +
> +	/* Mask off the sign extended bits before unmapping */
> +	return __arm_lpae_unmap(data, iova & (mask - 1), size, lvl, ptep);
> +}
> +
>  static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
>  			     size_t size)
>  {
> @@ -692,6 +741,17 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
>  	return iopte_to_paddr(pte, data) | iova;
>  }
>  
> +
> +static phys_addr_t arm_adreno_gpu_lpae_iova_to_phys(struct io_pgtable_ops *ops,
> +					       unsigned long iova)
> +{
> +	struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
> +	unsigned long mask = 1UL << data->iop.cfg.ias;
> +
> +	/* Mask off the sign extended bits before translating */
> +	return arm_lpae_iova_to_phys(ops, iova & (mask - 1));
> +}
> +
>  static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg)
>  {
>  	unsigned long granule, page_sizes;
> @@ -771,17 +831,16 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg)
>  	pgd_bits = va_bits - (data->bits_per_level * (data->levels - 1));
>  	data->pgd_size = 1UL << (pgd_bits + ilog2(sizeof(arm_lpae_iopte)));
>  
> -	data->iop.ops = (struct io_pgtable_ops) {
> -		.map		= arm_lpae_map,
> -		.unmap		= arm_lpae_unmap,
> -		.iova_to_phys	= arm_lpae_iova_to_phys,
> -	};
>  
>  	return data;
>  }
>  
> -static struct io_pgtable *
> -arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
> +/*
> + * Common allocation function for S1 pagetables.  Set up the TTBR0 region and
> + * allocate a default pagetable
> + */
> +static struct arm_lpae_io_pgtable *
> +_arm_64_lpae_alloc_pgtable_s1_common(struct io_pgtable_cfg *cfg)
>  {
>  	u64 reg;
>  	struct arm_lpae_io_pgtable *data;
> @@ -845,8 +904,6 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
>  
>  	reg |= (64ULL - cfg->ias) << ARM_LPAE_TCR_T0SZ_SHIFT;
>  
> -	/* Disable speculative walks through TTBR1 */
> -	reg |= ARM_LPAE_TCR_EPD1;
>  	cfg->arm_lpae_s1_cfg.tcr = reg;
>  
>  	/* MAIRs */
> @@ -870,16 +927,131 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
>  	/* Ensure the empty pgd is visible before any actual TTBR write */
>  	wmb();
>  
> -	/* TTBRs */
> -	cfg->arm_lpae_s1_cfg.ttbr[0] = virt_to_phys(data->pgd);
> -	cfg->arm_lpae_s1_cfg.ttbr[1] = 0;
> -	return &data->iop;
> -
> +	return data;
>  out_free_data:
>  	kfree(data);
>  	return NULL;
>  }
>  
> +
> +static struct io_pgtable *
> +arm_adreno_gpu_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
> +{
> +	struct arm_lpae_io_pgtable *data;
> +	u64 reg;
> +
> +	/*
> +	 * Make sure the ias aligns with the available options for the sign
> +	 * extension bit
> +	 */
> +	switch (cfg->ias) {
> +	case 32:
> +	case 36:
> +	case 40:
> +	case 42:
> +	case 44:
> +		/*
> +		 * The SEP will be the highest available bit so adjust the data
> +		 * size by one to accommodate it
> +		 */
> +		cfg->ias--;
> +		break;
> +	case 48:
> +		/*
> +		 * IAS of 48 is a special case, it has a dedicated sign
> +		 * extension bit so we can use the full IAS size
> +		 */
> +		break;
> +	default:
> +		/* The ias doesn't work for the available SEP options */
> +		return NULL;
> +	}
> +
> +	data = _arm_64_lpae_alloc_pgtable_s1_common(cfg);
> +	if (!data)
> +		return NULL;
> +
> +	reg = (ARM_LPAE_TCR_SH_IS << ARM_LPAE_TCR_SH1_SHIFT) |
> +	      (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN1_SHIFT) |
> +	      (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN1_SHIFT);
> +
> +	switch (ARM_LPAE_GRANULE(data)) {
> +	case SZ_4K:
> +		reg |= ARM_LPAE_TCR_TG1_4K;
> +		break;
> +	case SZ_16K:
> +		reg |= ARM_LPAE_TCR_TG1_16K;
> +		break;
> +	case SZ_64K:
> +		reg |= ARM_LPAE_TCR_TG1_64K;
> +		break;
> +	}
> +
> +	reg |= (64ULL - cfg->ias) << ARM_LPAE_TCR_T1SZ_SHIFT;
> +
> +	/* Set the sign extension bit */
> +	switch (cfg->ias) {
> +	case 31:
> +		reg |= ARM_LPAE_TCR_SEP_31;
> +		break;
> +	case 35:
> +		reg |= ARM_LPAE_TCR_SEP_35;
> +		break;
> +	case 39:
> +		reg |= ARM_LPAE_TCR_SEP_39;
> +		break;
> +	case 41:
> +		reg |= ARM_LPAE_TCR_SEP_41;
> +		break;
> +	case 43:
> +		reg |= ARM_LPAE_TCR_SEP_43;
> +		break;
> +	case 48:
> +		reg |= ARM_LPAE_TCR_SEP_UPSTREAM;
> +		break;
> +	}
> +
> +	cfg->arm_lpae_s1_cfg.tcr |= reg;
> +
> +	/* Set the allocated pgd to ttbr1 and leave ttbr0 empty */
> +	cfg->arm_lpae_s1_cfg.ttbr[0] = 0;
> +	cfg->arm_lpae_s1_cfg.ttbr[1] = virt_to_phys(data->pgd);
> +
> +	/* Set use case specific pgtable helpers */
> +	data->iop.ops = (struct io_pgtable_ops) {
> +		.map		= arm_adreno_gpu_lpae_map,
> +		.unmap		= arm_adreno_gpu_lpae_unmap,
> +		.iova_to_phys	= arm_adreno_gpu_lpae_iova_to_phys,
> +	};
> +
> +	return &data->iop;
> +}
> +
> +static struct io_pgtable *
> +arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
> +{
> +	struct arm_lpae_io_pgtable *data;
> +
> +	data = _arm_64_lpae_alloc_pgtable_s1_common(cfg);
> +	if (!data)
> +		return NULL;
> +
> +	/* Disable speculative walks through TTBR1 */
> +	cfg->arm_lpae_s1_cfg.tcr |= ARM_LPAE_TCR_EPD1;
> +
> +	/* Set the pgd to TTBR0 */
> +	cfg->arm_lpae_s1_cfg.ttbr[0] = virt_to_phys(data->pgd);
> +	cfg->arm_lpae_s1_cfg.ttbr[1] = 0;
> +
> +	data->iop.ops = (struct io_pgtable_ops) {
> +		.map		= arm_lpae_map,
> +		.unmap		= arm_lpae_unmap,
> +		.iova_to_phys	= arm_lpae_iova_to_phys,
> +	};
> +
> +	return &data->iop;
> +}
> +
>  static struct io_pgtable *
>  arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
>  {
> @@ -894,6 +1066,12 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
>  	if (!data)
>  		return NULL;
>  
> +	data->iop.ops = (struct io_pgtable_ops) {
> +		.map		= arm_lpae_map,
> +		.unmap		= arm_lpae_unmap,
> +		.iova_to_phys	= arm_lpae_iova_to_phys,
> +	};
> +
>  	/*
>  	 * Concatenate PGDs at level 1 if possible in order to reduce
>  	 * the depth of the stage-2 walk.
> @@ -1041,6 +1219,11 @@ struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns = {
>  	.free	= arm_lpae_free_pgtable,
>  };
>  
> +struct io_pgtable_init_fns io_pgtable_arm_adreno_gpu_lpae_init_fns = {
> +	.alloc	= arm_adreno_gpu_lpae_alloc_pgtable,
> +	.free	= arm_lpae_free_pgtable,
> +};
> +
>  struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns = {
>  	.alloc	= arm_64_lpae_alloc_pgtable_s2,
>  	.free	= arm_lpae_free_pgtable,
> @@ -1112,6 +1295,7 @@ static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg)
>  	static const enum io_pgtable_fmt fmts[] = {
>  		ARM_64_LPAE_S1,
>  		ARM_64_LPAE_S2,
> +		ARM_64_LPAE_TTBR1_S1,
>  	};
>  
>  	int i, j;
> diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c
> index ced53e5..e47ed2d 100644
> --- a/drivers/iommu/io-pgtable.c
> +++ b/drivers/iommu/io-pgtable.c
> @@ -20,6 +20,7 @@ io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = {
>  	[ARM_64_LPAE_S1] = &io_pgtable_arm_64_lpae_s1_init_fns,
>  	[ARM_64_LPAE_S2] = &io_pgtable_arm_64_lpae_s2_init_fns,
>  	[ARM_MALI_LPAE] = &io_pgtable_arm_mali_lpae_init_fns,
> +	[ARM_ADRENO_GPU_LPAE] = &io_pgtable_arm_adreno_gpu_lpae_init_fns,
>  #endif
>  #ifdef CONFIG_IOMMU_IO_PGTABLE_ARMV7S
>  	[ARM_V7S] = &io_pgtable_arm_v7s_init_fns,
> diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
> index b5a450a..4871e85 100644
> --- a/include/linux/io-pgtable.h
> +++ b/include/linux/io-pgtable.h
> @@ -13,6 +13,7 @@ enum io_pgtable_fmt {
>  	ARM_64_LPAE_S2,
>  	ARM_V7S,
>  	ARM_MALI_LPAE,
> +	ARM_ADRENO_GPU_LPAE,
>  	IO_PGTABLE_NUM_FMTS,
>  };
>  
> @@ -213,5 +214,6 @@ extern struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns;
>  extern struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns;
>  extern struct io_pgtable_init_fns io_pgtable_arm_v7s_init_fns;
>  extern struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns;
> +extern struct io_pgtable_init_fns io_pgtable_arm_adreno_gpu_lpae_init_fns;
>  
>  #endif /* __IO_PGTABLE_H */
> -- 
> 2.7.4
> 

-- 
The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project



[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [Linux for Sparc]     [IETF Annouce]     [Security]     [Bugtraq]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux