On Wed, Aug 07, 2019 at 04:21:39PM -0600, Jordan Crouse wrote: > Add a new sub-format ARM_ADRENO_GPU_LPAE to set up TTBR0 and TTBR1 for > use by the Adreno GPU. This will allow the GPU driver to map global > buffers in the TTBR1 and leave the TTBR0 configured but unset and > free to be changed dynamically by the GPU. It would take a bit of code rework and un-static-ifying a few functions but I'm wondering if it would be cleaner to add the Adreno GPU pagetable format in a new file, such as io-pgtable-adreno.c. Jordan > Signed-off-by: Jordan Crouse <jcrouse@xxxxxxxxxxxxxx> > --- > > drivers/iommu/io-pgtable-arm.c | 214 ++++++++++++++++++++++++++++++++++++++--- > drivers/iommu/io-pgtable.c | 1 + > include/linux/io-pgtable.h | 2 + > 3 files changed, 202 insertions(+), 15 deletions(-) > > diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c > index 161a7d5..8eb0dbb 100644 > --- a/drivers/iommu/io-pgtable-arm.c > +++ b/drivers/iommu/io-pgtable-arm.c > @@ -112,13 +112,19 @@ > #define ARM_32_LPAE_TCR_EAE (1 << 31) > #define ARM_64_LPAE_S2_TCR_RES1 (1 << 31) > > +#define ARM_LPAE_TCR_EPD0 (1 << 7) > #define ARM_LPAE_TCR_EPD1 (1 << 23) > > #define ARM_LPAE_TCR_TG0_4K (0 << 14) > #define ARM_LPAE_TCR_TG0_64K (1 << 14) > #define ARM_LPAE_TCR_TG0_16K (2 << 14) > > +#define ARM_LPAE_TCR_TG1_4K (0 << 30) > +#define ARM_LPAE_TCR_TG1_64K (1 << 30) > +#define ARM_LPAE_TCR_TG1_16K (2 << 30) > + > #define ARM_LPAE_TCR_SH0_SHIFT 12 > +#define ARM_LPAE_TCR_SH1_SHIFT 28 > #define ARM_LPAE_TCR_SH0_MASK 0x3 > #define ARM_LPAE_TCR_SH_NS 0 > #define ARM_LPAE_TCR_SH_OS 2 > @@ -126,6 +132,8 @@ > > #define ARM_LPAE_TCR_ORGN0_SHIFT 10 > #define ARM_LPAE_TCR_IRGN0_SHIFT 8 > +#define ARM_LPAE_TCR_ORGN1_SHIFT 26 > +#define ARM_LPAE_TCR_IRGN1_SHIFT 24 > #define ARM_LPAE_TCR_RGN_MASK 0x3 > #define ARM_LPAE_TCR_RGN_NC 0 > #define ARM_LPAE_TCR_RGN_WBWA 1 > @@ -136,6 +144,7 @@ > #define ARM_LPAE_TCR_SL0_MASK 0x3 > > #define ARM_LPAE_TCR_T0SZ_SHIFT 0 > +#define 
ARM_LPAE_TCR_T1SZ_SHIFT 16 > #define ARM_LPAE_TCR_SZ_MASK 0xf > > #define ARM_LPAE_TCR_PS_SHIFT 16 > @@ -152,6 +161,14 @@ > #define ARM_LPAE_TCR_PS_48_BIT 0x5ULL > #define ARM_LPAE_TCR_PS_52_BIT 0x6ULL > > +#define ARM_LPAE_TCR_SEP_SHIFT 47 > +#define ARM_LPAE_TCR_SEP_31 (0x0ULL << ARM_LPAE_TCR_SEP_SHIFT) > +#define ARM_LPAE_TCR_SEP_35 (0x1ULL << ARM_LPAE_TCR_SEP_SHIFT) > +#define ARM_LPAE_TCR_SEP_39 (0x2ULL << ARM_LPAE_TCR_SEP_SHIFT) > +#define ARM_LPAE_TCR_SEP_41 (0x3ULL << ARM_LPAE_TCR_SEP_SHIFT) > +#define ARM_LPAE_TCR_SEP_43 (0x4ULL << ARM_LPAE_TCR_SEP_SHIFT) > +#define ARM_LPAE_TCR_SEP_UPSTREAM (0x7ULL << ARM_LPAE_TCR_SEP_SHIFT) > + > #define ARM_LPAE_MAIR_ATTR_SHIFT(n) ((n) << 3) > #define ARM_LPAE_MAIR_ATTR_MASK 0xff > #define ARM_LPAE_MAIR_ATTR_DEVICE 0x04 > @@ -426,7 +443,8 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data, > arm_lpae_iopte pte; > > if (data->iop.fmt == ARM_64_LPAE_S1 || > - data->iop.fmt == ARM_32_LPAE_S1) { > + data->iop.fmt == ARM_32_LPAE_S1 || > + data->iop.fmt == ARM_ADRENO_GPU_LPAE) { > pte = ARM_LPAE_PTE_nG; > if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ)) > pte |= ARM_LPAE_PTE_AP_RDONLY; > @@ -497,6 +515,21 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova, > return ret; > } > > +static int arm_adreno_gpu_lpae_map(struct io_pgtable_ops *ops, > + unsigned long iova, phys_addr_t paddr, size_t size, > + int iommu_prot) > +{ > + struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); > + unsigned long mask = 1UL << data->iop.cfg.ias; > + > + /* This configuration expects all iova addresses to be in TTBR1 */ > + if (WARN_ON(iova & mask)) > + return -ERANGE; > + > + /* Mask off the sign extended bits and map as usual */ > + return arm_lpae_map(ops, iova & (mask - 1), paddr, size, iommu_prot); > +} > + > static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl, > arm_lpae_iopte *ptep) > { > @@ -643,6 +676,22 @@ static size_t __arm_lpae_unmap(struct 
arm_lpae_io_pgtable *data, > return __arm_lpae_unmap(data, iova, size, lvl + 1, ptep); > } > > +static size_t arm_adreno_gpu_lpae_unmap(struct io_pgtable_ops *ops, > + unsigned long iova, size_t size) > +{ > + struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); > + arm_lpae_iopte *ptep = data->pgd; > + int lvl = ARM_LPAE_START_LVL(data); > + unsigned long mask = 1UL << data->iop.cfg.ias; > + > + /* Make sure the sign extend bit is set in the iova */ > + if (WARN_ON(!(iova & mask))) > + return 0; > + > + /* Mask off the sign extended bits before unmapping */ > + return __arm_lpae_unmap(data, iova & (mask - 1), size, lvl, ptep); > +} > + > static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova, > size_t size) > { > @@ -692,6 +741,17 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, > return iopte_to_paddr(pte, data) | iova; > } > > + > +static phys_addr_t arm_adreno_gpu_lpae_iova_to_phys(struct io_pgtable_ops *ops, > + unsigned long iova) > +{ > + struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); > + unsigned long mask = 1UL << data->iop.cfg.ias; > + > + /* Mask off the sign extended bits before translating */ > + return arm_lpae_iova_to_phys(ops, iova & (mask - 1)); > +} > + > static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg) > { > unsigned long granule, page_sizes; > @@ -771,17 +831,16 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg) > pgd_bits = va_bits - (data->bits_per_level * (data->levels - 1)); > data->pgd_size = 1UL << (pgd_bits + ilog2(sizeof(arm_lpae_iopte))); > > - data->iop.ops = (struct io_pgtable_ops) { > - .map = arm_lpae_map, > - .unmap = arm_lpae_unmap, > - .iova_to_phys = arm_lpae_iova_to_phys, > - }; > > return data; > } > > -static struct io_pgtable * > -arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) > +/* > + * Common allocation function for S1 pagetables. 
Set up the TTBR0 region and > + * allocate a default pagetable > + */ > +static struct arm_lpae_io_pgtable * > +_arm_64_lpae_alloc_pgtable_s1_common(struct io_pgtable_cfg *cfg) > { > u64 reg; > struct arm_lpae_io_pgtable *data; > @@ -845,8 +904,6 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) > > reg |= (64ULL - cfg->ias) << ARM_LPAE_TCR_T0SZ_SHIFT; > > - /* Disable speculative walks through TTBR1 */ > - reg |= ARM_LPAE_TCR_EPD1; > cfg->arm_lpae_s1_cfg.tcr = reg; > > /* MAIRs */ > @@ -870,16 +927,131 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) > /* Ensure the empty pgd is visible before any actual TTBR write */ > wmb(); > > - /* TTBRs */ > - cfg->arm_lpae_s1_cfg.ttbr[0] = virt_to_phys(data->pgd); > - cfg->arm_lpae_s1_cfg.ttbr[1] = 0; > - return &data->iop; > - > + return data; > out_free_data: > kfree(data); > return NULL; > } > > + > +static struct io_pgtable * > +arm_adreno_gpu_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) > +{ > + struct arm_lpae_io_pgtable *data; > + u64 reg; > + > + /* > + * Make sure the ias aligns with the available options for the sign > + * extension bit > + */ > + switch (cfg->ias) { > + case 32: > + case 36: > + case 40: > + case 42: > + case 44: > + /* > + * The SEP will be the highest available bit so adjust the data > + * size by one to accommodate it > + */ > + cfg->ias--; > + break; > + case 48: > + /* > + * IAS of 48 is a special case, it has a dedicated sign > + * extension bit so we can use the full IAS size > + */ > + break; > + default: > + /* The ias doesn't work for the available SEP options */ > + return NULL; > + } > + > + data = _arm_64_lpae_alloc_pgtable_s1_common(cfg); > + if (!data) > + return NULL; > + > + reg = (ARM_LPAE_TCR_SH_IS << ARM_LPAE_TCR_SH1_SHIFT) | > + (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN1_SHIFT) | > + (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN1_SHIFT); > + > + switch (ARM_LPAE_GRANULE(data)) { > + case SZ_4K: > + reg |= 
ARM_LPAE_TCR_TG1_4K; > + break; > + case SZ_16K: > + reg |= ARM_LPAE_TCR_TG1_16K; > + break; > + case SZ_64K: > + reg |= ARM_LPAE_TCR_TG1_64K; > + break; > + } > + > + reg |= (64ULL - cfg->ias) << ARM_LPAE_TCR_T1SZ_SHIFT; > + > + /* Set the sign extension bit */ > + switch (cfg->ias) { > + case 31: > + reg |= ARM_LPAE_TCR_SEP_31; > + break; > + case 35: > + reg |= ARM_LPAE_TCR_SEP_35; > + break; > + case 39: > + reg |= ARM_LPAE_TCR_SEP_39; > + break; > + case 41: > + reg |= ARM_LPAE_TCR_SEP_41; > + break; > + case 43: > + reg |= ARM_LPAE_TCR_SEP_43; > + break; > + case 48: > + reg |= ARM_LPAE_TCR_SEP_UPSTREAM; > + break; > + } > + > + cfg->arm_lpae_s1_cfg.tcr |= reg; > + > + /* Set the allocated pgd to ttbr1 and leave ttbr0 empty */ > + cfg->arm_lpae_s1_cfg.ttbr[0] = 0; > + cfg->arm_lpae_s1_cfg.ttbr[1] = virt_to_phys(data->pgd); > + > + /* Set use case specific pgtable helpers */ > + data->iop.ops = (struct io_pgtable_ops) { > + .map = arm_adreno_gpu_lpae_map, > + .unmap = arm_adreno_gpu_lpae_unmap, > + .iova_to_phys = arm_adreno_gpu_lpae_iova_to_phys, > + }; > + > + return &data->iop; > +} > + > +static struct io_pgtable * > +arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) > +{ > + struct arm_lpae_io_pgtable *data; > + > + data = _arm_64_lpae_alloc_pgtable_s1_common(cfg); > + if (!data) > + return NULL; > + > + /* Disable speculative walks through TTBR1 */ > + cfg->arm_lpae_s1_cfg.tcr |= ARM_LPAE_TCR_EPD1; > + > + /* Set the pgd to TTBR0 */ > + cfg->arm_lpae_s1_cfg.ttbr[0] = virt_to_phys(data->pgd); > + cfg->arm_lpae_s1_cfg.ttbr[1] = 0; > + > + data->iop.ops = (struct io_pgtable_ops) { > + .map = arm_lpae_map, > + .unmap = arm_lpae_unmap, > + .iova_to_phys = arm_lpae_iova_to_phys, > + }; > + > + return &data->iop; > +} > + > static struct io_pgtable * > arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) > { > @@ -894,6 +1066,12 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) > if (!data) > return 
NULL; > > + data->iop.ops = (struct io_pgtable_ops) { > + .map = arm_lpae_map, > + .unmap = arm_lpae_unmap, > + .iova_to_phys = arm_lpae_iova_to_phys, > + }; > + > /* > * Concatenate PGDs at level 1 if possible in order to reduce > * the depth of the stage-2 walk. > @@ -1041,6 +1219,11 @@ struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns = { > .free = arm_lpae_free_pgtable, > }; > > +struct io_pgtable_init_fns io_pgtable_arm_adreno_gpu_lpae_init_fns = { > + .alloc = arm_adreno_gpu_lpae_alloc_pgtable, > + .free = arm_lpae_free_pgtable, > +}; > + > struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns = { > .alloc = arm_64_lpae_alloc_pgtable_s2, > .free = arm_lpae_free_pgtable, > @@ -1112,6 +1295,7 @@ static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg) > static const enum io_pgtable_fmt fmts[] = { > ARM_64_LPAE_S1, > ARM_64_LPAE_S2, > + ARM_64_LPAE_TTBR1_S1, > }; > > int i, j; > diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c > index ced53e5..e47ed2d 100644 > --- a/drivers/iommu/io-pgtable.c > +++ b/drivers/iommu/io-pgtable.c > @@ -20,6 +20,7 @@ io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = { > [ARM_64_LPAE_S1] = &io_pgtable_arm_64_lpae_s1_init_fns, > [ARM_64_LPAE_S2] = &io_pgtable_arm_64_lpae_s2_init_fns, > [ARM_MALI_LPAE] = &io_pgtable_arm_mali_lpae_init_fns, > + [ARM_ADRENO_GPU_LPAE] = &io_pgtable_arm_adreno_gpu_lpae_init_fns, > #endif > #ifdef CONFIG_IOMMU_IO_PGTABLE_ARMV7S > [ARM_V7S] = &io_pgtable_arm_v7s_init_fns, > diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h > index b5a450a..4871e85 100644 > --- a/include/linux/io-pgtable.h > +++ b/include/linux/io-pgtable.h > @@ -13,6 +13,7 @@ enum io_pgtable_fmt { > ARM_64_LPAE_S2, > ARM_V7S, > ARM_MALI_LPAE, > + ARM_ADRENO_GPU_LPAE, > IO_PGTABLE_NUM_FMTS, > }; > > @@ -213,5 +214,6 @@ extern struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns; > extern struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns; > extern struct 
io_pgtable_init_fns io_pgtable_arm_v7s_init_fns; > extern struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns; > +extern struct io_pgtable_init_fns io_pgtable_arm_adreno_gpu_lpae_init_fns; > > #endif /* __IO_PGTABLE_H */ > -- > 2.7.4 > -- The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project