On 08/08/2019 23:21, Rob Herring wrote:
> The midgard/bifrost GPUs need to allocate GPU heap memory which is
> allocated on GPU page faults and not pinned in memory. The vendor driver
> calls this functionality GROW_ON_GPF.
>
> This implementation assumes that BOs allocated with the
> PANFROST_BO_NOEXEC flag are never mmapped or exported. Both of those may
> actually work, but I'm unsure if there's some interaction there. It
> would cause the whole object to be pinned in memory which would defeat
> the point of this.
>
> On faults, we map in 2MB at a time in order to utilize huge pages (if
> enabled). Currently, once we've mapped pages in, they are only unmapped
> if the BO is freed. Once we add shrinker support, we can unmap pages
> with the shrinker.
>
> Cc: Tomeu Vizoso <tomeu.vizoso@xxxxxxxxxxxxx>
> Cc: Boris Brezillon <boris.brezillon@xxxxxxxxxxxxx>
> Cc: Robin Murphy <robin.murphy@xxxxxxx>
> Cc: Steven Price <steven.price@xxxxxxx>
> Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@xxxxxxxxxxxxx>
> Signed-off-by: Rob Herring <robh@xxxxxxxxxx>

Reviewed-by: Steven Price <steven.price@xxxxxxx>

Although a couple of comments below...
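As a usage note for anyone wanting to test this: with the new flag,
userspace creates a heap roughly as below. This is only a minimal
sketch against the uAPI in this patch (plain ioctl(), no libdrm
wrapper, error handling and EINTR retry omitted; "fd" is assumed to
be an open panfrost render node and create_heap_bo() is a made-up
helper name):

	#include <string.h>
	#include <sys/ioctl.h>
	#include <drm/panfrost_drm.h>

	/*
	 * Create an on-demand-paged heap BO. Heap BOs must also be
	 * NOEXEC - panfrost_ioctl_create_bo() rejects HEAP without it.
	 */
	static int create_heap_bo(int fd, __u32 size, __u32 *handle,
				  __u64 *gpu_va)
	{
		struct drm_panfrost_create_bo arg;

		memset(&arg, 0, sizeof(arg));
		arg.size = size;	/* kernel rounds this up to 2MB */
		arg.flags = PANFROST_BO_HEAP | PANFROST_BO_NOEXEC;

		if (ioctl(fd, DRM_IOCTL_PANFROST_CREATE_BO, &arg))
			return -1;

		*handle = arg.handle;	/* GEM handle for later ioctls */
		*gpu_va = arg.offset;	/* GPU VA; pages fault in on demand */
		return 0;
	}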
> ---
>  drivers/gpu/drm/panfrost/TODO           |   2 -
>  drivers/gpu/drm/panfrost/panfrost_drv.c |  11 +-
>  drivers/gpu/drm/panfrost/panfrost_gem.c |  43 +++++++-
>  drivers/gpu/drm/panfrost/panfrost_gem.h |   8 ++
>  drivers/gpu/drm/panfrost/panfrost_mmu.c | 129 ++++++++++++++++++++++--
>  include/uapi/drm/panfrost_drm.h         |   1 +
>  6 files changed, 177 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/panfrost/TODO b/drivers/gpu/drm/panfrost/TODO
> index d4c7fb7e7d13..e7727b292355 100644
> --- a/drivers/gpu/drm/panfrost/TODO
> +++ b/drivers/gpu/drm/panfrost/TODO
> @@ -10,8 +10,6 @@
>    The hard part is handling when more address spaces are needed than what
>    the h/w provides.
>
> -- Support pinning pages on demand (GPU page faults).
> -
>  - Support userspace controlled GPU virtual addresses. Needed for Vulkan. (Tomeu)
>
>  - Compute job support. So called 'compute only' jobs need to be plumbed up to
> diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
> index f070f2dd9f84..994dbc37e4d0 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_drv.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
> @@ -82,7 +82,12 @@ static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data,
>  	struct drm_panfrost_create_bo *args = data;
>
>  	if (!args->size || args->pad ||
> -	    (args->flags & ~PANFROST_BO_NOEXEC))
> +	    (args->flags & ~(PANFROST_BO_NOEXEC | PANFROST_BO_HEAP)))
> +		return -EINVAL;
> +
> +	/* Heaps should never be executable */
> +	if ((args->flags & PANFROST_BO_HEAP) &&
> +	    !(args->flags & PANFROST_BO_NOEXEC))
>  		return -EINVAL;
>
>  	bo = panfrost_gem_create_with_handle(file, dev, args->size, args->flags,
> @@ -297,6 +302,10 @@ static int panfrost_ioctl_mmap_bo(struct drm_device *dev, void *data,
>  		return -ENOENT;
>  	}
>
> +	/* Don't allow mmapping of heap objects as pages are not pinned. */
> +	if (to_panfrost_bo(gem_obj)->is_heap)
> +		return -EINVAL;
> +
>  	ret = drm_gem_create_mmap_offset(gem_obj);
>  	if (ret == 0)
>  		args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
> diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c
> index f398109b8e0c..37a3a6ed4617 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_gem.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c
> @@ -16,6 +16,23 @@
>   */
>  static void panfrost_gem_free_object(struct drm_gem_object *obj)
>  {
> +	struct panfrost_gem_object *bo = to_panfrost_bo(obj);
> +	struct panfrost_device *pfdev = obj->dev->dev_private;

This is where things start building again - these need moving to
patch 3.

> +
> +	if (bo->sgts) {
> +		int i;
> +		int n_sgt = bo->base.base.size / SZ_2M;
> +
> +		for (i = 0; i < n_sgt; i++) {
> +			if (bo->sgts[i].sgl) {
> +				dma_unmap_sg(pfdev->dev, bo->sgts[i].sgl,
> +					     bo->sgts[i].nents, DMA_BIDIRECTIONAL);
> +				sg_free_table(&bo->sgts[i]);
> +			}
> +		}
> +		kfree(bo->sgts);
> +	}
> +
>  	mutex_lock(&pfdev->shrinker_lock);
>  	if (!list_empty(&bo->base.madv_list))
>  		list_del(&bo->base.madv_list);
> @@ -50,10 +67,11 @@ static int panfrost_gem_open(struct drm_gem_object *obj, struct drm_file *file_p
>  	if (ret)
>  		goto out;
>
> -	ret = panfrost_mmu_map(bo);
> -	if (ret)
> -		drm_mm_remove_node(&bo->node);
> -
> +	if (!bo->is_heap) {
> +		ret = panfrost_mmu_map(bo);
> +		if (ret)
> +			drm_mm_remove_node(&bo->node);
> +	}
>  out:
>  	spin_unlock(&pfdev->mm_lock);
>  	return ret;
> @@ -73,12 +91,20 @@ static void panfrost_gem_close(struct drm_gem_object *obj, struct drm_file *file
>  	spin_unlock(&pfdev->mm_lock);
>  }
>
> +static int panfrost_gem_pin(struct drm_gem_object *obj)
> +{
> +	if (to_panfrost_bo(obj)->is_heap)
> +		return -EINVAL;
> +
> +	return drm_gem_shmem_pin(obj);
> +}
> +
>  static const struct drm_gem_object_funcs panfrost_gem_funcs = {
>  	.free = panfrost_gem_free_object,
>  	.open = panfrost_gem_open,
>  	.close = panfrost_gem_close,
>  	.print_info = drm_gem_shmem_print_info,
> -	.pin = drm_gem_shmem_pin,
> +	.pin = panfrost_gem_pin,
>  	.unpin = drm_gem_shmem_unpin,
>  	.get_sg_table = drm_gem_shmem_get_sg_table,
>  	.vmap = drm_gem_shmem_vmap,
> @@ -117,12 +143,19 @@ panfrost_gem_create_with_handle(struct drm_file *file_priv,
>  	struct drm_gem_shmem_object *shmem;
>  	struct panfrost_gem_object *bo;
>
> +	/* Round up heap allocations to 2MB to keep fault handling simple */
> +	if (flags & PANFROST_BO_HEAP)
> +		size = roundup(size, SZ_2M);
> +	else
> +		size = roundup(size, PAGE_SIZE);

If I understand correctly this roundup to PAGE_SIZE is now redundant
(although not harmful).
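From memory the shmem helper already page-aligns on entry, which is
why I think the else branch is a no-op - quoting roughly:

	/* __drm_gem_shmem_create() in drm_gem_shmem_helper.c, paraphrased */
	size = PAGE_ALIGN(size);

so only the 2MB rounding for heap BOs changes anything here.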
Steve

> +
>  	shmem = drm_gem_shmem_create(dev, size);
>  	if (IS_ERR(shmem))
>  		return ERR_CAST(shmem);
>
>  	bo = to_panfrost_bo(&shmem->base);
>  	bo->noexec = !!(flags & PANFROST_BO_NOEXEC);
> +	bo->is_heap = !!(flags & PANFROST_BO_HEAP);
>
>  	/*
>  	 * Allocate an id of idr table where the obj is registered
> diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h
> index d4c7aa1790a7..e10f58316915 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_gem.h
> +++ b/drivers/gpu/drm/panfrost/panfrost_gem.h
> @@ -9,10 +9,12 @@
>
>  struct panfrost_gem_object {
>  	struct drm_gem_shmem_object base;
> +	struct sg_table *sgts;
>
>  	struct drm_mm_node node;
>  	bool is_mapped		:1;
>  	bool noexec		:1;
> +	bool is_heap		:1;
>  };
>
>  static inline
> @@ -21,6 +23,12 @@ struct panfrost_gem_object *to_panfrost_bo(struct drm_gem_object *obj)
>  	return container_of(to_drm_gem_shmem_obj(obj), struct panfrost_gem_object, base);
>  }
>
> +static inline
> +struct panfrost_gem_object *drm_mm_node_to_panfrost_bo(struct drm_mm_node *node)
> +{
> +	return container_of(node, struct panfrost_gem_object, node);
> +}
> +
>  struct drm_gem_object *panfrost_gem_create_object(struct drm_device *dev, size_t size);
>
>  struct drm_gem_object *
> diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
> index b609ee55a872..2ed411f09d80 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
> @@ -2,6 +2,7 @@
>  /* Copyright 2019 Linaro, Ltd, Rob Herring <robh@xxxxxxxxxx> */
>  #include <linux/bitfield.h>
>  #include <linux/delay.h>
> +#include <linux/dma-mapping.h>
>  #include <linux/interrupt.h>
>  #include <linux/io.h>
>  #include <linux/iopoll.h>
> @@ -9,6 +10,7 @@
>  #include <linux/iommu.h>
>  #include <linux/platform_device.h>
>  #include <linux/pm_runtime.h>
> +#include <linux/shmem_fs.h>
>  #include <linux/sizes.h>
>
>  #include "panfrost_device.h"
> @@ -240,12 +242,12 @@ void panfrost_mmu_unmap(struct panfrost_gem_object *bo)
>  		size_t unmapped_page;
>  		size_t pgsize = get_pgsize(iova, len - unmapped_len);
>
> -		unmapped_page = ops->unmap(ops, iova, pgsize);
> -		if (!unmapped_page)
> -			break;
> -
> -		iova += unmapped_page;
> -		unmapped_len += unmapped_page;
> +		if (ops->iova_to_phys(ops, iova)) {
> +			unmapped_page = ops->unmap(ops, iova, pgsize);
> +			WARN_ON(unmapped_page != pgsize);
> +		}
> +		iova += pgsize;
> +		unmapped_len += pgsize;
>  	}
>
>  	mmu_hw_do_operation(pfdev, 0, bo->node.start << PAGE_SHIFT,
> @@ -281,6 +283,105 @@ static const struct iommu_gather_ops mmu_tlb_ops = {
>  	.tlb_sync = mmu_tlb_sync_context,
>  };
>
> +static struct drm_mm_node *addr_to_drm_mm_node(struct panfrost_device *pfdev, int as, u64 addr)
> +{
> +	struct drm_mm_node *node;
> +	u64 offset = addr >> PAGE_SHIFT;
> +
> +	drm_mm_for_each_node(node, &pfdev->mm) {
> +		if (offset >= node->start && offset < (node->start + node->size))
> +			return node;
> +	}
> +	return NULL;
> +}
> +
> +#define NUM_FAULT_PAGES (SZ_2M / PAGE_SIZE)
> +
> +int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, u64 addr)
> +{
> +	int ret, i;
> +	struct drm_mm_node *node;
> +	struct panfrost_gem_object *bo;
> +	struct address_space *mapping;
> +	pgoff_t page_offset;
> +	struct sg_table *sgt;
> +	struct page **pages;
> +
> +	node = addr_to_drm_mm_node(pfdev, as, addr);
> +	if (!node)
> +		return -ENOENT;
> +
> +	bo = drm_mm_node_to_panfrost_bo(node);
> +	if (!bo->is_heap) {
> +		dev_WARN(pfdev->dev, "matching BO is not heap type (GPU VA = %llx)",
> +			 node->start << PAGE_SHIFT);
> +		return -EINVAL;
> +	}
> +	/* Assume 2MB alignment and size multiple */
> +	addr &= ~((u64)SZ_2M - 1);
> +	page_offset = addr >> PAGE_SHIFT;
> +	page_offset -= node->start;
> +
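Nit: the arithmetic above might deserve a comment since it mixes byte
and page units. A worked example with made-up numbers, assuming 4K
pages and a heap whose drm_mm node starts at GPU VA 0x8000000 (i.e.
node->start == 0x8000 in page units):

	addr        = 0x8403010;		/* faulting byte address */
	addr       &= ~((u64)SZ_2M - 1);	/* -> 0x8400000, 2MB-aligned */
	page_offset = addr >> PAGE_SHIFT;	/* -> 0x8400 pages */
	page_offset -= node->start;		/* -> 0x400 pages into the BO */

which then selects sgts[0x400 / 0x200] == sgts[2] below, i.e. the
third 2MB chunk of the heap. The addresses are invented but the units
do line up.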
> +	mutex_lock(&bo->base.pages_lock);
> +
> +	if (!bo->base.pages) {
> +		bo->sgts = kvmalloc_array(bo->base.base.size / SZ_2M,
> +				     sizeof(struct sg_table), GFP_KERNEL | __GFP_ZERO);
> +		if (!bo->sgts)
> +			return -ENOMEM;
> +
> +		pages = kvmalloc_array(bo->base.base.size >> PAGE_SHIFT,
> +				       sizeof(struct page *), GFP_KERNEL | __GFP_ZERO);
> +		if (!pages) {
> +			kfree(bo->sgts);
> +			bo->sgts = NULL;
> +			return -ENOMEM;
> +		}
> +		bo->base.pages = pages;
> +		bo->base.pages_use_count = 1;
> +	} else
> +		pages = bo->base.pages;
> +
> +	mapping = bo->base.base.filp->f_mapping;
> +	mapping_set_unevictable(mapping);
> +
> +	for (i = page_offset; i < page_offset + NUM_FAULT_PAGES; i++) {
> +		pages[i] = shmem_read_mapping_page(mapping, i);
> +		if (IS_ERR(pages[i])) {
> +			mutex_unlock(&bo->base.pages_lock);
> +			ret = PTR_ERR(pages[i]);
> +			goto err_pages;
> +		}
> +	}
> +
> +	mutex_unlock(&bo->base.pages_lock);
> +
> +	sgt = &bo->sgts[page_offset / (SZ_2M / PAGE_SIZE)];
> +	ret = sg_alloc_table_from_pages(sgt, pages + page_offset,
> +					NUM_FAULT_PAGES, 0, SZ_2M, GFP_KERNEL);
> +	if (ret)
> +		goto err_pages;
> +
> +	if (!dma_map_sg(pfdev->dev, sgt->sgl, sgt->nents, DMA_BIDIRECTIONAL)) {
> +		ret = -EINVAL;
> +		goto err_map;
> +	}
> +
> +	mmu_map_sg(pfdev, addr, IOMMU_WRITE | IOMMU_READ | IOMMU_NOEXEC, sgt);
> +
> +	bo->is_mapped = true;
> +
> +	dev_dbg(pfdev->dev, "mapped page fault @ %llx", addr);
> +
> +	return 0;
> +
> +err_map:
> +	sg_free_table(sgt);
> +err_pages:
> +	drm_gem_shmem_put_pages(&bo->base);
> +	return ret;
> +}
> +
>  static const char *access_type_name(struct panfrost_device *pfdev,
>  				    u32 fault_status)
>  {
> @@ -317,9 +418,7 @@ static irqreturn_t panfrost_mmu_irq_handler_thread(int irq, void *data)
>  {
>  	struct panfrost_device *pfdev = data;
>  	u32 status = mmu_read(pfdev, MMU_INT_RAWSTAT);
> -	int i;
> -
> -	dev_err(pfdev->dev, "mmu irq status=%x\n", status);
> +	int i, ret;
>
>  	for (i = 0; status; i++) {
>  		u32 mask = BIT(i) | BIT(i + 16);
> @@ -341,6 +440,18 @@ static irqreturn_t panfrost_mmu_irq_handler_thread(int irq, void *data)
>  		access_type = (fault_status >> 8) & 0x3;
>  		source_id = (fault_status >> 16);
>
> +		/* Page fault only */
> +		if ((status & mask) == BIT(i)) {
> +			WARN_ON(exception_type < 0xC1 || exception_type > 0xC4);
> +
> +			ret = panfrost_mmu_map_fault_addr(pfdev, i, addr);
> +			if (!ret) {
> +				mmu_write(pfdev, MMU_INT_CLEAR, BIT(i));
> +				status &= ~mask;
> +				continue;
> +			}
> +		}
> +
>  		/* terminal fault, print info about the fault */
>  		dev_err(pfdev->dev,
>  			"Unhandled Page fault in AS%d at VA 0x%016llX\n"
> diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h
> index b80c20d17dec..ec19db1eead8 100644
> --- a/include/uapi/drm/panfrost_drm.h
> +++ b/include/uapi/drm/panfrost_drm.h
> @@ -85,6 +85,7 @@ struct drm_panfrost_wait_bo {
>  };
>
>  #define PANFROST_BO_NOEXEC	1
> +#define PANFROST_BO_HEAP	2
>
>  /**
>   * struct drm_panfrost_create_bo - ioctl argument for creating Panfrost BOs.

_______________________________________________
dri-devel mailing list
dri-devel@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/dri-devel