On 08/08/2019 23:29, Rob Herring wrote: > Up until now, a single shared GPU address space was used. This is not > ideal as there's no protection between processes and doesn't work for > supporting the same GPU/CPU VA feature. Most importantly, this will > hopefully mitigate Alyssa's fear of WebGL, whatever that is. > > Most of the changes here are moving struct drm_mm and struct > panfrost_mmu objects from the per device struct to the per FD struct. > The critical function is panfrost_mmu_as_get() which handles allocating > and switching the h/w address spaces. > > Cc: Tomeu Vizoso <tomeu.vizoso@xxxxxxxxxxxxx> > Cc: David Airlie <airlied@xxxxxxxx> > Cc: Daniel Vetter <daniel@xxxxxxxx> > Cc: Robin Murphy <robin.murphy@xxxxxxx> > Cc: Steven Price <steven.price@xxxxxxx> > Cc: Alyssa Rosenzweig <alyssa.rosenzweig@xxxxxxxxxxxxx> > Signed-off-by: Rob Herring <robh@xxxxxxxxxx> > --- > This depends on madvise support (now in drm-misc) and the heap/no-exec > series (just the rework). Seeems to be working pretty well for me, but > more testing would be helpful. I've run multiple 'glmark2-es2-drm > --off-screen' instances and Gnome Shell. Running more than 8 clients (at > least for T860) will hit the address space switch code paths. 
> > Rob > > drivers/gpu/drm/panfrost/TODO | 4 - > drivers/gpu/drm/panfrost/panfrost_device.c | 2 + > drivers/gpu/drm/panfrost/panfrost_device.h | 24 ++- > drivers/gpu/drm/panfrost/panfrost_drv.c | 31 ++- > drivers/gpu/drm/panfrost/panfrost_gem.c | 15 +- > drivers/gpu/drm/panfrost/panfrost_gem.h | 3 + > drivers/gpu/drm/panfrost/panfrost_job.c | 12 +- > drivers/gpu/drm/panfrost/panfrost_mmu.c | 220 +++++++++++++++------ > drivers/gpu/drm/panfrost/panfrost_mmu.h | 8 + > 9 files changed, 239 insertions(+), 80 deletions(-) > > diff --git a/drivers/gpu/drm/panfrost/TODO b/drivers/gpu/drm/panfrost/TODO > index e7727b292355..536a0d4f8d29 100644 > --- a/drivers/gpu/drm/panfrost/TODO > +++ b/drivers/gpu/drm/panfrost/TODO > @@ -6,10 +6,6 @@ > - Bifrost specific feature and issue handling > - Coherent DMA support > > -- Per FD address space support. The h/w supports multiple addresses spaces. > - The hard part is handling when more address spaces are needed than what > - the h/w provides. > - > - Support userspace controlled GPU virtual addresses. Needed for Vulkan. (Tomeu) > > - Compute job support. 
So called 'compute only' jobs need to be plumbed up to > diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c b/drivers/gpu/drm/panfrost/panfrost_device.c > index 9814f4ccbd26..4da71bb56c20 100644 > --- a/drivers/gpu/drm/panfrost/panfrost_device.c > +++ b/drivers/gpu/drm/panfrost/panfrost_device.c > @@ -123,8 +123,10 @@ int panfrost_device_init(struct panfrost_device *pfdev) > mutex_init(&pfdev->sched_lock); > mutex_init(&pfdev->reset_lock); > INIT_LIST_HEAD(&pfdev->scheduled_jobs); > + INIT_LIST_HEAD(&pfdev->as_lru_list); > > spin_lock_init(&pfdev->hwaccess_lock); > + spin_lock_init(&pfdev->as_lock); > > err = panfrost_clk_init(pfdev); > if (err) { > diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h > index 4e5641db9c7e..f503c566e99f 100644 > --- a/drivers/gpu/drm/panfrost/panfrost_device.h > +++ b/drivers/gpu/drm/panfrost/panfrost_device.h > @@ -5,6 +5,8 @@ > #ifndef __PANFROST_DEVICE_H__ > #define __PANFROST_DEVICE_H__ > > +#include <linux/atomic.h> > +#include <linux/io-pgtable.h> > #include <linux/spinlock.h> > #include <drm/drm_device.h> > #include <drm/drm_mm.h> > @@ -63,9 +65,6 @@ struct panfrost_device { > > spinlock_t hwaccess_lock; > > - struct drm_mm mm; > - spinlock_t mm_lock; > - > void __iomem *iomem; > struct clk *clock; > struct clk *bus_clock; > @@ -74,7 +73,11 @@ struct panfrost_device { > > struct panfrost_features features; > > - struct panfrost_mmu *mmu; > + spinlock_t as_lock; > + unsigned long as_in_use_mask; > + unsigned long as_alloc_mask; > + struct list_head as_lru_list; > + > struct panfrost_job_slot *js; > > struct panfrost_job *jobs[NUM_JOB_SLOTS]; > @@ -98,10 +101,23 @@ struct panfrost_device { > } devfreq; > }; > > +struct panfrost_mmu { > + struct io_pgtable_cfg pgtbl_cfg; > + struct io_pgtable_ops *pgtbl_ops; > + struct mutex lock; > + int as; > + atomic_t as_count; > + struct list_head list; > +}; > + > struct panfrost_file_priv { > struct panfrost_device *pfdev; > > struct 
drm_sched_entity sched_entity[NUM_JOB_SLOTS]; > + > + struct panfrost_mmu mmu; > + struct drm_mm mm; > + spinlock_t mm_lock; > }; > > static inline struct panfrost_device *to_panfrost_device(struct drm_device *ddev) > diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c > index a1352750984c..7c8aa1a8054f 100644 > --- a/drivers/gpu/drm/panfrost/panfrost_drv.c > +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c > @@ -403,6 +403,7 @@ static void panfrost_drm_mm_color_adjust(const struct drm_mm_node *node, > static int > panfrost_open(struct drm_device *dev, struct drm_file *file) > { > + int ret; > struct panfrost_device *pfdev = dev->dev_private; > struct panfrost_file_priv *panfrost_priv; > > @@ -413,7 +414,28 @@ panfrost_open(struct drm_device *dev, struct drm_file *file) > panfrost_priv->pfdev = pfdev; > file->driver_priv = panfrost_priv; > > - return panfrost_job_open(panfrost_priv); > + spin_lock_init(&panfrost_priv->mm_lock); > + > + /* 4G enough for now. can be 48-bit */ > + panfrost_priv->mm.color_adjust = panfrost_drm_mm_color_adjust; > + drm_mm_init(&panfrost_priv->mm, SZ_32M >> PAGE_SHIFT, (SZ_4G - SZ_32M) >> PAGE_SHIFT); > + > + ret = panfrost_mmu_pgtable_alloc(panfrost_priv); > + if (ret) > + goto err_pgtable; > + > + ret = panfrost_job_open(panfrost_priv); > + if (ret) > + goto err_job; > + > + return 0; > + > +err_job: > + panfrost_mmu_pgtable_free(panfrost_priv); > +err_pgtable: > + drm_mm_takedown(&panfrost_priv->mm); > + kfree(panfrost_priv); Looks like this kfree was missing before... 
:) > + return ret; > } > > static void > @@ -424,6 +446,8 @@ panfrost_postclose(struct drm_device *dev, struct drm_file *file) > panfrost_perfcnt_close(panfrost_priv); > panfrost_job_close(panfrost_priv); > > + panfrost_mmu_pgtable_free(panfrost_priv); > + drm_mm_takedown(&panfrost_priv->mm); > kfree(panfrost_priv); > } > > @@ -496,14 +520,9 @@ static int panfrost_probe(struct platform_device *pdev) > ddev->dev_private = pfdev; > pfdev->ddev = ddev; > > - spin_lock_init(&pfdev->mm_lock); > mutex_init(&pfdev->shrinker_lock); > INIT_LIST_HEAD(&pfdev->shrinker_list); > > - /* 4G enough for now. can be 48-bit */ > - drm_mm_init(&pfdev->mm, SZ_32M >> PAGE_SHIFT, (SZ_4G - SZ_32M) >> PAGE_SHIFT); > - pfdev->mm.color_adjust = panfrost_drm_mm_color_adjust; > - > pm_runtime_use_autosuspend(pfdev->dev); > pm_runtime_set_autosuspend_delay(pfdev->dev, 50); /* ~3 frames */ > pm_runtime_enable(pfdev->dev); > diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c > index 37a3a6ed4617..93204b44ef90 100644 > --- a/drivers/gpu/drm/panfrost/panfrost_gem.c > +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c > @@ -47,8 +47,8 @@ static int panfrost_gem_open(struct drm_gem_object *obj, struct drm_file *file_p > size_t size = obj->size; > u64 align; > struct panfrost_gem_object *bo = to_panfrost_bo(obj); > - struct panfrost_device *pfdev = obj->dev->dev_private; > unsigned long color = bo->noexec ? PANFROST_BO_NOEXEC : 0; > + struct panfrost_file_priv *priv = file_priv->driver_priv; > > /* > * Executable buffers cannot cross a 16MB boundary as the program > @@ -61,8 +61,9 @@ static int panfrost_gem_open(struct drm_gem_object *obj, struct drm_file *file_p > else > align = size >= SZ_2M ? 
SZ_2M >> PAGE_SHIFT : 0; > > - spin_lock(&pfdev->mm_lock); > - ret = drm_mm_insert_node_generic(&pfdev->mm, &bo->node, > + bo->mmu = &priv->mmu; > + spin_lock(&priv->mm_lock); > + ret = drm_mm_insert_node_generic(&priv->mm, &bo->node, > size >> PAGE_SHIFT, align, color, 0); > if (ret) > goto out; > @@ -73,22 +74,22 @@ static int panfrost_gem_open(struct drm_gem_object *obj, struct drm_file *file_p > drm_mm_remove_node(&bo->node); > } > out: > - spin_unlock(&pfdev->mm_lock); > + spin_unlock(&priv->mm_lock); > return ret; > } > > static void panfrost_gem_close(struct drm_gem_object *obj, struct drm_file *file_priv) > { > struct panfrost_gem_object *bo = to_panfrost_bo(obj); > - struct panfrost_device *pfdev = obj->dev->dev_private; > + struct panfrost_file_priv *priv = file_priv->driver_priv; > > if (bo->is_mapped) > panfrost_mmu_unmap(bo); > > - spin_lock(&pfdev->mm_lock); > + spin_lock(&priv->mm_lock); > if (drm_mm_node_allocated(&bo->node)) > drm_mm_remove_node(&bo->node); > - spin_unlock(&pfdev->mm_lock); > + spin_unlock(&priv->mm_lock); > } > > static int panfrost_gem_pin(struct drm_gem_object *obj) > diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h > index e10f58316915..50920819cc16 100644 > --- a/drivers/gpu/drm/panfrost/panfrost_gem.h > +++ b/drivers/gpu/drm/panfrost/panfrost_gem.h > @@ -7,10 +7,13 @@ > #include <drm/drm_gem_shmem_helper.h> > #include <drm/drm_mm.h> > > +struct panfrost_mmu; > + > struct panfrost_gem_object { > struct drm_gem_shmem_object base; > struct sg_table *sgts; > > + struct panfrost_mmu *mmu; > struct drm_mm_node node; > bool is_mapped :1; > bool noexec :1; > diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c > index d567ce98494c..aea84610b8cd 100644 > --- a/drivers/gpu/drm/panfrost/panfrost_job.c > +++ b/drivers/gpu/drm/panfrost/panfrost_job.c > @@ -153,6 +153,8 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js) > if 
(WARN_ON(job_read(pfdev, JS_COMMAND_NEXT(js)))) > goto end; > + cfg = panfrost_mmu_as_get(pfdev, &job->file_priv->mmu); > + panfrost_devfreq_record_transition(pfdev, js); > spin_lock_irqsave(&pfdev->hwaccess_lock, flags); > > @@ -163,8 +165,7 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js) > > /* start MMU, medium priority, cache clean/flush on end, clean/flush on > * start */ > - /* TODO: different address spaces */ > - cfg = JS_CONFIG_THREAD_PRI(8) | > JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE | > JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE; > > @@ -280,6 +281,9 @@ static void panfrost_job_cleanup(struct kref *ref) > kvfree(job->bos); > } > > + for (i = 0; i < NUM_JOB_SLOTS; i++) > + if (job == job->pfdev->jobs[i]) > + job->pfdev->jobs[i] = NULL; I'm a bit worried about this. As far as I can see previously the jobs array would just contain dangling pointers - which admittedly wasn't ideal. However do we have sufficient locks here to ensure that this cannot race with a panfrost_job_run() on the same slot? I'm worried that between the check and the assignment to NULL it is possible (admittedly very unlikely) for panfrost_job_run() to assign a new pointer which would then be clobbered. 
> kfree(job); > } > > @@ -377,8 +381,9 @@ static void panfrost_job_timedout(struct drm_sched_job *sched_job) > if (dma_fence_is_signaled(job->done_fence)) > return; > > - dev_err(pfdev->dev, "gpu sched timeout, js=%d, status=0x%x, head=0x%x, tail=0x%x, sched_job=%p", > + dev_err(pfdev->dev, "gpu sched timeout, js=%d, config=0x%x, status=0x%x, head=0x%x, tail=0x%x, sched_job=%p", > js, > + job_read(pfdev, JS_CONFIG(js)), > job_read(pfdev, JS_STATUS(js)), > job_read(pfdev, JS_HEAD_LO(js)), > job_read(pfdev, JS_TAIL_LO(js)), > @@ -448,6 +453,7 @@ static irqreturn_t panfrost_job_irq_handler(int irq, void *data) > } > > if (status & JOB_INT_MASK_DONE(j)) { > + panfrost_mmu_as_put(pfdev, &pfdev->jobs[j]->file_priv->mmu); > panfrost_devfreq_record_transition(pfdev, j); > dma_fence_signal(pfdev->jobs[j]->done_fence); > } > diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c > index 2ed411f09d80..f8da7557d1be 100644 > --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c > +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c > @@ -1,5 +1,6 @@ > // SPDX-License-Identifier: GPL-2.0 > /* Copyright 2019 Linaro, Ltd, Rob Herring <robh@xxxxxxxxxx> */ > +#include <linux/atomic.h> > #include <linux/bitfield.h> > #include <linux/delay.h> > #include <linux/dma-mapping.h> > @@ -22,12 +23,6 @@ > #define mmu_write(dev, reg, data) writel(data, dev->iomem + reg) > #define mmu_read(dev, reg) readl(dev->iomem + reg) > > -struct panfrost_mmu { > - struct io_pgtable_cfg pgtbl_cfg; > - struct io_pgtable_ops *pgtbl_ops; > - struct mutex lock; > -}; > - > static int wait_ready(struct panfrost_device *pfdev, u32 as_nr) > { > int ret; > @@ -91,6 +86,9 @@ static int mmu_hw_do_operation(struct panfrost_device *pfdev, u32 as_nr, > unsigned long flags; > int ret; > > + if (as_nr < 0) > + return 0; > + Do we need any synchronisation with panfrost_mmu_as_get() for this to be reliable? As far as I can see this check can race with the assignment of an address space. 
Perhaps panfrost_mmu_as_get() (or maybe panfrost_mmu_enable) should hold mmu->lock? > spin_lock_irqsave(&pfdev->hwaccess_lock, flags); > > if (op != AS_COMMAND_UNLOCK) > @@ -107,9 +105,10 @@ static int mmu_hw_do_operation(struct panfrost_device *pfdev, u32 as_nr, > return ret; > } > > -static void panfrost_mmu_enable(struct panfrost_device *pfdev, u32 as_nr) > +static void panfrost_mmu_enable(struct panfrost_device *pfdev, struct panfrost_mmu *mmu) > { > - struct io_pgtable_cfg *cfg = &pfdev->mmu->pgtbl_cfg; > + int as_nr = mmu->as; > + struct io_pgtable_cfg *cfg = &mmu->pgtbl_cfg; > u64 transtab = cfg->arm_mali_lpae_cfg.transtab; > u64 memattr = cfg->arm_mali_lpae_cfg.memattr; > > @@ -136,9 +135,87 @@ static void mmu_disable(struct panfrost_device *pfdev, u32 as_nr) > write_cmd(pfdev, as_nr, AS_COMMAND_UPDATE); > } > > +u32 panfrost_mmu_as_get(struct panfrost_device *pfdev, struct panfrost_mmu *mmu) > +{ > + int as = mmu->as; > + > + spin_lock(&pfdev->as_lock); > + > + as = mmu->as; > + if (as >= 0) { > + int en = atomic_fetch_inc(&mmu->as_count); > + WARN_ON(en > 2); > + > + list_move(&mmu->list, &pfdev->as_lru_list); > + spin_unlock(&pfdev->as_lock); > + if (!en) > + panfrost_mmu_enable(pfdev, mmu); I don't see why we need to re-enable here - if (as >= 0) then the AS should be present on the GPU and mmu_hw_do_operation() will have been doing any necessary MMU commands. > + return as; > + } > + > + /* Check for a free AS */ > + as = ffz(pfdev->as_alloc_mask); > + if (!(BIT(as) & pfdev->features.as_present)) { > + struct panfrost_mmu *lru_mmu; > + > + list_for_each_entry_reverse(lru_mmu, &pfdev->as_lru_list, list) { > + if (!atomic_read(&lru_mmu->as_count)) > + break; > + } > + WARN_ON(!lru_mmu); > + > + list_del_init(&lru_mmu->list); > + as = lru_mmu->as; > + > + WARN_ON(as < 0); > + lru_mmu->as = -1; This should be holding a lock to synchronise with the mmu_hw_do_operation(). 
> + } > + > + /* Assign the free or reclaimed AS to the */ > + mmu->as = as; > + set_bit(as, &pfdev->as_alloc_mask); > + atomic_set(&mmu->as_count, 1); > + list_add(&mmu->list, &pfdev->as_lru_list); > + > + dev_dbg(pfdev->dev, "Assigned AS%d to mmu %p, alloc_mask=%lx", as, mmu, pfdev->as_alloc_mask); > + > + spin_unlock(&pfdev->as_lock); > + > + panfrost_mmu_enable(pfdev, mmu); > + return as; > +} > + > +void panfrost_mmu_as_put(struct panfrost_device *pfdev, struct panfrost_mmu *mmu) > +{ > + atomic_dec(&mmu->as_count); > + WARN_ON(atomic_read(&mmu->as_count) < 0); > +} > + > +static void panfrost_mmu_as_reset(struct panfrost_device *pfdev, struct panfrost_mmu *mmu) > +{ > + spin_lock(&pfdev->as_lock); > + > + atomic_set(&mmu->as_count, 0); > + if (mmu->as >= 0) { > + clear_bit(mmu->as, &pfdev->as_alloc_mask); > + list_del_init(&mmu->list); > + mmu->as = -1; > + } > + > + spin_unlock(&pfdev->as_lock); > +} > + > void panfrost_mmu_reset(struct panfrost_device *pfdev) > { > - panfrost_mmu_enable(pfdev, 0); > + struct drm_file *file; > + > + mutex_lock(&pfdev->ddev->filelist_mutex); > + list_for_each_entry(file, &pfdev->ddev->filelist, lhead) { > + struct panfrost_file_priv *priv = file->driver_priv; > + > + panfrost_mmu_as_reset(pfdev, &priv->mmu); > + } > + mutex_unlock(&pfdev->ddev->filelist_mutex); > > mmu_write(pfdev, MMU_INT_CLEAR, ~0); > mmu_write(pfdev, MMU_INT_MASK, ~0); > @@ -152,21 +229,21 @@ static size_t get_pgsize(u64 addr, size_t size) > return SZ_2M; > } > > -static int mmu_map_sg(struct panfrost_device *pfdev, u64 iova, > - int prot, struct sg_table *sgt) > +static int mmu_map_sg(struct panfrost_device *pfdev, struct panfrost_mmu *mmu, > + u64 iova, int prot, struct sg_table *sgt) > { > unsigned int count; > struct scatterlist *sgl; > - struct io_pgtable_ops *ops = pfdev->mmu->pgtbl_ops; > + struct io_pgtable_ops *ops = mmu->pgtbl_ops; > u64 start_iova = iova; > > - mutex_lock(&pfdev->mmu->lock); > + mutex_lock(&mmu->lock); > > for_each_sg(sgt->sgl, 
sgl, sgt->nents, count) { > unsigned long paddr = sg_dma_address(sgl); > size_t len = sg_dma_len(sgl); > > - dev_dbg(pfdev->dev, "map: iova=%llx, paddr=%lx, len=%zx", iova, paddr, len); > + dev_dbg(pfdev->dev, "map: as=%d, iova=%llx, paddr=%lx, len=%zx", mmu->as, iova, paddr, len); > > while (len) { > size_t pgsize = get_pgsize(iova | paddr, len); > @@ -178,10 +255,10 @@ static int mmu_map_sg(struct panfrost_device *pfdev, u64 iova, > } > } > > - mmu_hw_do_operation(pfdev, 0, start_iova, iova - start_iova, > + mmu_hw_do_operation(pfdev, mmu->as, start_iova, iova - start_iova, > AS_COMMAND_FLUSH_PT); > > - mutex_unlock(&pfdev->mmu->lock); > + mutex_unlock(&mmu->lock); > > return 0; > } > @@ -208,7 +285,7 @@ int panfrost_mmu_map(struct panfrost_gem_object *bo) > if (ret < 0) > return ret; > > - mmu_map_sg(pfdev, bo->node.start << PAGE_SHIFT, prot, sgt); > + mmu_map_sg(pfdev, bo->mmu, bo->node.start << PAGE_SHIFT, prot, sgt); > > pm_runtime_mark_last_busy(pfdev->dev); > pm_runtime_put_autosuspend(pfdev->dev); > @@ -221,7 +298,7 @@ void panfrost_mmu_unmap(struct panfrost_gem_object *bo) > { > struct drm_gem_object *obj = &bo->base.base; > struct panfrost_device *pfdev = to_panfrost_device(obj->dev); > - struct io_pgtable_ops *ops = pfdev->mmu->pgtbl_ops; > + struct io_pgtable_ops *ops = bo->mmu->pgtbl_ops; > u64 iova = bo->node.start << PAGE_SHIFT; > size_t len = bo->node.size << PAGE_SHIFT; > size_t unmapped_len = 0; > @@ -230,13 +307,13 @@ void panfrost_mmu_unmap(struct panfrost_gem_object *bo) > if (WARN_ON(!bo->is_mapped)) > return; > > - dev_dbg(pfdev->dev, "unmap: iova=%llx, len=%zx", iova, len); > + dev_dbg(pfdev->dev, "unmap: as=%d, iova=%llx, len=%zx", bo->mmu->as, iova, len); > > ret = pm_runtime_get_sync(pfdev->dev); > if (ret < 0) > return; > > - mutex_lock(&pfdev->mmu->lock); > + mutex_lock(&bo->mmu->lock); > > while (unmapped_len < len) { > size_t unmapped_page; > @@ -250,10 +327,10 @@ void panfrost_mmu_unmap(struct panfrost_gem_object *bo) > unmapped_len 
+= pgsize; > } > > - mmu_hw_do_operation(pfdev, 0, bo->node.start << PAGE_SHIFT, > + mmu_hw_do_operation(pfdev, bo->mmu->as, bo->node.start << PAGE_SHIFT, > bo->node.size << PAGE_SHIFT, AS_COMMAND_FLUSH_PT); > > - mutex_unlock(&pfdev->mmu->lock); > + mutex_unlock(&bo->mmu->lock); > > pm_runtime_mark_last_busy(pfdev->dev); > pm_runtime_put_autosuspend(pfdev->dev); > @@ -262,9 +339,10 @@ void panfrost_mmu_unmap(struct panfrost_gem_object *bo) > > static void mmu_tlb_inv_context_s1(void *cookie) > { > - struct panfrost_device *pfdev = cookie; > + struct panfrost_file_priv *priv = cookie; > > - mmu_hw_do_operation(pfdev, 0, 0, ~0UL, AS_COMMAND_FLUSH_MEM); > + /* Only flush if we have an assigned AS */ I'm not sure if this comment makes sense here - seems like it belongs in mmu_hw_do_operation() > + mmu_hw_do_operation(priv->pfdev, priv->mmu.as, 0, ~0UL, AS_COMMAND_FLUSH_MEM); > } > > static void mmu_tlb_inv_range_nosync(unsigned long iova, size_t size, > @@ -283,16 +361,69 @@ static const struct iommu_gather_ops mmu_tlb_ops = { > .tlb_sync = mmu_tlb_sync_context, > }; > > +int panfrost_mmu_pgtable_alloc(struct panfrost_file_priv *priv) > +{ > + struct panfrost_mmu *mmu = &priv->mmu; > + struct panfrost_device *pfdev = priv->pfdev; > + > + mutex_init(&mmu->lock); > + INIT_LIST_HEAD(&mmu->list); > + mmu->as = -1; > + > + mmu->pgtbl_cfg = (struct io_pgtable_cfg) { > + .pgsize_bitmap = SZ_4K | SZ_2M, > + .ias = FIELD_GET(0xff, pfdev->features.mmu_features), > + .oas = FIELD_GET(0xff00, pfdev->features.mmu_features), > + .tlb = &mmu_tlb_ops, > + .iommu_dev = pfdev->dev, > + }; > + > + mmu->pgtbl_ops = alloc_io_pgtable_ops(ARM_MALI_LPAE, &mmu->pgtbl_cfg, > + priv); > + if (!mmu->pgtbl_ops) > + return -EINVAL; > + > + return 0; > +} > + > +void panfrost_mmu_pgtable_free(struct panfrost_file_priv *priv) > +{ > + struct panfrost_device *pfdev = priv->pfdev; > + struct panfrost_mmu *mmu = &priv->mmu; > + > + spin_lock(&pfdev->as_lock); > + if (mmu->as >= 0) { > + 
clear_bit(mmu->as, &pfdev->as_alloc_mask); > + clear_bit(mmu->as, &pfdev->as_in_use_mask); > + list_del(&mmu->list); > + } > + spin_unlock(&pfdev->as_lock); > + > + free_io_pgtable_ops(mmu->pgtbl_ops); > +} > + > static struct drm_mm_node *addr_to_drm_mm_node(struct panfrost_device *pfdev, int as, u64 addr) > { > - struct drm_mm_node *node; > + struct drm_mm_node *node = NULL; > u64 offset = addr >> PAGE_SHIFT; > + struct drm_file *file; > > - drm_mm_for_each_node(node, &pfdev->mm) { > - if (offset >= node->start && offset < (node->start + node->size)) > - return node; > + mutex_lock(&pfdev->ddev->filelist_mutex); > + list_for_each_entry(file, &pfdev->ddev->filelist, lhead) { You could walk as_lru_list rather than every file as you know you are looking for a fd which is assigned an address space. > + struct panfrost_file_priv *priv = file->driver_priv; > + > + if (as != priv->mmu.as) > + continue; > + > + drm_mm_for_each_node(node, &priv->mm) { > + if (offset >= node->start && offset < (node->start + node->size)) > + goto out; > + } > } > - return NULL; > + > +out: > + mutex_unlock(&pfdev->ddev->filelist_mutex); > + return node; > } > > #define NUM_FAULT_PAGES (SZ_2M / PAGE_SIZE) > @@ -317,6 +448,8 @@ int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, u64 addr) > node->start << PAGE_SHIFT); > return -EINVAL; > } > + WARN_ON(bo->mmu->as == -1); You could strengthen this to WARN_ON(bo->mmu->as != as) > + > /* Assume 2MB alignment and size multiple */ > addr &= ~((u64)SZ_2M - 1); > page_offset = addr >> PAGE_SHIFT; > @@ -367,11 +500,11 @@ int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, u64 addr) > goto err_map; > } > > - mmu_map_sg(pfdev, addr, IOMMU_WRITE | IOMMU_READ | IOMMU_NOEXEC, sgt); > + mmu_map_sg(pfdev, bo->mmu, addr, IOMMU_WRITE | IOMMU_READ | IOMMU_NOEXEC, sgt); > > bo->is_mapped = true; > > - dev_dbg(pfdev->dev, "mapped page fault @ %llx", addr); > + dev_dbg(pfdev->dev, "mapped page fault @ AS%d %llx", 
bo->mmu->as, addr); Assuming the above WARN_ON strengthening then: s/bo->mmu->as/as/ Steve > > return 0; > > @@ -480,15 +613,8 @@ static irqreturn_t panfrost_mmu_irq_handler_thread(int irq, void *data) > > int panfrost_mmu_init(struct panfrost_device *pfdev) > { > - struct io_pgtable_ops *pgtbl_ops; > int err, irq; > > - pfdev->mmu = devm_kzalloc(pfdev->dev, sizeof(*pfdev->mmu), GFP_KERNEL); > - if (!pfdev->mmu) > - return -ENOMEM; > - > - mutex_init(&pfdev->mmu->lock); > - > irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "mmu"); > if (irq <= 0) > return -ENODEV; > @@ -501,22 +627,6 @@ int panfrost_mmu_init(struct panfrost_device *pfdev) > dev_err(pfdev->dev, "failed to request mmu irq"); > return err; > } > - pfdev->mmu->pgtbl_cfg = (struct io_pgtable_cfg) { > - .pgsize_bitmap = SZ_4K | SZ_2M, > - .ias = FIELD_GET(0xff, pfdev->features.mmu_features), > - .oas = FIELD_GET(0xff00, pfdev->features.mmu_features), > - .tlb = &mmu_tlb_ops, > - .iommu_dev = pfdev->dev, > - }; > - > - pgtbl_ops = alloc_io_pgtable_ops(ARM_MALI_LPAE, &pfdev->mmu->pgtbl_cfg, > - pfdev); > - if (!pgtbl_ops) > - return -ENOMEM; > - > - pfdev->mmu->pgtbl_ops = pgtbl_ops; > - > - panfrost_mmu_enable(pfdev, 0); > > return 0; > } > @@ -525,6 +635,4 @@ void panfrost_mmu_fini(struct panfrost_device *pfdev) > { > mmu_write(pfdev, MMU_INT_MASK, 0); > mmu_disable(pfdev, 0); > - > - free_io_pgtable_ops(pfdev->mmu->pgtbl_ops); > } > diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.h b/drivers/gpu/drm/panfrost/panfrost_mmu.h > index d5f9b24537db..7c5b6775ae23 100644 > --- a/drivers/gpu/drm/panfrost/panfrost_mmu.h > +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.h > @@ -5,6 +5,8 @@ > #define __PANFROST_MMU_H__ > > struct panfrost_gem_object; > +struct panfrost_file_priv; > +struct panfrost_mmu; > > int panfrost_mmu_map(struct panfrost_gem_object *bo); > void panfrost_mmu_unmap(struct panfrost_gem_object *bo); > @@ -13,4 +15,10 @@ int panfrost_mmu_init(struct panfrost_device *pfdev); > void 
panfrost_mmu_fini(struct panfrost_device *pfdev); > void panfrost_mmu_reset(struct panfrost_device *pfdev); > > +u32 panfrost_mmu_as_get(struct panfrost_device *pfdev, struct panfrost_mmu *mmu); > +void panfrost_mmu_as_put(struct panfrost_device *pfdev, struct panfrost_mmu *mmu); > + > +int panfrost_mmu_pgtable_alloc(struct panfrost_file_priv *priv); > +void panfrost_mmu_pgtable_free(struct panfrost_file_priv *priv); > + > #endif > _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/dri-devel