Hi Akash, On Mon, 2014-06-09 at 08:36 +0530, akash.goel@xxxxxxxxx wrote: > From: Akash Goel <akash.goel@xxxxxxxxx> > > This adds support for a write-enable bit in the entry of GTT. > This is handled via a read-only flag in the GEM buffer object which > is then used to see how to set the bit when writing the GTT entries. > Currently by default the Batch buffer & Ring buffers are marked as read only. > > v2: Moved the pte override code for read-only bit to 'byt_pte_encode'. (Chris) > Fixed the issue of leaving 'gt_old_ro' as unused. (Chris) > > v3: Removed the 'gt_old_ro' field, now setting RO bit only for Ring Buffers(Daniel). > > v4: Added a new 'flags' parameter to all the pte(gen6) encode & insert_entries functions, > in lieu of overloading the cache_level enum (Daniel). > > Signed-off-by: Akash Goel <akash.goel@xxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_drv.h | 6 +++++ > drivers/gpu/drm/i915/i915_gem_gtt.c | 48 ++++++++++++++++++++------------- > drivers/gpu/drm/i915/i915_gem_gtt.h | 5 ++-- > drivers/gpu/drm/i915/intel_ringbuffer.c | 3 +++ > 4 files changed, 41 insertions(+), 21 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 506386e..dc18aee 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1658,6 +1658,12 @@ struct drm_i915_gem_object { > unsigned int pin_display:1; > > /* > + * Is the object to be mapped as read-only to the GPU > + * Only honoured if hardware has relevant pte bit > + */ > + unsigned long gt_ro:1; > + > + /* > * Is the GPU currently using a fence to access this buffer, > */ > unsigned int pending_fenced_gpu_access:1; > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c > index 8b3cde7..221ea49 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > @@ -110,7 +110,7 @@ static inline gen8_ppgtt_pde_t gen8_pde_encode(struct drm_device *dev, > > static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr, > enum i915_cache_level level, > - bool valid) > + bool valid, u32 unused) > { > gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0; > pte |= GEN6_PTE_ADDR_ENCODE(addr); > @@ -132,7 +132,7 @@ static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr, > > static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr, > enum i915_cache_level level, > - bool valid) > + bool valid, u32 unused) > { > gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0; > pte |= GEN6_PTE_ADDR_ENCODE(addr); > @@ -156,7 +156,7 @@ static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr, > > static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr, > enum i915_cache_level level, > - bool valid) > + bool valid, u32 flags) > { > gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0; > pte |= GEN6_PTE_ADDR_ENCODE(addr); > @@ -164,7 +164,8 @@ static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr, > /* Mark the page as writeable. Other platforms don't have a > * setting for read-only/writable, so this matches that behavior. > */ > - pte |= BYT_PTE_WRITEABLE; > + if (!(flags & PTE_READ_ONLY)) > + pte |= BYT_PTE_WRITEABLE; > > if (level != I915_CACHE_NONE) > pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES; > @@ -174,7 +175,7 @@ static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr, > > static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr, > enum i915_cache_level level, > - bool valid) > + bool valid, u32 unused) > { > gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0; > pte |= HSW_PTE_ADDR_ENCODE(addr); > @@ -187,7 +188,7 @@ static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr, > > static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr, > enum i915_cache_level level, > - bool valid) > + bool valid, u32 unused) > { > gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0; > pte |= HSW_PTE_ADDR_ENCODE(addr); > @@ -301,7 +302,7 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm, > static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, > struct sg_table *pages, > uint64_t start, > - enum i915_cache_level cache_level) > + enum i915_cache_level cache_level, u32 unused) > { > struct i915_hw_ppgtt *ppgtt = > container_of(vm, struct i915_hw_ppgtt, base); > @@ -639,7 +640,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) > uint32_t pd_entry; > int pte, pde; > > - scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true); > + scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true, 0); > > pd_addr = (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + > ppgtt->pd_offset / sizeof(gen6_gtt_pte_t); > @@ -941,7 +942,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, > unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES; > unsigned last_pte, i; > > - scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true); > + scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true, 0); > > while (num_entries) { > last_pte = first_pte + num_entries; > @@ -964,7 +965,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, > static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, > struct sg_table *pages, > uint64_t start, > - enum i915_cache_level cache_level) > + enum i915_cache_level cache_level, u32 flags) > { > struct i915_hw_ppgtt *ppgtt = > container_of(vm, struct i915_hw_ppgtt, base); > @@ -981,7 +982,8 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, > > pt_vaddr[act_pte] = > vm->pte_encode(sg_page_iter_dma_address(&sg_iter), > - cache_level, true); > + cache_level, true, flags); > + > if (++act_pte == I915_PPGTT_PT_ENTRIES) { > kunmap_atomic(pt_vaddr); > pt_vaddr = NULL; > @@ -1218,8 +1220,12 @@ ppgtt_bind_vma(struct i915_vma *vma, > enum i915_cache_level cache_level, > u32 flags) > { > + if (IS_VALLEYVIEW(vma->vm->dev)) The above check is redundant, since with the vlv/byt specific pte_encode() we'll get already the right behavior. > + if (vma->obj->gt_ro) > + flags |= PTE_READ_ONLY; > + > vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start, > - cache_level); > + cache_level, flags); > } > > static void ppgtt_unbind_vma(struct i915_vma *vma) > @@ -1394,7 +1400,7 @@ static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte) > static void gen8_ggtt_insert_entries(struct i915_address_space *vm, > struct sg_table *st, > uint64_t start, > - enum i915_cache_level level) > + enum i915_cache_level level, u32 unused) > { > struct drm_i915_private *dev_priv = vm->dev->dev_private; > unsigned first_entry = start >> PAGE_SHIFT; > @@ -1440,7 +1446,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, > static void gen6_ggtt_insert_entries(struct i915_address_space *vm, > struct sg_table *st, > uint64_t start, > - enum i915_cache_level level) > + enum i915_cache_level level, u32 flags) > { > struct drm_i915_private *dev_priv = vm->dev->dev_private; > unsigned first_entry = start >> PAGE_SHIFT; > @@ -1452,7 +1458,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, > > for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) { > addr = sg_page_iter_dma_address(&sg_iter); > - iowrite32(vm->pte_encode(addr, level, true), >t_entries[i]); > + iowrite32(vm->pte_encode(addr, level, true, flags), >t_entries[i]); > i++; > } > > @@ -1464,7 +1470,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, > */ > if (i != 0) > WARN_ON(readl(>t_entries[i-1]) != > - vm->pte_encode(addr, level, true)); > + vm->pte_encode(addr, level, true, flags)); > > /* This next bit makes the above posting read even more important. We > * want to flush the TLBs only after we're certain all the PTE updates > @@ -1518,7 +1524,7 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm, > first_entry, num_entries, max_entries)) > num_entries = max_entries; > > - scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch); > + scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch, 0); > > for (i = 0; i < num_entries; i++) > iowrite32(scratch_pte, >t_base[i]); > @@ -1567,6 +1573,10 @@ static void ggtt_bind_vma(struct i915_vma *vma, > struct drm_i915_private *dev_priv = dev->dev_private; > struct drm_i915_gem_object *obj = vma->obj; > > + if (IS_VALLEYVIEW(vma->vm->dev)) The check is redundant as above. > + if (obj->gt_ro) > + flags |= PTE_READ_ONLY; > + > /* If there is no aliasing PPGTT, or the caller needs a global mapping, > * or we have a global mapping already but the cacheability flags have > * changed, set the global PTEs. > @@ -1583,7 +1593,7 @@ static void ggtt_bind_vma(struct i915_vma *vma, > (cache_level != obj->cache_level)) { > vma->vm->insert_entries(vma->vm, obj->pages, > vma->node.start, > - cache_level); > + cache_level, flags); > obj->has_global_gtt_mapping = 1; > } > } > @@ -1595,7 +1605,7 @@ static void ggtt_bind_vma(struct i915_vma *vma, > appgtt->base.insert_entries(&appgtt->base, > vma->obj->pages, > vma->node.start, > - cache_level); > + cache_level, flags); > vma->obj->has_aliasing_ppgtt_mapping = 1; > } > } > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h > index 1b96a06..674eaf5 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.h > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h > @@ -197,15 +197,16 @@ struct i915_address_space { > /* FIXME: Need a more generic return type */ > gen6_gtt_pte_t (*pte_encode)(dma_addr_t addr, > enum i915_cache_level level, > - bool valid); /* Create a valid PTE */ > + bool valid, u32 flags); /* Create a valid PTE */ > void (*clear_range)(struct i915_address_space *vm, > uint64_t start, > uint64_t length, > bool use_scratch); > +#define PTE_READ_ONLY (1<<2) As this uses the same namespace as GLOBAL_BIND, I would keep their definitions together. Also I couldn't find any other flag besides GLOBAL_BIND, in which case this could be (1 << 1). With the above fixed the patch looks good to me: Reviewed-by: Imre Deak <imre.deak@xxxxxxxxx> > void (*insert_entries)(struct i915_address_space *vm, > struct sg_table *st, > uint64_t start, > - enum i915_cache_level cache_level); > + enum i915_cache_level cache_level, u32 flags); > void (*cleanup)(struct i915_address_space *vm); > }; > > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c > index 279488a..ee99971 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c > @@ -1397,6 +1397,9 @@ static int allocate_ring_buffer(struct intel_engine_cs *ring) > if (obj == NULL) > return -ENOMEM; > > + /* mark ring buffers as read-only from GPU side by default */ > + obj->gt_ro = 1; > + > ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE); > if (ret) > goto err_unref; _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx