On Wed, 3 Sep 2014 08:01:55 +0100 Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> wrote: > On Tue, Sep 02, 2014 at 02:32:41PM -0700, Jesse Barnes wrote: > > Use a new reloc type to allow userspace to insert sync points within > > batches before they're submitted. The corresponding fence fds are > > returned in the offset field of the returned reloc tree, and can be > > operated on with the sync fence APIs. > > > > Signed-off-by: Jesse Barnes <jbarnes@xxxxxxxxxxxxxxxx> > > --- > > drivers/gpu/drm/i915/i915_drv.h | 4 + > > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 125 ++++++++++++++++++++++++----- > > drivers/gpu/drm/i915/i915_sync.c | 58 ++++++++++--- > > include/uapi/drm/i915_drm.h | 11 ++- > > 4 files changed, 167 insertions(+), 31 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > > index 6eb119e..410eedf 100644 > > --- a/drivers/gpu/drm/i915/i915_drv.h > > +++ b/drivers/gpu/drm/i915/i915_drv.h > > @@ -2284,6 +2284,10 @@ int i915_sync_init(struct drm_i915_private *dev_priv); > > void i915_sync_fini(struct drm_i915_private *dev_priv); > > int i915_sync_create_fence_ioctl(struct drm_device *dev, void *data, > > struct drm_file *file); > > +int i915_sync_fence_create(struct intel_engine_cs *ring, > > + struct intel_context *ctx, > > + u32 seqno); > > + > > > > #define PIN_MAPPABLE 0x1 > > #define PIN_NONBLOCK 0x2 > > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > > index 60998fc..32ec599 100644 > > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > > @@ -32,6 +32,7 @@ > > #include "i915_trace.h" > > #include "intel_drv.h" > > #include <linux/dma_remapping.h> > > +#include "../../../staging/android/sync.h" > > > > #define __EXEC_OBJECT_HAS_PIN (1<<31) > > #define __EXEC_OBJECT_HAS_FENCE (1<<30) > > @@ -262,6 +263,67 @@ static inline int use_cpu_reloc(struct drm_i915_gem_object *obj) > > !obj->map_and_fenceable || > 
> obj->cache_level != I915_CACHE_NONE); > > } > > +static int > > +emit_sync_obj_cpu(struct drm_i915_gem_object *obj, > > + struct drm_i915_gem_relocation_entry *reloc) > > +{ > > + uint32_t page_offset = offset_in_page(reloc->offset); > > + char *vaddr; > > + int ret; > > + > > + ret = i915_gem_object_set_to_cpu_domain(obj, true); > > + if (ret) > > + return ret; > > + > > + vaddr = kmap_atomic(i915_gem_object_get_page(obj, > > + reloc->offset >> PAGE_SHIFT)); > > + *(uint32_t *)(vaddr + page_offset) = MI_STORE_DWORD_INDEX; > > + *(uint32_t *)(vaddr + page_offset + 4) = > > + I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT; > > + *(uint32_t *)(vaddr + page_offset + 8) = > > + obj->ring->outstanding_lazy_seqno; > > + *(uint32_t *)(vaddr + page_offset + 12) = MI_USER_INTERRUPT; > > + > > + kunmap_atomic(vaddr); > > + > > + return 0; > > +} > > + > > +static int > > +emit_sync_obj_gtt(struct drm_i915_gem_object *obj, > > + struct drm_i915_gem_relocation_entry *reloc) > > +{ > > + struct drm_device *dev = obj->base.dev; > > + struct drm_i915_private *dev_priv = dev->dev_private; > > + uint32_t __iomem *reloc_entry; > > + void __iomem *reloc_page; > > + int ret; > > + > > + ret = i915_gem_object_set_to_gtt_domain(obj, true); > > + if (ret) > > + return ret; > > + > > + ret = i915_gem_object_put_fence(obj); > > + if (ret) > > + return ret; > > + > > + /* Map the page containing the relocation we're going to perform. 
*/ > > + reloc->offset += i915_gem_obj_ggtt_offset(obj); > > + reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable, > > + reloc->offset & PAGE_MASK); > > + > > + reloc_entry = (uint32_t __iomem *) > > + (reloc_page + offset_in_page(reloc->offset)); > > + iowrite32(MI_STORE_DWORD_INDEX, reloc_entry); > > + iowrite32(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT, > > + reloc_entry); > > + iowrite32(obj->ring->outstanding_lazy_seqno, reloc_entry); > > + iowrite32(MI_USER_INTERRUPT, reloc_entry); > > + > > + io_mapping_unmap_atomic(reloc_page); > > These commands are illegal/invalid inside the object, only valid inside > the ring. Hm, we ought to be able to write to non-privileged space with STORE_DWORD, but that does mean moving to context-specific pages in process space, or at least adding them to our existing scheme. I haven't tried MI_USER_INTERRUPT from a batch; if we can't do it from a non-privileged batch, that nixes one of the other neat features we could have (fine-grained intra-batch userspace synchronization). > > + return 0; > > +} > > > > static int > > relocate_entry_cpu(struct drm_i915_gem_object *obj, > > @@ -349,7 +411,8 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj, > > static int > > i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, > > struct eb_vmas *eb, > > - struct drm_i915_gem_relocation_entry *reloc) > > + struct drm_i915_gem_relocation_entry *reloc, > > + struct intel_context *ctx) > > Hmm. That's a nuisance. But no, you only use it to automatically create > a fence not to patch the batch, so you can just use an object-flag. > > This fits neatly into requests. Most definitely. What do you think of the potential upside in the DDX for this, assuming we get dword writes from batches working? -- Jesse Barnes, Intel Open Source Technology Center _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx