On Tue, Sep 02, 2014 at 02:32:41PM -0700, Jesse Barnes wrote: > Use a new reloc type to allow userspace to insert sync points within > batches before they're submitted. The corresponding fence fds are > returned in the offset field of the returned reloc tree, and can be > operated on with the sync fence APIs. > > Signed-off-by: Jesse Barnes <jbarnes@xxxxxxxxxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_drv.h | 4 + > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 125 ++++++++++++++++++++++++----- > drivers/gpu/drm/i915/i915_sync.c | 58 ++++++++++--- > include/uapi/drm/i915_drm.h | 11 ++- > 4 files changed, 167 insertions(+), 31 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 6eb119e..410eedf 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -2284,6 +2284,10 @@ int i915_sync_init(struct drm_i915_private *dev_priv); > void i915_sync_fini(struct drm_i915_private *dev_priv); > int i915_sync_create_fence_ioctl(struct drm_device *dev, void *data, > struct drm_file *file); > +int i915_sync_fence_create(struct intel_engine_cs *ring, > + struct intel_context *ctx, > + u32 seqno); > + > > #define PIN_MAPPABLE 0x1 > #define PIN_NONBLOCK 0x2 > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > index 60998fc..32ec599 100644 > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > @@ -32,6 +32,7 @@ > #include "i915_trace.h" > #include "intel_drv.h" > #include <linux/dma_remapping.h> > +#include "../../../staging/android/sync.h" > > #define __EXEC_OBJECT_HAS_PIN (1<<31) > #define __EXEC_OBJECT_HAS_FENCE (1<<30) > @@ -262,6 +263,67 @@ static inline int use_cpu_reloc(struct drm_i915_gem_object *obj) > !obj->map_and_fenceable || > obj->cache_level != I915_CACHE_NONE); > } > +static int > +emit_sync_obj_cpu(struct drm_i915_gem_object *obj, > + struct drm_i915_gem_relocation_entry *reloc) > +{ > 
+ uint32_t page_offset = offset_in_page(reloc->offset); > + char *vaddr; > + int ret; > + > + ret = i915_gem_object_set_to_cpu_domain(obj, true); > + if (ret) > + return ret; > + > + vaddr = kmap_atomic(i915_gem_object_get_page(obj, > + reloc->offset >> PAGE_SHIFT)); > + *(uint32_t *)(vaddr + page_offset) = MI_STORE_DWORD_INDEX; > + *(uint32_t *)(vaddr + page_offset + 4) = > + I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT; > + *(uint32_t *)(vaddr + page_offset + 8) = > + obj->ring->outstanding_lazy_seqno; > + *(uint32_t *)(vaddr + page_offset + 12) = MI_USER_INTERRUPT; > + > + kunmap_atomic(vaddr); > + > + return 0; > +} > + > +static int > +emit_sync_obj_gtt(struct drm_i915_gem_object *obj, > + struct drm_i915_gem_relocation_entry *reloc) > +{ > + struct drm_device *dev = obj->base.dev; > + struct drm_i915_private *dev_priv = dev->dev_private; > + uint32_t __iomem *reloc_entry; > + void __iomem *reloc_page; > + int ret; > + > + ret = i915_gem_object_set_to_gtt_domain(obj, true); > + if (ret) > + return ret; > + > + ret = i915_gem_object_put_fence(obj); > + if (ret) > + return ret; > + > + /* Map the page containing the relocation we're going to perform. */ > + reloc->offset += i915_gem_obj_ggtt_offset(obj); > + reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable, > + reloc->offset & PAGE_MASK); > + > + reloc_entry = (uint32_t __iomem *) > + (reloc_page + offset_in_page(reloc->offset)); > + iowrite32(MI_STORE_DWORD_INDEX, reloc_entry); > + iowrite32(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT, > + reloc_entry); > + iowrite32(obj->ring->outstanding_lazy_seqno, reloc_entry); > + iowrite32(MI_USER_INTERRUPT, reloc_entry); > + > + io_mapping_unmap_atomic(reloc_page); These commands are illegal/invalid inside the object, only valid inside the ring. 
> + return 0; > +} > > static int > relocate_entry_cpu(struct drm_i915_gem_object *obj, > @@ -349,7 +411,8 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj, > static int > i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, > struct eb_vmas *eb, > - struct drm_i915_gem_relocation_entry *reloc) > + struct drm_i915_gem_relocation_entry *reloc, > + struct intel_context *ctx) Hmm. That's a nuisance. But no, you only use it to automatically create a fence, not to patch the batch, so you can just use an object-flag. This fits neatly into requests. -Chris -- Chris Wilson, Intel Open Source Technology Centre _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx