From: Jesse Barnes <jbarnes@xxxxxxxxxxxxxxxx> We just need to pass in an address to execute and some flags, since we don't have to worry about buffer relocation or any of the other usual stuff. Returns a fence to be used for synchronization. v2: add a request after batch submission (Jesse) v3: add a flag for fence creation (Chris) v4: add CLOEXEC flag (Kristian) add non-RCS ring support (Jesse) v5: update for request alloc change (Jesse) v6: new sync file interface, error paths, request breadcrumbs v7: always CLOEXEC for sync_file_install v8: rebase on new sync file api v9: rework on top of fence requests and sync_file v10: take fence ref for sync_file (Chris) use correct flush (Chris) limit exec on rcs Cc: Daniel Vetter <daniel.vetter@xxxxxxxx> Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx> Signed-off-by: Jesse Barnes <jbarnes@xxxxxxxxxxxxxxxx> (v5) Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx> --- drivers/gpu/drm/i915/Kconfig | 1 + drivers/gpu/drm/i915/i915_drv.c | 1 + drivers/gpu/drm/i915/i915_drv.h | 3 + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 157 +++++++++++++++++++++++++++++ include/uapi/drm/i915_drm.h | 36 +++++++ 5 files changed, 198 insertions(+) diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 7769e469118f..6503133c3f85 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -8,6 +8,7 @@ config DRM_I915 # the shmem_readpage() which depends upon tmpfs select SHMEM select TMPFS + select SYNC_FILE select DRM_KMS_HELPER select DRM_PANEL select DRM_MIPI_DSI diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 9fb6de90eac0..a07918d821e4 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -2567,6 +2567,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_GETPARAM, i915_gem_context_getparam_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_SETPARAM, i915_gem_context_setparam_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE2, i915_gem_context_create2_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_EXEC_MM, intel_exec_mm_ioctl, DRM_UNLOCKED), }; static struct drm_driver driver = { diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 64f3f0f18509..884d9844863c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3660,6 +3660,9 @@ static inline bool intel_init_svm(struct drm_device *dev) } #endif +extern int intel_exec_mm_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); + /* overlay */ extern struct intel_overlay_error_state * intel_overlay_capture_error_state(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 699315304748..c1ba6da1fd33 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -29,6 +29,7 @@ #include <linux/dma_remapping.h> #include <linux/reservation.h> #include <linux/uaccess.h> +#include <linux/sync_file.h> #include <drm/drmP.h> #include <drm/i915_drm.h> @@ -1911,3 +1912,159 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data, drm_free_large(exec2_list); return ret; } + +static struct intel_engine_cs * +exec_mm_select_engine(struct drm_i915_private *dev_priv, + struct drm_i915_exec_mm *exec_mm) +{ + unsigned int user_ring_id = exec_mm->ring_id & I915_EXEC_RING_MASK; + struct intel_engine_cs *e; + + if (user_ring_id > I915_USER_RINGS) { + DRM_DEBUG("exec_mm with unknown ring: %u\n", user_ring_id); + return NULL; + } + + e = &dev_priv->engine[user_ring_map[user_ring_id]]; + + if (!intel_engine_initialized(e)) { + DRM_DEBUG("exec_mm with invalid ring: %u\n", user_ring_id); + return NULL; + } + + return e; +} + +static int do_exec_mm(struct drm_i915_exec_mm *exec_mm, + struct drm_i915_gem_request *req, + const u32 flags) +{ + const bool create_fence = flags & I915_EXEC_MM_FENCE; + struct sync_file *out_fence; + int ret; + + if (create_fence) { + out_fence = sync_file_create(fence_get(&req->fence)); + if (!out_fence) { + DRM_DEBUG("sync file creation failed\n"); + return ret; + } + + exec_mm->fence = get_unused_fd_flags(O_CLOEXEC); + fd_install(exec_mm->fence, out_fence->file); + } + + ret = req->engine->emit_flush(req, EMIT_INVALIDATE); + if (ret) { + DRM_DEBUG_DRIVER("engine flush failed: %d\n", ret); + goto fput; + } + + ret = req->engine->emit_bb_start(req, exec_mm->batch_ptr, 0, 0); + if (ret) { + DRM_DEBUG_DRIVER("engine dispatch execbuf failed: %d\n", ret); + goto fput; + } + + return 0; +fput: + if (create_fence) { + fput(out_fence->file); + fence_put(&req->fence); + } + + return ret; +} + +int intel_exec_mm_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_exec_mm *exec_mm = data; + struct intel_engine_cs *engine; + struct i915_gem_context *ctx; + struct drm_i915_gem_request *req; + const u32 ctx_id = exec_mm->ctx_id; + const u32 flags = exec_mm->flags; + int ret; + + if (exec_mm->batch_ptr & 3) { + DRM_DEBUG_DRIVER("ptr not 4-byt aligned %llu\n", + exec_mm->batch_ptr); + return -EINVAL; + } + + if (flags & ~(I915_EXEC_MM_FENCE)) { + DRM_DEBUG_DRIVER("bad flags: 0x%08x\n", flags); + return -EINVAL; + } + + if (!dev_priv->svm_available) { + DRM_DEBUG_DRIVER("no svm available\n"); + return -ENODEV; + } + + if (file == NULL) + return -EINVAL; + + engine = exec_mm_select_engine(dev_priv, exec_mm); + if (!engine) + return -EINVAL; + + if (engine->id != RCS) { + DRM_DEBUG_DRIVER("svm exec only allowed on RCS engine\n"); + return -EINVAL; + } + + intel_runtime_pm_get(dev_priv); + + ret = i915_mutex_lock_interruptible(dev); + if (ret) { + DRM_ERROR("mutex interrupted\n"); + goto pm_put; + } + + ctx = i915_gem_validate_context(dev, file, engine, ctx_id); + if (IS_ERR(ctx)) { + ret = PTR_ERR(ctx); + DRM_DEBUG_DRIVER("couldn't get context\n"); + goto unlock; + } + + if (!(ctx->flags & CONTEXT_SVM)) { + ret = -EINVAL; + DRM_DEBUG_DRIVER("context is not SVM enabled\n"); + goto unlock; + } + + i915_gem_context_get(ctx); + + req = i915_gem_request_alloc(engine, ctx); + if (IS_ERR(req)) { + ret = PTR_ERR(req); + goto ctx_put; + } + + ret = i915_gem_request_add_to_client(req, file); + if (ret) { + DRM_DEBUG_DRIVER("failed to add request to client, %d\n", ret); + goto add_request; + } + + /* If we fail here, we still need to submit the req */ + ret = do_exec_mm(exec_mm, req, flags); + +add_request: + i915_add_request(req); + +ctx_put: + i915_gem_context_put(ctx); + +unlock: + mutex_unlock(&dev->struct_mutex); + +pm_put: + intel_runtime_pm_put(dev_priv); + + return ret; +} diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 639ef5b0e2c9..8d567744f221 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -259,6 +259,7 @@ typedef struct _drm_i915_sarea { #define DRM_I915_GEM_CONTEXT_GETPARAM 0x34 #define DRM_I915_GEM_CONTEXT_SETPARAM 0x35 #define DRM_I915_GEM_CONTEXT_CREATE2 0x36 +#define DRM_I915_EXEC_MM 0x37 #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH) @@ -313,6 +314,7 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_GETPARAM, struct drm_i915_gem_context_param) #define DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_SETPARAM, struct drm_i915_gem_context_param) #define DRM_IOCTL_I915_GEM_CONTEXT_CREATE2 DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE2, struct drm_i915_gem_context_create2) +#define DRM_IOCTL_I915_EXEC_MM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_EXEC_MM, struct drm_i915_exec_mm) /* Allow drivers to submit batchbuffers directly to hardware, relying * on the security mechanisms provided by hardware. @@ -1225,6 +1227,40 @@ struct drm_i915_gem_context_param { __u64 value; }; +/** + * drm_i915_exec_mm - shared address space execbuf + * @batch_ptr: address of batch buffer (in context's CPU address space) + * @ctx_id: context to use for execution + * @ring_id: ring to which this context will be submitted + * @flags: see flags + * @fence: returned fence handle + * @fence_dep_count: number of fences this execution depends on + * @fence_deps: array of fence IDs (u32) this execution depends on + * + * This simplified execbuf just executes an MI_BATCH_BUFFER_START at + * @batch_ptr using @ctx_id as the context. The context will indicate + * which address space the @batch_ptr will use. + * + * Note @batch_ptr must be dword aligned. + * + * By default, the kernel will simply execute the address given on the GPU. + * If the %I915_EXEC_MM_FENCE flag is passed in the @flags field however, + * the kernel will return a Android native sync object for the caller to + * use to synchronize execution (see the android-native-sync(7) man page). + * + */ +struct drm_i915_exec_mm { + __u64 batch_ptr; + __u32 ctx_id; + __u32 ring_id; /* see execbuffer2 flags */ + __u32 flags; +#define I915_EXEC_MM_FENCE (1<<0) + __u32 pad; + __u32 fence; + __u32 fence_dep_count; + __u64 fence_deps; +}; + #if defined(__cplusplus) } #endif -- 2.7.4 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx