Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes: > On Mon, Jan 09, 2017 at 06:52:54PM +0200, Mika Kuoppala wrote: >> From: Jesse Barnes <jbarnes@xxxxxxxxxxxxxxxx> >> >> We just need to pass in an address to execute and some flags, since we >> don't have to worry about buffer relocation or any of the other usual >> stuff. Takes in a fence and returns a fence to be used for >> synchronization. >> >> v2: add a request after batch submission (Jesse) >> v3: add a flag for fence creation (Chris) >> v4: add CLOEXEC flag (Kristian) >> add non-RCS ring support (Jesse) >> v5: update for request alloc change (Jesse) >> v6: new sync file interface, error paths, request breadcrumbs >> v7: always CLOEXEC for sync_file_install >> v8: rebase on new sync file api >> v9: rework on top of fence requests and sync_file >> v10: take fence ref for sync_file (Chris) >> use correct flush (Chris) >> limit exec on rcs >> v11: allow exec on all engines >> v12: dma_fence_put/get, fix error path on getting sync_file >> v13: add in fences, removed superfluous dma_fence_put/get >> >> Cc: Daniel Vetter <daniel.vetter@xxxxxxxx> >> Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> >> Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx> >> Signed-off-by: Jesse Barnes <jbarnes@xxxxxxxxxxxxxxxx> (v5) >> Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx> >> --- >> drivers/gpu/drm/i915/Kconfig | 1 + >> drivers/gpu/drm/i915/i915_drv.c | 1 + >> drivers/gpu/drm/i915/i915_drv.h | 2 + >> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 175 +++++++++++++++++++++++++++++ >> include/uapi/drm/i915_drm.h | 44 ++++++++ >> 5 files changed, 223 insertions(+) >> >> diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig >> index 183f5dc..387990d 100644 >> --- a/drivers/gpu/drm/i915/Kconfig >> +++ b/drivers/gpu/drm/i915/Kconfig >> @@ -8,6 +8,7 @@ config DRM_I915 >> # the shmem_readpage() which depends upon tmpfs >> select SHMEM >> select TMPFS >> + select SYNC_FILE > > Duplicate. 
> >> select DRM_KMS_HELPER >> select DRM_PANEL >> select DRM_MIPI_DSI >> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c >> index 4d22b4b..2276ba6 100644 >> --- a/drivers/gpu/drm/i915/i915_drv.c >> +++ b/drivers/gpu/drm/i915/i915_drv.c >> @@ -2567,6 +2567,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = { >> DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_GETPARAM, i915_gem_context_getparam_ioctl, DRM_RENDER_ALLOW), >> DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_SETPARAM, i915_gem_context_setparam_ioctl, DRM_RENDER_ALLOW), >> DRM_IOCTL_DEF_DRV(I915_PERF_OPEN, i915_perf_open_ioctl, DRM_RENDER_ALLOW), >> + DRM_IOCTL_DEF_DRV(I915_GEM_EXEC_MM, i915_gem_exec_mm, DRM_RENDER_ALLOW), >> }; >> >> static struct drm_driver driver = { >> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h >> index ca1eaaf..8436ea3 100644 >> --- a/drivers/gpu/drm/i915/i915_drv.h >> +++ b/drivers/gpu/drm/i915/i915_drv.h >> @@ -3094,6 +3094,8 @@ int i915_gem_execbuffer(struct drm_device *dev, void *data, >> struct drm_file *file_priv); >> int i915_gem_execbuffer2(struct drm_device *dev, void *data, >> struct drm_file *file_priv); >> +int i915_gem_exec_mm(struct drm_device *dev, void *data, >> + struct drm_file *file); >> int i915_gem_busy_ioctl(struct drm_device *dev, void *data, >> struct drm_file *file_priv); >> int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, >> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c >> index a5fe299..bb51ce4 100644 >> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c >> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c >> @@ -29,6 +29,7 @@ >> #include <linux/dma_remapping.h> >> #include <linux/reservation.h> >> #include <linux/uaccess.h> >> +#include <linux/sync_file.h> > > Out of date. Looking at the current code, it is duplicate as the explicit fencing prolly brought it in. 
>> #include <drm/drmP.h> >> #include <drm/i915_drm.h> >> @@ -1984,3 +1985,177 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data, >> drm_free_large(exec2_list); >> return ret; >> } >> + >> +static struct intel_engine_cs * >> +exec_mm_select_engine(const struct drm_i915_private *dev_priv, >> + const struct drm_i915_exec_mm *exec_mm) >> +{ >> + const unsigned int user_ring_id = exec_mm->ring_id; >> + struct intel_engine_cs *engine; >> + >> + if (user_ring_id > I915_USER_RINGS) { >> + DRM_DEBUG("exec_mm with unknown ring: %u\n", user_ring_id); >> + return NULL; >> + } >> + >> + engine = dev_priv->engine[user_ring_map[user_ring_id]]; >> + >> + if (!engine) { >> + DRM_DEBUG("exec_mm with invalid engine: %u\n", user_ring_id); >> + return NULL; >> + } >> + >> + return engine; >> +} > > I would rather we reuse eb_select_engine(). We know we have to fix the > ABI anyway, so might as well kill 2 birds with one stone. > I am still hesitant about this. Tried but this would expose the exec svm flags to similar kind of trickery for BSD ring. >> +static int do_exec_mm(struct drm_i915_exec_mm *exec_mm, >> + struct drm_i915_gem_request *req, >> + const u32 flags) >> +{ >> + struct sync_file *out_fence = NULL; >> + struct dma_fence *in_fence = NULL; >> + int out_fence_fd = -1; >> + int ret; >> + >> + if (flags & I915_EXEC_MM_FENCE_IN) { >> + in_fence = sync_file_get_fence(exec_mm->in_fence_fd); >> + if (!in_fence) >> + return -EINVAL; >> + >> + ret = i915_gem_request_await_dma_fence(req, in_fence); > > You can drop the fence ref immediately. 
So closely mimicked from explicit fences that execbuf path might be able to do the same :) > > dma_fence_put(in_fence); >> + if (ret < 0) >> + goto err_out; >> + } >> + >> + if (flags & I915_EXEC_MM_FENCE_OUT) { >> + out_fence = sync_file_create(&req->fence); >> + if (!out_fence) { >> + DRM_DEBUG_DRIVER("sync_file_create failed\n"); >> + ret = -ENOMEM; >> + goto err_out; >> + } >> + >> + out_fence_fd = get_unused_fd_flags(O_CLOEXEC); >> + if (out_fence_fd < 0) { >> + ret = out_fence_fd; >> + out_fence_fd = -1; >> + goto err_out; >> + } >> + >> + fd_install(out_fence_fd, out_fence->file); >> + exec_mm->out_fence_fd = out_fence_fd; > > Try not to set before success. > >> + ret = req->engine->emit_flush(req, EMIT_INVALIDATE); >> + if (ret) { >> + DRM_DEBUG_DRIVER("engine flush failed: %d\n", ret); >> + goto err_out; >> + } >> + >> + ret = req->engine->emit_bb_start(req, exec_mm->batch_ptr, 0, 0); >> + if (ret) { >> + DRM_DEBUG_DRIVER("engine dispatch execbuf failed: %d\n", ret); >> + goto err_out; >> + } >> + >> + return 0; >> + >> +err_out: >> + if (out_fence_fd != -1) >> + put_unused_fd(out_fence_fd); >> + >> + if (out_fence) >> + fput(out_fence->file); >> + >> + if (in_fence) >> + dma_fence_put(in_fence); >> + >> + return ret; >> +} >> + >> +int i915_gem_exec_mm(struct drm_device *dev, void *data, struct drm_file *file) > > Might as well call this exec_svm. Unless you have immediate plans to > generalise. > I renamed everywhere s/mm/svm >> +{ >> + struct drm_i915_private *dev_priv = to_i915(dev); >> + struct drm_i915_exec_mm *exec_mm = data; >> + struct intel_engine_cs *engine; >> + struct i915_gem_context *ctx; >> + struct drm_i915_gem_request *req; >> + const u32 ctx_id = exec_mm->ctx_id; >> + const u32 flags = exec_mm->flags; >> + int ret; >> + >> + if (exec_mm->batch_ptr & 3) { >> + DRM_DEBUG_DRIVER("ptr not 4-byt aligned %llu\n", >> + exec_mm->batch_ptr); >> + return -EINVAL; >> + } > > Where does access_ok() get performed? 
A little spiel on the mm validation > would help here if it is all done for us by svm. > Passing in length wouldn't help much as the refs inside the bb would also need to be checked? >> + if (flags & ~(I915_EXEC_MM_FENCE_OUT | I915_EXEC_MM_FENCE_IN)) { >> + DRM_DEBUG_DRIVER("bad flags: 0x%08x\n", flags); >> + return -EINVAL; >> + } >> + >> + if (exec_mm->pad != 0) { >> + DRM_DEBUG_DRIVER("bad pad: 0x%08x\n", exec_mm->pad); >> + return -EINVAL; >> + } >> + >> + if (file == NULL) >> + return -EINVAL; > > ? That's not a user error (EINVAL), that's a kaboom! > >> + >> + engine = exec_mm_select_engine(dev_priv, exec_mm); >> + if (!engine) >> + return -EINVAL; >> + >> + intel_runtime_pm_get(dev_priv); >> + >> + ret = i915_mutex_lock_interruptible(dev); >> + if (ret) { >> + DRM_ERROR("mutex interrupted\n"); >> + goto pm_put; >> + } >> + >> + ctx = i915_gem_validate_context(dev, file, engine, ctx_id); >> + if (IS_ERR(ctx)) { >> + ret = PTR_ERR(ctx); >> + DRM_DEBUG_DRIVER("couldn't get context\n"); >> + goto unlock; >> + } >> + >> + if (!i915_gem_context_is_svm(ctx)) { >> + ret = -EINVAL; >> + DRM_DEBUG_DRIVER("context is not SVM enabled\n"); >> + goto unlock; >> + } >> + >> + i915_gem_context_get(ctx); >> + >> + req = i915_gem_request_alloc(engine, ctx); >> + if (IS_ERR(req)) { >> + ret = PTR_ERR(req); >> + goto ctx_put; >> + } >> + >> + ret = i915_gem_request_add_to_client(req, file); >> + if (ret) { >> + DRM_DEBUG_DRIVER("failed to add request to client, %d\n", ret); >> + goto add_request; >> + } >> + >> + /* If we fail here, we still need to submit the req */ >> + ret = do_exec_mm(exec_mm, req, flags); >> + >> +add_request: >> + i915_add_request(req); > > Do the same as execbuf just for consistency (i.e. > __i915_add_request(req, ret == 0)). > > What's the story for i915_gpu_error.c? > Work in progress. 
>> + >> +ctx_put: >> + i915_gem_context_put(ctx); >> + >> +unlock: >> + mutex_unlock(&dev->struct_mutex); >> + >> +pm_put: >> + intel_runtime_pm_put(dev_priv); >> + >> + return ret; >> +} >> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h >> index 5b06ab1..843f943 100644 >> --- a/include/uapi/drm/i915_drm.h >> +++ b/include/uapi/drm/i915_drm.h >> @@ -259,6 +259,7 @@ typedef struct _drm_i915_sarea { >> #define DRM_I915_GEM_CONTEXT_GETPARAM 0x34 >> #define DRM_I915_GEM_CONTEXT_SETPARAM 0x35 >> #define DRM_I915_PERF_OPEN 0x36 >> +#define DRM_I915_GEM_EXEC_MM 0x37 >> >> #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) >> #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH) >> @@ -313,6 +314,7 @@ typedef struct _drm_i915_sarea { >> #define DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_GETPARAM, struct drm_i915_gem_context_param) >> #define DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_SETPARAM, struct drm_i915_gem_context_param) >> #define DRM_IOCTL_I915_PERF_OPEN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_OPEN, struct drm_i915_perf_open_param) >> +#define DRM_IOCTL_I915_GEM_EXEC_MM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_EXEC_MM, struct drm_i915_exec_mm) >> >> /* Allow drivers to submit batchbuffers directly to hardware, relying >> * on the security mechanisms provided by hardware. 
>> @@ -1363,6 +1365,48 @@ enum drm_i915_perf_record_type { >> DRM_I915_PERF_RECORD_MAX /* non-ABI */ >> }; >> >> +/** >> + * drm_i915_exec_mm - shared address space execbuf >> + * @batch_ptr: address of batch buffer (in context's CPU address space) >> + * @ctx_id: context to use for execution, must be svm enabled >> + * @ring_id: ring to which this context will be submitted, see execbuffer2 >> + * @flags: used to signal if in/out fences should be used >> + * @out_fence_fd: returned fence handle of out fence you can wait on >> + * @in_fence_fd: given fence handle of fence the gpu will wait for >> + * @pad: padding, must be zero >> + * >> + * This simplified execbuf just executes an MI_BATCH_BUFFER_START at >> + * @batch_ptr using @ctx_id as the context. The context will indicate >> + * which address space the @batch_ptr will use. >> + * >> + * Note @batch_ptr must be dword aligned. >> + * >> + * By default, the kernel will simply execute the address given on the GPU. >> + * >> + * If the %I915_EXEC_MM_FENCE_OUT flag is passed in the @flags field however, >> + * the kernel will return a sync_file (dma_fence) object in @out_fence_fd for >> + * the caller to use to synchronize execution of the passed batch. >> + * >> + * If the %I915_EXEC_MM_FENCE_IN flag is passed in the @flags field, >> + * the kernel will wait until the fence (dma_fence) object passed in >> + * @in_fence_fd to complete before submitting batch to gpu. >> + * >> + */ >> +struct drm_i915_exec_mm { >> + __u64 batch_ptr; >> + __u32 ctx_id; >> + __u32 ring_id; /* see execbuffer2 ring flags */ >> + >> +#define I915_EXEC_MM_FENCE_OUT (1<<0) >> +#define I915_EXEC_MM_FENCE_IN (1<<1) >> + __u32 flags; >> + >> + __u32 out_fence_fd; >> + __u32 in_fence_fd; > > In then out just makes more sense to me! >> + >> + __u32 pad; > > If you use u64 flags (as the second parameter) we can put this pad to > good use. > I have addressed everything with the exception where I have stated otherwise. 
Thank you for the feedback, -Mika >> +}; >> + >> #if defined(__cplusplus) >> } >> #endif >> -- >> 2.7.4 >> > > -- > Chris Wilson, Intel Open Source Technology Centre _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx