We want the ability to dispatch a set of command buffers to the hardware, each with a different OA configuration. To achieve this, we reuse a couple of fields from the execbuf2 struct (I CAN HAZ execbuf3?) to notify what OA configuration should be used for a batch buffer. This requires the process making the execbuf with this flag to also own the perf fd at the time of execbuf. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_drv.c | 4 ++ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 60 +++++++++++++++++++--- drivers/gpu/drm/i915/i915_request.c | 4 ++ drivers/gpu/drm/i915/i915_request.h | 2 + drivers/gpu/drm/i915/intel_lrc.c | 13 ++++- drivers/gpu/drm/i915/intel_ringbuffer.c | 11 +++- include/uapi/drm/i915_drm.h | 12 ++++- 7 files changed, 97 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 193023427b40..564c2e749fd8 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -444,6 +444,10 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data, case I915_PARAM_MMAP_GTT_COHERENT: value = INTEL_INFO(dev_priv)->has_coherent_ggtt; break; + case I915_PARAM_HAS_EXEC_PERF_CONFIG: + /* Obviously requires perf support. */ + value = dev_priv->perf.initialized; + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 09187286d346..8b963641f142 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -286,6 +286,8 @@ struct i915_execbuffer { */ int lut_size; struct hlist_head *buckets; /** ht for relocation handles */ + + struct i915_vma *oa_config; /** HW configuration for OA, NULL if not needed. 
*/ }; #define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags]) @@ -1121,6 +1123,32 @@ static void clflush_write32(u32 *addr, u32 value, unsigned int flushes) *addr = value; } +static int +get_execbuf_oa_config(struct drm_i915_private *dev_priv, + int perf_fd, u32 oa_config_id, + struct i915_vma **out_oa_vma) +{ + struct file *perf_file; + int ret; + + if (!dev_priv->perf.oa.exclusive_stream) + return -EINVAL; + + perf_file = fget(perf_fd); + if (!perf_file) + return -EINVAL; + + if (perf_file->private_data != dev_priv->perf.oa.exclusive_stream) + return -EINVAL; + + fput(perf_file); + + ret = i915_perf_get_oa_config(dev_priv, oa_config_id, + NULL, out_oa_vma); + + return ret; +} + static int __reloc_gpu_alloc(struct i915_execbuffer *eb, struct i915_vma *vma, unsigned int len) @@ -1173,6 +1201,9 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, goto err_unpin; } + rq->oa_config = eb->oa_config; + eb->oa_config = NULL; + err = i915_request_await_object(rq, vma->obj, true); if (err) goto err_request; @@ -1875,12 +1906,15 @@ static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) return false; } - if (exec->DR4 == 0xffffffff) { - DRM_DEBUG("UXA submitting garbage DR4, fixing up\n"); - exec->DR4 = 0; + /* We reuse DR1 & DR4 fields for passing the perf config detail. 
*/ + if (!(exec->flags & I915_EXEC_PERF_CONFIG)) { + if (exec->DR4 == 0xffffffff) { + DRM_DEBUG("UXA submitting garbage DR4, fixing up\n"); + exec->DR4 = 0; + } + if (exec->DR1 || exec->DR4) + return false; } - if (exec->DR1 || exec->DR4) - return false; if ((exec->batch_start_offset | exec->batch_len) & 0x7) return false; @@ -2224,6 +2258,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, eb.buffer_count = args->buffer_count; eb.batch_start_offset = args->batch_start_offset; eb.batch_len = args->batch_len; + eb.oa_config = NULL; eb.batch_flags = 0; if (args->flags & I915_EXEC_SECURE) { @@ -2253,9 +2288,16 @@ i915_gem_do_execbuffer(struct drm_device *dev, } } + if (args->flags & I915_EXEC_PERF_CONFIG) { + err = get_execbuf_oa_config(eb.i915, args->DR1, args->DR4, + &eb.oa_config); + if (err) + goto err_out_fence; + } + err = eb_create(&eb); if (err) - goto err_out_fence; + goto err_perf; GEM_BUG_ON(!eb.lut_size); @@ -2365,6 +2407,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, goto err_batch_unpin; } + eb.request->oa_config = eb.oa_config; + eb.oa_config = NULL; + if (in_fence) { err = i915_request_await_dma_fence(eb.request, in_fence); if (err < 0) @@ -2426,6 +2471,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, i915_gem_context_put(eb.ctx); err_destroy: eb_destroy(&eb); +err_perf: + if (eb.oa_config) + i915_vma_put(eb.oa_config); err_out_fence: if (out_fence_fd != -1) put_unused_fd(out_fence_fd); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index abd4dacbab8e..8fb134793925 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -379,6 +379,9 @@ static void i915_request_retire(struct i915_request *request) unreserve_gt(request->i915); + if (request->oa_config) + i915_vma_put(request->oa_config); + i915_sched_node_fini(request->i915, &request->sched); i915_request_put(request); } @@ -704,6 +707,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context 
*ctx) rq->batch = NULL; rq->capture_list = NULL; rq->waitboost = false; + rq->oa_config = NULL; /* * Reserve space in the ring buffer for all the commands required to diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 90e9d170a0cd..7a42c9b94877 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -188,6 +188,8 @@ struct i915_request { struct drm_i915_file_private *file_priv; /** file_priv list entry for this request */ struct list_head client_link; + + struct i915_vma *oa_config; /** HW configuration for OA, NULL if not needed. */ }; #define I915_FENCE_GFP (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index b240332838c1..d3d8c0c60d65 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1858,6 +1858,8 @@ static int gen8_emit_bb_start(struct i915_request *rq, { u32 *cs; int ret; + bool use_oa_config = + rq->i915->perf.oa.exclusive_stream && rq->oa_config; /* Don't rely in hw updating PDPs, specially in lite-restore. * Ideally, we should set Force PD Restore in ctx descriptor, @@ -1875,10 +1877,19 @@ static int gen8_emit_bb_start(struct i915_request *rq, rq->gem_context->ppgtt->pd_dirty_rings &= ~intel_engine_flag(rq->engine); } - cs = intel_ring_begin(rq, 6); + cs = intel_ring_begin(rq, use_oa_config ? 
10 : 6); if (IS_ERR(cs)) return PTR_ERR(cs); + if (use_oa_config) { + u32 oa_config_offset = i915_ggtt_offset(rq->oa_config); + + *cs++ = MI_BATCH_BUFFER_START_GEN8; + *cs++ = oa_config_offset; + *cs++ = 0; + *cs++ = MI_NOOP; + } + /* * WaDisableCtxRestoreArbitration:bdw,chv * diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index b8a7a014d46d..d8ebcf91ce93 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2037,11 +2037,20 @@ hsw_emit_bb_start(struct i915_request *rq, unsigned int dispatch_flags) { u32 *cs; + bool use_oa_config = + rq->i915->perf.oa.exclusive_stream && rq->oa_config; - cs = intel_ring_begin(rq, 2); + cs = intel_ring_begin(rq, use_oa_config ? 4 : 2); if (IS_ERR(cs)) return PTR_ERR(cs); + if (use_oa_config) { + u32 oa_config_offset = i915_ggtt_offset(rq->oa_config); + + *cs++ = MI_BATCH_BUFFER_START; + *cs++ = oa_config_offset; + } + *cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW); /* bit0-7 is the length on GEN6+ */ diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 62f669030741..4f0b39796d80 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -559,6 +559,8 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_MMAP_GTT_COHERENT 52 +#define I915_PARAM_HAS_EXEC_PERF_CONFIG 53 + typedef struct drm_i915_getparam { __s32 param; /* @@ -1078,7 +1080,15 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_FENCE_ARRAY (1<<19) -#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1)) +/* Request that perf monitoring hardware be reprogrammed before executing the + * commands from the batch in the execbuf. The DR1 & DR4 fields of the execbuf + * must respectively contain the file descriptor of the perf monitoring device + * and the configuration to program. 
+ */ +#define I915_EXEC_PERF_CONFIG (1<<20) + + +#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_PERF_CONFIG<<1)) #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \ -- 2.19.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx