From: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx> We want the ability to dispatch a set of command buffer to the hardware, each with a different OA configuration. To achieve this, we reuse a couple of fields from the execbuf2 struct (I CAN HAZ execbuf3?) to notify what OA configuration should be used for a batch buffer. This requires the process making the execbuf with this flag to also own the perf fd at the time of execbuf. v2: Add a emit_oa_config() vfunc in the intel_engine_cs (Chris) Move oa_config vma to active (Chris) v3: Don't drop the lock for engine lookup (Chris) Move OA config vma to active before writing the ringbuffer (Chris) v4: Reuse i915_user_extension_fn Serialize requests with OA config updates v5: Check that the chained extension is only present once (Chris) Unpin oa_vma in main path (Chris) v6: Use BIT_ULL (Chris) v7: Hold drm.struct_mutex when serializing the request with OA config (Chris) v8: Remove active request from engine (Lionel) v9: Move fetching OA configuration pass engine pinning (Lionel) Lock VMA before moving to active (Chris) v10: Fix leak on perf_fd (Lionel) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx> --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 124 +++++++++++++++++- drivers/gpu/drm/i915/i915_getparam.c | 4 + include/uapi/drm/i915_drm.h | 44 ++++++- 3 files changed, 165 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 6482141cca1a..bddc357e63bf 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -24,6 +24,7 @@ #include "i915_gem_clflush.h" #include "i915_gem_context.h" #include "i915_gem_ioctls.h" +#include "i915_perf.h" #include "i915_trace.h" #include "i915_user_extensions.h" @@ -47,6 +48,8 @@ enum { #define __EXEC_INTERNAL_FLAGS (~0u << 30) #define UPDATE PIN_OFFSET_FIXED +#define EXTRA_VMA 2 + #define BATCH_OFFSET_BIAS (256*1024) #define __I915_EXEC_ILLEGAL_FLAGS \ @@ -273,6 +276,12 @@ struct i915_execbuffer { */ int lut_size; struct hlist_head *buckets; /** ht for relocation handles */ + + struct { /* HW configuration for OA. */ + struct file *file; + struct i915_oa_config *config; + struct i915_vma *vma; + } oa; }; #define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags]) @@ -889,6 +898,12 @@ static void eb_destroy(const struct i915_execbuffer *eb) if (eb->lut_size > 0) kfree(eb->buckets); + + if (eb->oa.config) { + i915_vma_put(eb->oa.vma); + i915_oa_config_put(eb->oa.config); + fput(eb->oa.file); + } } static inline u64 @@ -2045,6 +2060,58 @@ add_to_client(struct i915_request *rq, struct drm_file *file) spin_unlock(&file_priv->mm.lock); } +static int eb_oa_config(struct i915_execbuffer *eb) +{ + struct i915_perf_stream *stream; + struct i915_perf *perf; + int err; + + if (!eb->oa.config) + return 0; + + stream = eb->oa.file->private_data; + perf = stream->perf; + + err = mutex_lock_interruptible(&perf->lock); + if (err) + return err; + + if (stream != perf->exclusive_stream) { + err = -EINVAL; + goto out; + } + + if (stream->engine != eb->engine) { /* FIXME: virtual selection */ + err = -EINVAL; + goto out; + } + + err = i915_active_fence_set(&perf->active_config, eb->request); + if (err) + goto out; + + /* + * If the config hasn't changed, skip reconfiguring the HW (this is + * subject to a delay we want to avoid has much as possible). + * + * After a GPU reset, the oa config will not be recovered. + */ + if (eb->oa.config == stream->oa_config) + goto out; + + err = eb->engine->emit_bb_start(eb->request, + eb->oa.vma->node.start, 0, + I915_DISPATCH_SECURE); /* ggtt */ + if (err) + goto out; + + swap(eb->oa.config, stream->oa_config); + +out: + mutex_unlock(&perf->lock); + return err; +} + static int eb_submit(struct i915_execbuffer *eb) { int err; @@ -2071,6 +2138,10 @@ static int eb_submit(struct i915_execbuffer *eb) return err; } + err = eb_oa_config(eb); + if (err) + return err; + err = eb->engine->emit_bb_start(eb->request, eb->batch->node.start + eb->batch_start_offset, @@ -2421,7 +2492,39 @@ signal_fence_array(struct i915_execbuffer *eb, } } +static int parse_perf_config(struct i915_user_extension __user *ext, void *data) +{ + struct drm_i915_gem_execbuffer_ext_perf perf_config; + struct i915_execbuffer *eb = data; + struct file *file; + int err; + + if (eb->oa.config) + return -EINVAL; /* exclusive */ + + if (copy_from_user(&perf_config, ext, sizeof(perf_config))) + return -EFAULT; + + file = fget(perf_config.perf_fd); + if (!file) + return -EINVAL; + + err = i915_perf_stream_get_oa_config(file->private_data, + perf_config.oa_config, + &eb->oa.config, &eb->oa.vma); + if (err) + goto err_file; + + eb->oa.file = file; + return 0; + +err_file: + fput(file); + return err; +} + static const i915_user_extension_fn execbuf_extensions[] = { + [I915_EXEC_EXT_PERF_CONFIG] = parse_perf_config, }; static int @@ -2471,10 +2574,12 @@ i915_gem_do_execbuffer(struct drm_device *dev, if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC)) args->flags |= __EXEC_HAS_RELOC; + eb.oa.config = NULL; + eb.exec = exec; - eb.vma = (struct i915_vma **)(exec + args->buffer_count + 1); + eb.vma = (struct i915_vma **)(exec + args->buffer_count + EXTRA_VMA); eb.vma[0] = NULL; - eb.flags = (unsigned int *)(eb.vma + args->buffer_count + 1); + eb.flags = (unsigned int *)(eb.vma + args->buffer_count + EXTRA_VMA); eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS; reloc_cache_init(&eb.reloc_cache, eb.i915); @@ -2592,6 +2697,17 @@ i915_gem_do_execbuffer(struct drm_device *dev, } } + if (eb.oa.config) { + err = i915_vma_pin(eb.oa.vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + if (err) + goto err_vma; + + eb.vma[eb.buffer_count] = eb.oa.vma; + eb.flags[eb.buffer_count] = __EXEC_OBJECT_HAS_PIN; + eb.oa.vma->exec_flags = &eb.flags[eb.buffer_count]; + eb.buffer_count++; + } + if (eb.batch_len == 0) eb.batch_len = eb.batch->size - eb.batch_start_offset; @@ -2772,7 +2888,7 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data, /* Copy in the exec list from userland */ exec_list = kvmalloc_array(count, sizeof(*exec_list), __GFP_NOWARN | GFP_KERNEL); - exec2_list = kvmalloc_array(count + 1, eb_element_size(), + exec2_list = kvmalloc_array(count + EXTRA_VMA, eb_element_size(), __GFP_NOWARN | GFP_KERNEL); if (exec_list == NULL || exec2_list == NULL) { DRM_DEBUG("Failed to allocate exec list for %d buffers\n", @@ -2848,7 +2964,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, return -EINVAL; /* Allocate an extra slot for use by the command parser */ - exec2_list = kvmalloc_array(count + 1, eb_element_size(), + exec2_list = kvmalloc_array(count + EXTRA_VMA, eb_element_size(), __GFP_NOWARN | GFP_KERNEL); if (exec2_list == NULL) { DRM_DEBUG("Failed to allocate exec list for %zd buffers\n", diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c index ad33fbe90a28..6ad290b14e12 100644 --- a/drivers/gpu/drm/i915/i915_getparam.c +++ b/drivers/gpu/drm/i915/i915_getparam.c @@ -160,6 +160,10 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, case I915_PARAM_PERF_REVISION: value = i915_perf_ioctl_version(); break; + case I915_PARAM_HAS_EXEC_PERF_CONFIG: + /* Obviously requires perf support. */ + value = !!i915->perf.i915; + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 06c5c187264b..2ce498e498a2 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -618,6 +618,16 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_PERF_REVISION 54 +/* + * Request an i915/perf performance configuration change before running the + * commands given in an execbuf. + * + * Performance configuration ID and the file descriptor of the i915 perf + * stream are given through drm_i915_gem_execbuffer_ext_perf. See + * I915_EXEC_USE_EXTENSIONS. + */ +#define I915_PARAM_HAS_EXEC_PERF_CONFIG 55 + /* Must be kept compact -- no holes and well documented */ typedef struct drm_i915_getparam { @@ -1014,6 +1024,29 @@ struct drm_i915_gem_exec_fence { __u32 flags; }; +struct drm_i915_gem_execbuffer_ext_perf { + struct i915_user_extension base; /* .name = I915_EXEC_EXT_PERF_CONFIG */ + + /* + * Performance file descriptor returned by DRM_IOCTL_I915_PERF_OPEN. + * This is used to identify that the application requesting a HW + * performance configuration change actually has a right to do so + * because it also has access the i915-perf stream. + */ + __s32 perf_fd; + + /* + * Unused for now. Must be cleared to zero. + */ + __u32 flags; + + /* + * OA configuration ID to switch to before executing the commands + * associated to the execbuf. + */ + __u64 oa_config; +}; + struct drm_i915_gem_execbuffer2 { /** * List of gem_exec_object2 structs @@ -1038,9 +1071,13 @@ struct drm_i915_gem_execbuffer2 { * * If I915_EXEC_USE_EXTENSIONS is set, then this is a pointer to a * single struct drm_i915_gem_base_execbuffer_ext and num_cliprects is - * 0. + * 0. Extensions: + * drm_i915_gem_execbuffer_ext_perf (I915_EXEC_EXT_PERF_CONFIG) */ __u64 cliprects_ptr; +#define I915_EXEC_EXT_PERF_CONFIG 0 + + __u64 flags; #define I915_EXEC_RING_MASK (0x3f) #define I915_EXEC_DEFAULT (0<<0) #define I915_EXEC_RENDER (1<<0) @@ -1048,7 +1085,8 @@ struct drm_i915_gem_execbuffer2 { #define I915_EXEC_BLT (3<<0) #define I915_EXEC_VEBOX (4<<0) -/* Used for switching the constants addressing mode on gen4+ RENDER ring. +/* + * Used for switching the constants addressing mode on gen4+ RENDER ring. * Gen6+ only supports relative addressing to dynamic state (default) and * absolute addressing. * @@ -1058,7 +1096,7 @@ struct drm_i915_gem_execbuffer2 { #define I915_EXEC_CONSTANTS_REL_GENERAL (0<<6) /* default */ #define I915_EXEC_CONSTANTS_ABSOLUTE (1<<6) #define I915_EXEC_CONSTANTS_REL_SURFACE (2<<6) /* gen4/5 only */ - __u64 flags; + __u64 rsvd1; /* now used for context info */ __u64 rsvd2; }; -- 2.23.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx