Where are these changes from (repo/commit)? It could be good to reference in the commit message. I suspect that the answer might mean that these patches should be labeled RFC. -Jordan On 2019-03-25 03:58:58, Chris Wilson wrote: > For use in GPU recovery and pipeline construction. > --- > include/drm-uapi/i915_drm.h | 389 +++++++++++++++++++++++++++++------- > 1 file changed, 317 insertions(+), 72 deletions(-) > > diff --git a/include/drm-uapi/i915_drm.h b/include/drm-uapi/i915_drm.h > index d2792ab3640..59baacd265d 100644 > --- a/include/drm-uapi/i915_drm.h > +++ b/include/drm-uapi/i915_drm.h > @@ -62,6 +62,28 @@ extern "C" { > #define I915_ERROR_UEVENT "ERROR" > #define I915_RESET_UEVENT "RESET" > > +/* > + * i915_user_extension: Base class for defining a chain of extensions > + * > + * Many interfaces need to grow over time. In most cases we can simply > + * extend the struct and have userspace pass in more data. Another option, > + * as demonstrated by Vulkan's approach to providing extensions for forward > + * and backward compatibility, is to use a list of optional structs to > + * provide those extra details. > + * > + * The key advantage to using an extension chain is that it allows us to > + * redefine the interface more easily than an ever growing struct of > + * increasing complexity, and for large parts of that interface to be > + * entirely optional. The downside is more pointer chasing; chasing across > + * the boundary with pointers encapsulated inside u64. > + */ > +struct i915_user_extension { > + __u64 next_extension; > + __u32 name; > + __u32 flags; /* All undefined bits must be zero. */ > + __u32 rsvd[4]; /* Reserved for future use; must be zero. */ > +}; > + > /* > * MOCS indexes used for GPU surfaces, defining the cacheability of the > * surface data and the coherency for this data wrt. CPU vs. GPU accesses. 
> @@ -99,9 +121,14 @@ enum drm_i915_gem_engine_class { > I915_ENGINE_CLASS_VIDEO = 2, > I915_ENGINE_CLASS_VIDEO_ENHANCE = 3, > > + /* should be kept compact */ > + > I915_ENGINE_CLASS_INVALID = -1 > }; > > +#define I915_ENGINE_CLASS_INVALID_NONE -1 > +#define I915_ENGINE_CLASS_INVALID_VIRTUAL 0 > + > /** > * DOC: perf_events exposed by i915 through /sys/bus/event_sources/drivers/i915 > * > @@ -319,6 +346,9 @@ typedef struct _drm_i915_sarea { > #define DRM_I915_PERF_ADD_CONFIG 0x37 > #define DRM_I915_PERF_REMOVE_CONFIG 0x38 > #define DRM_I915_QUERY 0x39 > +#define DRM_I915_GEM_VM_CREATE 0x3a > +#define DRM_I915_GEM_VM_DESTROY 0x3b > +/* Must be kept compact -- no holes */ > > #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) > #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH) > @@ -367,6 +397,7 @@ typedef struct _drm_i915_sarea { > #define DRM_IOCTL_I915_GET_SPRITE_COLORKEY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GET_SPRITE_COLORKEY, struct drm_intel_sprite_colorkey) > #define DRM_IOCTL_I915_GEM_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_WAIT, struct drm_i915_gem_wait) > #define DRM_IOCTL_I915_GEM_CONTEXT_CREATE DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create) > +#define DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create_ext) > #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy) > #define DRM_IOCTL_I915_REG_READ DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read) > #define DRM_IOCTL_I915_GET_RESET_STATS DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats) > @@ -377,6 +408,8 @@ typedef struct _drm_i915_sarea { > #define DRM_IOCTL_I915_PERF_ADD_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config) > #define 
DRM_IOCTL_I915_PERF_REMOVE_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64) > #define DRM_IOCTL_I915_QUERY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_QUERY, struct drm_i915_query) > +#define DRM_IOCTL_I915_GEM_VM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_VM_CREATE, struct drm_i915_gem_vm_control) > +#define DRM_IOCTL_I915_GEM_VM_DESTROY DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_VM_DESTROY, struct drm_i915_gem_vm_control) > > /* Allow drivers to submit batchbuffers directly to hardware, relying > * on the security mechanisms provided by hardware. > @@ -476,6 +509,7 @@ typedef struct drm_i915_irq_wait { > #define I915_SCHEDULER_CAP_ENABLED (1ul << 0) > #define I915_SCHEDULER_CAP_PRIORITY (1ul << 1) > #define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2) > +#define I915_SCHEDULER_CAP_SEMAPHORES (1ul << 3) > > #define I915_PARAM_HUC_STATUS 42 > > @@ -559,6 +593,14 @@ typedef struct drm_i915_irq_wait { > */ > #define I915_PARAM_MMAP_GTT_COHERENT 52 > > +/* > + * Query whether DRM_I915_GEM_EXECBUFFER2 supports coordination of parallel > + * execution through use of explicit fence support. > + * See I915_EXEC_FENCE_OUT and I915_EXEC_FENCE_SUBMIT. > + */ > +#define I915_PARAM_HAS_EXEC_SUBMIT_FENCE 53 > +/* Must be kept compact -- no holes and well documented */ > + > typedef struct drm_i915_getparam { > __s32 param; > /* > @@ -574,6 +616,7 @@ typedef struct drm_i915_getparam { > #define I915_SETPARAM_TEX_LRU_LOG_GRANULARITY 2 > #define I915_SETPARAM_ALLOW_BATCHBUFFER 3 > #define I915_SETPARAM_NUM_USED_FENCES 4 > +/* Must be kept compact -- no holes */ > > typedef struct drm_i915_setparam { > int param; > @@ -972,7 +1015,7 @@ struct drm_i915_gem_execbuffer2 { > * struct drm_i915_gem_exec_fence *fences. 
> */ > __u64 cliprects_ptr; > -#define I915_EXEC_RING_MASK (7<<0) > +#define I915_EXEC_RING_MASK (0x3f) > #define I915_EXEC_DEFAULT (0<<0) > #define I915_EXEC_RENDER (1<<0) > #define I915_EXEC_BSD (2<<0) > @@ -1078,7 +1121,16 @@ struct drm_i915_gem_execbuffer2 { > */ > #define I915_EXEC_FENCE_ARRAY (1<<19) > > -#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1)) > +/* > + * Setting I915_EXEC_FENCE_SUBMIT implies that lower_32_bits(rsvd2) represent > + * a sync_file fd to wait upon (in a nonblocking manner) prior to executing > + * the batch. > + * > + * Returns -EINVAL if the sync_file fd cannot be found. > + */ > +#define I915_EXEC_FENCE_SUBMIT (1 << 20) > + > +#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SUBMIT << 1)) > > #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) > #define i915_execbuffer2_set_context_id(eb2, context) \ > @@ -1120,32 +1172,34 @@ struct drm_i915_gem_busy { > * as busy may become idle before the ioctl is completed. > * > * Furthermore, if the object is busy, which engine is busy is only > - * provided as a guide. There are race conditions which prevent the > - * report of which engines are busy from being always accurate. > - * However, the converse is not true. If the object is idle, the > - * result of the ioctl, that all engines are idle, is accurate. > + * provided as a guide and only indirectly by reporting its class > + * (there may be more than one engine in each class). There are race > + * conditions which prevent the report of which engines are busy from > + * being always accurate. However, the converse is not true. If the > + * object is idle, the result of the ioctl, that all engines are idle, > + * is accurate. > * > * The returned dword is split into two fields to indicate both > - * the engines on which the object is being read, and the > - * engine on which it is currently being written (if any). 
> + * the engine classes on which the object is being read, and the > + * engine class on which it is currently being written (if any). > * > * The low word (bits 0:15) indicate if the object is being written > * to by any engine (there can only be one, as the GEM implicit > * synchronisation rules force writes to be serialised). Only the > - * engine for the last write is reported. > + * engine class (offset by 1, I915_ENGINE_CLASS_RENDER is reported as > + * 1 not 0 etc) for the last write is reported. > * > - * The high word (bits 16:31) are a bitmask of which engines are > - * currently reading from the object. Multiple engines may be > + * The high word (bits 16:31) are a bitmask of which engine classes > + * are currently reading from the object. Multiple engines may be > * reading from the object simultaneously. > * > - * The value of each engine is the same as specified in the > - * EXECBUFFER2 ioctl, i.e. I915_EXEC_RENDER, I915_EXEC_BSD etc. > - * Note I915_EXEC_DEFAULT is a symbolic value and is mapped to > - * the I915_EXEC_RENDER engine for execution, and so it is never > + * The value of each engine class is the same as specified in the > + * I915_CONTEXT_PARAM_ENGINES parameter and via perf, i.e. > + * I915_ENGINE_CLASS_RENDER, I915_ENGINE_CLASS_COPY, etc. > * reported as active itself. Some hardware may have parallel > * execution engines, e.g. multiple media engines, which are > - * mapped to the same identifier in the EXECBUFFER2 ioctl and > - * so are not separately reported for busyness. > + * mapped to the same class identifier and so are not separately > + * reported for busyness.
> * > * Caveat emptor: > * Only the boolean result of this query is reliable; that is whether > @@ -1412,65 +1466,18 @@ struct drm_i915_gem_wait { > }; > > struct drm_i915_gem_context_create { > - /* output: id of new context*/ > - __u32 ctx_id; > - __u32 pad; > -}; > - > -struct drm_i915_gem_context_destroy { > - __u32 ctx_id; > + __u32 ctx_id; /* output: id of new context*/ > __u32 pad; > }; > > -struct drm_i915_reg_read { > - /* > - * Register offset. > - * For 64bit wide registers where the upper 32bits don't immediately > - * follow the lower 32bits, the offset of the lower 32bits must > - * be specified > - */ > - __u64 offset; > -#define I915_REG_READ_8B_WA (1ul << 0) > - > - __u64 val; /* Return value */ > -}; > -/* Known registers: > - * > - * Render engine timestamp - 0x2358 + 64bit - gen7+ > - * - Note this register returns an invalid value if using the default > - * single instruction 8byte read, in order to workaround that pass > - * flag I915_REG_READ_8B_WA in offset field. > - * > - */ > - > -struct drm_i915_reset_stats { > - __u32 ctx_id; > +struct drm_i915_gem_context_create_ext { > + __u32 ctx_id; /* output: id of new context*/ > __u32 flags; > - > - /* All resets since boot/module reload, for all contexts */ > - __u32 reset_count; > - > - /* Number of batches lost when active in GPU, for this context */ > - __u32 batch_active; > - > - /* Number of batches lost pending for execution, for this context */ > - __u32 batch_pending; > - > - __u32 pad; > -}; > - > -struct drm_i915_gem_userptr { > - __u64 user_ptr; > - __u64 user_size; > - __u32 flags; > -#define I915_USERPTR_READ_ONLY 0x1 > -#define I915_USERPTR_UNSYNCHRONIZED 0x80000000 > - /** > - * Returned handle for the object. > - * > - * Object handles are nonzero. 
> - */ > - __u32 handle; > +#define I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS (1u << 0) > +#define I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE (1u << 1) > +#define I915_CONTEXT_CREATE_FLAGS_UNKNOWN \ > + (-(I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE << 1)) > + __u64 extensions; > }; > > struct drm_i915_gem_context_param { > @@ -1491,6 +1498,63 @@ struct drm_i915_gem_context_param { > * drm_i915_gem_context_param_sseu. > */ > #define I915_CONTEXT_PARAM_SSEU 0x7 > + > +/* > + * Not all clients may want to attempt automatic recovery of a context after > + * a hang (for example, some clients may only submit very small incremental > + * batches relying on known logical state of previous batches which will never > + * recover correctly and each attempt will hang), and so would prefer that > + * the context is forever banned instead. > + * > + * If set to false (0), after a reset, subsequent (and in flight) rendering > + * from this context is discarded, and the client will need to create a new > + * context to use instead. > + * > + * If set to true (1), the kernel will automatically attempt to recover the > + * context by skipping the hanging batch and executing the next batch starting > + * from the default context state (discarding the incomplete logical context > + * state lost due to the reset). > + * > + * On creation, all new contexts are marked as recoverable. > + */ > +#define I915_CONTEXT_PARAM_RECOVERABLE 0x8 > + > + /* > + * The id of the associated virtual memory address space (ppGTT) of > + * this context. Can be retrieved and passed to another context > + * (on the same fd) for both to use the same ppGTT and so share > + * address layouts, and avoid reloading the page tables on context > + * switches between themselves. > + * > + * See DRM_I915_GEM_VM_CREATE and DRM_I915_GEM_VM_DESTROY. > + */ > +#define I915_CONTEXT_PARAM_VM 0x9 > + > +/* > + * I915_CONTEXT_PARAM_ENGINES: > + * > + * Bind this context to operate on this subset of available engines.
Henceforth, > + * the I915_EXEC_RING selector for DRM_IOCTL_I915_GEM_EXECBUFFER2 operates as > + * an index into this array of engines; I915_EXEC_DEFAULT selecting engine[0] > + * and upwards. Slots 0...N are filled in using the specified (class, instance). > + * Use > + * engine_class: I915_ENGINE_CLASS_INVALID, > + * engine_instance: I915_ENGINE_CLASS_INVALID_NONE > + * to specify a gap in the array that can be filled in later, e.g. by a > + * virtual engine used for load balancing. > + * > + * Setting the number of engines bound to the context to 0, by passing a zero > + * sized argument, will revert back to default settings. > + * > + * See struct i915_context_param_engines. > + * > + * Extensions: > + * i915_context_engines_load_balance (I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE) > + * i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND) > + */ > +#define I915_CONTEXT_PARAM_ENGINES 0xa > +/* Must be kept compact -- no holes and well documented */ > + > __u64 value; > }; > > @@ -1553,6 +1617,186 @@ struct drm_i915_gem_context_param_sseu { > __u32 rsvd; > }; > > +/* > + * i915_context_engines_load_balance: > + * > + * Enable load balancing across this set of engines. > + * > + * Into the I915_EXEC_DEFAULT slot [0], a virtual engine is created that when > + * used will proxy the execbuffer request onto one of the set of engines > + * in such a way as to distribute the load evenly across the set. > + * > + * The set of engines must be compatible (e.g. the same HW class) as they > + * will share the same logical GPU context and ring. > + * > + * To intermix rendering with the virtual engine and direct rendering onto > + * the backing engines (bypassing the load balancing proxy), the context must > + * be defined to use a single timeline for all engines. 
> + */ > +struct i915_context_engines_load_balance { > + struct i915_user_extension base; > + > + __u16 engine_index; > + __u16 mbz16; /* reserved for future use; must be zero */ > + __u32 flags; /* all undefined flags must be zero */ > + > + __u64 engines_mask; /* selection mask of engines[] */ > + > + __u64 mbz64[4]; /* reserved for future use; must be zero */ > +}; > + > +/* > + * i915_context_engines_bond: > + * > + * Constructed bonded pairs for execution within a virtual engine. > + * > + * All engines are equal, but some are more equal than others. Given > + * the distribution of resources in the HW, it may be preferable to run > + * a request on a given subset of engines in parallel to a request on a > + * specific engine. We enable this selection of engines within a virtual > + * engine by specifying bonding pairs, for any given master engine we will > + * only execute on one of the corresponding siblings within the virtual engine. > + * > + * To execute a request in parallel on the master engine and a sibling requires > + * coordination with a I915_EXEC_FENCE_SUBMIT. 
> + */ > +struct i915_context_engines_bond { > + struct i915_user_extension base; > + > + __u16 virtual_index; /* index of virtual engine in ctx->engines[] */ > + __u16 mbz; > + > + __u16 master_class; > + __u16 master_instance; > + > + __u64 sibling_mask; /* bitmask of BIT(sibling_index) wrt the v.engine */ > + __u64 flags; /* all undefined flags must be zero */ > +}; > + > +struct i915_context_param_engines { > + __u64 extensions; /* linked chain of extension blocks, 0 terminates */ > +#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 > +#define I915_CONTEXT_ENGINES_EXT_BOND 1 > + > + struct { > + __u16 engine_class; /* see enum drm_i915_gem_engine_class */ > + __u16 engine_instance; > + } class_instance[0]; > +} __attribute__((packed)); > + > +#define I915_DEFINE_CONTEXT_PARAM_ENGINES(name__, N__) struct { \ > + __u64 extensions; \ > + struct { \ > + __u16 engine_class; \ > + __u16 engine_instance; \ > + } class_instance[N__]; \ > +} __attribute__((packed)) name__ > + > +struct drm_i915_gem_context_create_ext_setparam { > +#define I915_CONTEXT_CREATE_EXT_SETPARAM 0 > + struct i915_user_extension base; > + struct drm_i915_gem_context_param param; > +}; > + > +struct drm_i915_gem_context_create_ext_clone { > +#define I915_CONTEXT_CREATE_EXT_CLONE 1 > + struct i915_user_extension base; > + __u32 clone_id; > + __u32 flags; > +#define I915_CONTEXT_CLONE_ENGINES (1u << 0) > +#define I915_CONTEXT_CLONE_FLAGS (1u << 1) > +#define I915_CONTEXT_CLONE_SCHEDATTR (1u << 2) > +#define I915_CONTEXT_CLONE_SSEU (1u << 3) > +#define I915_CONTEXT_CLONE_TIMELINE (1u << 4) > +#define I915_CONTEXT_CLONE_VM (1u << 5) > +#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_VM << 1) > + __u64 rsvd; > +}; > + > +struct drm_i915_gem_context_destroy { > + __u32 ctx_id; > + __u32 pad; > +}; > + > +/* > + * DRM_I915_GEM_VM_CREATE - > + * > + * Create a new virtual memory address space (ppGTT) for use within a context > + * on the same file. 
Extensions can be provided to configure exactly how the > + * address space is set up upon creation. > + * > + * The id of the new VM (bound to the fd) for use with I915_CONTEXT_PARAM_VM is > + * returned in the outparam @vm_id. > + * > + * No flags are defined, with all bits reserved and must be zero. > + * > + * An extension chain may be provided, starting with @extensions, and terminated > + * by the @next_extension being 0. Currently, no extensions are defined. > + * > + * DRM_I915_GEM_VM_DESTROY - > + * > + * Destroys a previously created VM id, specified in @vm_id. > + * > + * No extensions or flags are allowed currently, and so must be zero. > + */ > +struct drm_i915_gem_vm_control { > + __u64 extensions; > + __u32 flags; > + __u32 vm_id; > +}; > + > +struct drm_i915_reg_read { > + /* > + * Register offset. > + * For 64bit wide registers where the upper 32bits don't immediately > + * follow the lower 32bits, the offset of the lower 32bits must > + * be specified > + */ > + __u64 offset; > +#define I915_REG_READ_8B_WA (1ul << 0) > + > + __u64 val; /* Return value */ > +}; > + > +/* Known registers: > + * > + * Render engine timestamp - 0x2358 + 64bit - gen7+ > + * - Note this register returns an invalid value if using the default > + * single instruction 8byte read, in order to workaround that pass > + * flag I915_REG_READ_8B_WA in offset field. > + * > + */ > + > +struct drm_i915_reset_stats { > + __u32 ctx_id; > + __u32 flags; > + > + /* All resets since boot/module reload, for all contexts */ > + __u32 reset_count; > + > + /* Number of batches lost when active in GPU, for this context */ > + __u32 batch_active; > + > + /* Number of batches lost pending for execution, for this context */ > + __u32 batch_pending; > + > + __u32 pad; > +}; > + > +struct drm_i915_gem_userptr { > + __u64 user_ptr; > + __u64 user_size; > + __u32 flags; > +#define I915_USERPTR_READ_ONLY 0x1 > +#define I915_USERPTR_UNSYNCHRONIZED 0x80000000 > + /** > + * Returned handle for the object.
> + * > + * Object handles are nonzero. > + */ > + __u32 handle; > +}; > + > enum drm_i915_oa_format { > I915_OA_FORMAT_A13 = 1, /* HSW only */ > I915_OA_FORMAT_A29, /* HSW only */ > @@ -1714,6 +1958,7 @@ struct drm_i915_perf_oa_config { > struct drm_i915_query_item { > __u64 query_id; > #define DRM_I915_QUERY_TOPOLOGY_INFO 1 > +/* Must be kept compact -- no holes and well documented */ > > /* > * When set to zero by userspace, this is filled with the size of the > -- > 2.20.1 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx