On Wed, Apr 26, 2023 at 04:57:06PM -0400, Rodrigo Vivi wrote: > These structs and definitions are only used for the guc_submit > and they were added specifically for the parallel submission. > > While doing that also delete the unused struct guc_wq_item. > > Cc: Matthew Brost <matthew.brost@xxxxxxxxx> > Signed-off-by: Rodrigo Vivi <rodrigo.vivi@xxxxxxxxx> Reviewed-by: Matthew Brost <matthew.brost@xxxxxxxxx> Side note: this reminds me that we need to implement the parallel submission handshake in xe_ring_ops.c if we want parallel submission to truly work. It should be a straight port from the i915... We should also write an IGT for that. > --- > drivers/gpu/drm/xe/xe_guc_fwif.h | 29 ----------- > drivers/gpu/drm/xe/xe_guc_submit.c | 40 ++++----------- > drivers/gpu/drm/xe/xe_guc_submit_types.h | 64 ++++++++++++++++++++++++ > 3 files changed, 75 insertions(+), 58 deletions(-) > create mode 100644 drivers/gpu/drm/xe/xe_guc_submit_types.h > > diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h > index 20155ba4ef07..27d132ce2087 100644 > --- a/drivers/gpu/drm/xe/xe_guc_fwif.h > +++ b/drivers/gpu/drm/xe/xe_guc_fwif.h > @@ -46,35 +46,6 @@ > #define GUC_MAX_ENGINE_CLASSES 16 > #define GUC_MAX_INSTANCES_PER_CLASS 32 > > -/* Work item for submitting workloads into work queue of GuC. 
*/ > -#define WQ_STATUS_ACTIVE 1 > -#define WQ_STATUS_SUSPENDED 2 > -#define WQ_STATUS_CMD_ERROR 3 > -#define WQ_STATUS_ENGINE_ID_NOT_USED 4 > -#define WQ_STATUS_SUSPENDED_FROM_RESET 5 > -#define WQ_TYPE_NOOP 0x4 > -#define WQ_TYPE_MULTI_LRC 0x5 > -#define WQ_TYPE_MASK GENMASK(7, 0) > -#define WQ_LEN_MASK GENMASK(26, 16) > - > -#define WQ_GUC_ID_MASK GENMASK(15, 0) > -#define WQ_RING_TAIL_MASK GENMASK(28, 18) > - > -struct guc_wq_item { > - u32 header; > - u32 context_desc; > - u32 submit_element_info; > - u32 fence_id; > -} __packed; > - > -struct guc_sched_wq_desc { > - u32 head; > - u32 tail; > - u32 error_offset; > - u32 wq_status; > - u32 reserved[28]; > -} __packed; > - > /* Helper for context registration H2G */ > struct guc_ctxt_registration_info { > u32 flags; > diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c > index 231fb4145297..a5fe7755ce4c 100644 > --- a/drivers/gpu/drm/xe/xe_guc_submit.c > +++ b/drivers/gpu/drm/xe/xe_guc_submit.c > @@ -22,6 +22,7 @@ > #include "xe_guc.h" > #include "xe_guc_ct.h" > #include "xe_guc_engine_types.h" > +#include "xe_guc_submit_types.h" > #include "xe_hw_engine.h" > #include "xe_hw_fence.h" > #include "xe_lrc.h" > @@ -378,32 +379,12 @@ static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_engine *e) > __guc_engine_policy_action_size(&policy), 0, 0); > } > > -#define PARALLEL_SCRATCH_SIZE 2048 > -#define WQ_SIZE (PARALLEL_SCRATCH_SIZE / 2) > -#define WQ_OFFSET (PARALLEL_SCRATCH_SIZE - WQ_SIZE) > -#define CACHELINE_BYTES 64 > - > -struct sync_semaphore { > - u32 semaphore; > - u8 unused[CACHELINE_BYTES - sizeof(u32)]; > -}; > - > -struct parallel_scratch { > - struct guc_sched_wq_desc wq_desc; > - > - struct sync_semaphore go; > - struct sync_semaphore join[XE_HW_ENGINE_MAX_INSTANCE]; > - > - u8 unused[WQ_OFFSET - sizeof(struct guc_sched_wq_desc) - > - sizeof(struct sync_semaphore) * (XE_HW_ENGINE_MAX_INSTANCE + 1)]; > - > - u32 wq[WQ_SIZE / sizeof(u32)]; > -}; > - > 
#define parallel_read(xe_, map_, field_) \ > - xe_map_rd_field(xe_, &map_, 0, struct parallel_scratch, field_) > + xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \ > + field_) > #define parallel_write(xe_, map_, field_, val_) \ > - xe_map_wr_field(xe_, &map_, 0, struct parallel_scratch, field_, val_) > + xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \ > + field_, val_) > > static void __register_mlrc_engine(struct xe_guc *guc, > struct xe_engine *e, > @@ -486,13 +467,13 @@ static void register_engine(struct xe_engine *e) > struct iosys_map map = xe_lrc_parallel_map(lrc); > > info.wq_desc_lo = lower_32_bits(ggtt_addr + > - offsetof(struct parallel_scratch, wq_desc)); > + offsetof(struct guc_submit_parallel_scratch, wq_desc)); > info.wq_desc_hi = upper_32_bits(ggtt_addr + > - offsetof(struct parallel_scratch, wq_desc)); > + offsetof(struct guc_submit_parallel_scratch, wq_desc)); > info.wq_base_lo = lower_32_bits(ggtt_addr + > - offsetof(struct parallel_scratch, wq[0])); > + offsetof(struct guc_submit_parallel_scratch, wq[0])); > info.wq_base_hi = upper_32_bits(ggtt_addr + > - offsetof(struct parallel_scratch, wq[0])); > + offsetof(struct guc_submit_parallel_scratch, wq[0])); > info.wq_size = WQ_SIZE; > > e->guc->wqi_head = 0; > @@ -594,7 +575,7 @@ static void wq_item_append(struct xe_engine *e) > > XE_BUG_ON(i != wqi_size / sizeof(u32)); > > - iosys_map_incr(&map, offsetof(struct parallel_scratch, > + iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch, > wq[e->guc->wqi_tail / sizeof(u32)])); > xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size); > e->guc->wqi_tail += wqi_size; > @@ -1674,6 +1655,7 @@ static void guc_engine_print(struct xe_engine *e, struct drm_printer *p) > guc_engine_wq_print(e, p); > > spin_lock(&sched->job_list_lock); > + > list_for_each_entry(job, &sched->pending_list, drm.list) > drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n", > xe_sched_job_seqno(job), > diff --git 
a/drivers/gpu/drm/xe/xe_guc_submit_types.h b/drivers/gpu/drm/xe/xe_guc_submit_types.h > new file mode 100644 > index 000000000000..d369ea0bad60 > --- /dev/null > +++ b/drivers/gpu/drm/xe/xe_guc_submit_types.h > @@ -0,0 +1,64 @@ > +// SPDX-License-Identifier: MIT > +/* > + * Copyright © 2023 Intel Corporation > + */ > + > +#ifndef _XE_GUC_SUBMIT_TYPES_H_ > +#define _XE_GUC_SUBMIT_TYPES_H_ > + > +#include "xe_hw_engine_types.h" > + > +/* Work item for submitting workloads into work queue of GuC. */ > +#define WQ_STATUS_ACTIVE 1 > +#define WQ_STATUS_SUSPENDED 2 > +#define WQ_STATUS_CMD_ERROR 3 > +#define WQ_STATUS_ENGINE_ID_NOT_USED 4 > +#define WQ_STATUS_SUSPENDED_FROM_RESET 5 > +#define WQ_TYPE_NOOP 0x4 > +#define WQ_TYPE_MULTI_LRC 0x5 > +#define WQ_TYPE_MASK GENMASK(7, 0) > +#define WQ_LEN_MASK GENMASK(26, 16) > + > +#define WQ_GUC_ID_MASK GENMASK(15, 0) > +#define WQ_RING_TAIL_MASK GENMASK(28, 18) > + > +#define PARALLEL_SCRATCH_SIZE 2048 > +#define WQ_SIZE (PARALLEL_SCRATCH_SIZE / 2) > +#define WQ_OFFSET (PARALLEL_SCRATCH_SIZE - WQ_SIZE) > +#define CACHELINE_BYTES 64 > + > +struct guc_sched_wq_desc { > + u32 head; > + u32 tail; > + u32 error_offset; > + u32 wq_status; > + u32 reserved[28]; > +} __packed; > + > +struct sync_semaphore { > + u32 semaphore; > + u8 unused[CACHELINE_BYTES - sizeof(u32)]; > +}; > + > +/** > + * Struct guc_submit_parallel_scratch - A scratch shared mapped buffer. 
> + */ > +struct guc_submit_parallel_scratch { > + /** @wq_desc: Guc scheduler workqueue descriptor */ > + struct guc_sched_wq_desc wq_desc; > + > + /** @go: Go Semaphore */ > + struct sync_semaphore go; > + /** @join: Joined semaphore for the relevant hw engine instances */ > + struct sync_semaphore join[XE_HW_ENGINE_MAX_INSTANCE]; > + > + /** @unused: Unused/Reserved memory space */ > + u8 unused[WQ_OFFSET - sizeof(struct guc_sched_wq_desc) - > + sizeof(struct sync_semaphore) * > + (XE_HW_ENGINE_MAX_INSTANCE + 1)]; > + > + /** @wq: Workqueue info */ > + u32 wq[WQ_SIZE / sizeof(u32)]; > +}; > + > +#endif > -- > 2.39.2 >