On 2022-03-21 at 14:19:01 +0530, Hellstrom, Thomas wrote:
> On Sun, 2022-03-20 at 02:12 +0530, Ramalingam C wrote:
> > XY_FAST_COLOR_BLT cmd is faster than the older XY_COLOR_BLT. Hence
> > for
> > clearing (Zero out) the pages of the newly allocated object, faster
> > cmd
> > is used.
> 
> NIT: Imperative wording
> 
> > 
> > Signed-off-by: Ramalingam C <ramalingam.c@xxxxxxxxx>
> > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> 
> Also there's a typo in the patch title.

Both are fixed in the next version.
Thanks for the review, Thomas.

Ram
> 
> With that fixed:
> Reviewed-by: Thomas Hellström <thomas.hellstrom@xxxxxxxxxxxxxxx>
> 
> 
> > ---
> > drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 5 +++
> > drivers/gpu/drm/i915/gt/intel_migrate.c | 43 +++++++++++++++++-
> > --
> > 2 files changed, 43 insertions(+), 5 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> > b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> > index d112ffd56418..925e55b6a94f 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> > @@ -205,6 +205,11 @@
> > 
> > #define COLOR_BLT_CMD (2 << 29 | 0x40 << 22 | (5 -
> > 2))
> > #define XY_COLOR_BLT_CMD (2 << 29 | 0x50 << 22)
> > +#define XY_FAST_COLOR_BLT_CMD (2 << 29 | 0x44 << 22)
> > +#define XY_FAST_COLOR_BLT_DEPTH_32 (2 << 19)
> > +#define XY_FAST_COLOR_BLT_DW 16
> > +#define XY_FAST_COLOR_BLT_MOCS_MASK GENMASK(27, 21)
> > +#define XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT 31
> > #define SRC_COPY_BLT_CMD (2 << 29 | 0x43 << 22)
> > #define GEN9_XY_FAST_COPY_BLT_CMD (2 << 29 | 0x42 << 22)
> > #define XY_SRC_COPY_BLT_CMD (2 << 29 | 0x53 << 22)
> > diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c
> > b/drivers/gpu/drm/i915/gt/intel_migrate.c
> > index 20444d6ceb3c..73199ebf0671 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_migrate.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
> > @@ -614,20 +614,53 @@ intel_context_migrate_copy(struct intel_context
> > *ce,
> 
> return err;
> > }
> > 
> > -static int emit_clear(struct i915_request *rq, u64 offset, int size,
> > u32 value)
> > +static int emit_clear(struct i915_request *rq, u64 offset, int size,
> > + u32 value, bool is_lmem)
> > {
> > - const int ver = GRAPHICS_VER(rq->engine->i915);
> > + struct drm_i915_private *i915 = rq->engine->i915;
> > + int mocs = rq->engine->gt->mocs.uc_index << 1;
> > + const int ver = GRAPHICS_VER(i915);
> > + int ring_sz;
> > u32 *cs;
> > 
> > GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);
> > 
> > offset += (u64)rq->engine->instance << 32;
> > 
> > - cs = intel_ring_begin(rq, ver >= 8 ? 8 : 6);
> > + if (ver >= 12)
> > + ring_sz = 16;
> > + else if (ver >= 8)
> > + ring_sz = 8;
> > + else
> > + ring_sz = 6;
> > +
> > + cs = intel_ring_begin(rq, ring_sz);
> > if (IS_ERR(cs))
> > return PTR_ERR(cs);
> > 
> > - if (ver >= 8) {
> > + if (ver >= 12) {
> > + *cs++ = XY_FAST_COLOR_BLT_CMD |
> > XY_FAST_COLOR_BLT_DEPTH_32 |
> > + (XY_FAST_COLOR_BLT_DW - 2);
> > + *cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs)
> > |
> > + (PAGE_SIZE - 1);
> > + *cs++ = 0;
> > + *cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
> > + *cs++ = lower_32_bits(offset);
> > + *cs++ = upper_32_bits(offset);
> > + *cs++ = !is_lmem << XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT;
> > + /* BG7 */
> > + *cs++ = value;
> > + *cs++ = 0;
> > + *cs++ = 0;
> > + *cs++ = 0;
> > + /* BG11 */
> > + *cs++ = 0;
> > + *cs++ = 0;
> > + /* BG13 */
> > + *cs++ = 0;
> > + *cs++ = 0;
> > + *cs++ = 0;
> > + } else if (ver >= 8) {
> > *cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2);
> > *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY |
> > PAGE_SIZE;
> > *cs++ = 0;
> > @@ -711,7 +744,7 @@ intel_context_migrate_clear(struct intel_context
> > *ce,
> > if (err)
> > goto out_rq;
> > 
> > - err = emit_clear(rq, offset, len, value);
> > + err = emit_clear(rq, offset, len, value, is_lmem);
> > 
> > /* Arbitration is re-enabled between requests. */
> > out_rq:
> 