On 2022-08-05 at 14:22:40 +0100, Matthew Auld wrote: > Crucible + recent Mesa seems to sometimes hit: > > GEM_BUG_ON(num_ccs_blks > NUM_CCS_BLKS_PER_XFER) > > And it looks like we can also trigger this with gem_lmem_swapping, if we > modify the test to use slightly larger object sizes. > > Looking closer it looks like we have the following issues in > migrate_copy(): > > - We are using plain integer in various places, which we can easily overflow > with a large object. > > - We pass the entire object size (when the src is lmem) into > emit_pte() and then try to copy it, which doesn't work, since we > only have a few fixed sized windows in which to map the pages and > perform the copy. With an object > 8M we therefore aren't properly > copying the pages. And then with an object > 64M we trigger the > GEM_BUG_ON(num_ccs_blks > NUM_CCS_BLKS_PER_XFER). > > So it looks like our copy handling for any object > 8M (which is our > CHUNK_SZ) is currently broken on DG2. > > Fixes: da0595ae91da ("drm/i915/migrate: Evict and restore the flatccs capable lmem obj") > Testcase: igt@gem_lmem_swapping@basic-big > Testcase: igt@gem_lmem_swapping@verify-ccs-big > Signed-off-by: Matthew Auld <matthew.auld@xxxxxxxxx> > Cc: Thomas Hellström <thomas.hellstrom@xxxxxxxxxxxxxxx> > Cc: Ramalingam C <ramalingam.c@xxxxxxxxx> > --- > drivers/gpu/drm/i915/gt/intel_migrate.c | 44 ++++++++++++------------- > 1 file changed, 21 insertions(+), 23 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c > index 1bbed7aa436a..aaaf1906026c 100644 > --- a/drivers/gpu/drm/i915/gt/intel_migrate.c > +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c > @@ -609,9 +609,9 @@ static int emit_copy(struct i915_request *rq, > return 0; > } > > -static int scatter_list_length(struct scatterlist *sg) > +static u64 scatter_list_length(struct scatterlist *sg) > { > - int len = 0; > + u64 len = 0; > > while (sg && sg_dma_len(sg)) { > len += sg_dma_len(sg); > @@ -621,28 +621,26 @@ static int scatter_list_length(struct scatterlist *sg) > return len; > } > > -static void > +static int > calculate_chunk_sz(struct drm_i915_private *i915, bool src_is_lmem, > - int *src_sz, u32 bytes_to_cpy, u32 ccs_bytes_to_cpy) > + u64 bytes_to_cpy, u64 ccs_bytes_to_cpy) > { > - if (ccs_bytes_to_cpy) { > - if (!src_is_lmem) > - /* > - * When CHUNK_SZ is passed all the pages upto CHUNK_SZ > - * will be taken for the blt. in Flat-ccs supported > - * platform Smem obj will have more pages than required > - * for main meory hence limit it to the required size > - * for main memory > - */ > - *src_sz = min_t(int, bytes_to_cpy, CHUNK_SZ); > - } else { /* ccs handling is not required */ > - *src_sz = CHUNK_SZ; > - } > + if (ccs_bytes_to_cpy && !src_is_lmem) Yes this is needed for ccs copy of an obj of >8M from lmem to smem. Reviewed-by: Ramalingam C<ramalingam.c@xxxxxxxxx> > + /* > + * When CHUNK_SZ is passed all the pages upto CHUNK_SZ > + * will be taken for the blt. in Flat-ccs supported > + * platform Smem obj will have more pages than required > + * for main meory hence limit it to the required size > + * for main memory > + */ > + return min_t(u64, bytes_to_cpy, CHUNK_SZ); > + else > + return CHUNK_SZ; > } > > -static void get_ccs_sg_sgt(struct sgt_dma *it, u32 bytes_to_cpy) > +static void get_ccs_sg_sgt(struct sgt_dma *it, u64 bytes_to_cpy) > { > - u32 len; > + u64 len; > > do { > GEM_BUG_ON(!it->sg || !sg_dma_len(it->sg)); > @@ -673,12 +671,12 @@ intel_context_migrate_copy(struct intel_context *ce, > { > struct sgt_dma it_src = sg_sgt(src), it_dst = sg_sgt(dst), it_ccs; > struct drm_i915_private *i915 = ce->engine->i915; > - u32 ccs_bytes_to_cpy = 0, bytes_to_cpy; > + u64 ccs_bytes_to_cpy = 0, bytes_to_cpy; > enum i915_cache_level ccs_cache_level; > u32 src_offset, dst_offset; > u8 src_access, dst_access; > struct i915_request *rq; > - int src_sz, dst_sz; > + u64 src_sz, dst_sz; > bool ccs_is_src, overwrite_ccs; > int err; > > @@ -761,8 +759,8 @@ intel_context_migrate_copy(struct intel_context *ce, > if (err) > goto out_rq; > > - calculate_chunk_sz(i915, src_is_lmem, &src_sz, > - bytes_to_cpy, ccs_bytes_to_cpy); > + src_sz = calculate_chunk_sz(i915, src_is_lmem, > + bytes_to_cpy, ccs_bytes_to_cpy); > > len = emit_pte(rq, &it_src, src_cache_level, src_is_lmem, > src_offset, src_sz); > -- > 2.37.1 >