On Thu, Jul 19, 2018 at 07:59:33PM +0100, Chris Wilson wrote:
> Quoting Ville Syrjala (2018-07-19 19:22:11)
> > +static struct scatterlist *
> > +remap_pages(const dma_addr_t *in, unsigned int offset,
> > +	    unsigned int width, unsigned int height,
> > +	    unsigned int stride,
> > +	    struct sg_table *st, struct scatterlist *sg)
> > +{
> > +	unsigned int column, row;
> > +
> > +	for (row = 0; row < height; row++) {
> > +		for (column = 0; column < width; column++) {
> > +			st->nents++;
> > +			/* We don't need the pages, but need to initialize
> > +			 * the entries so the sg list can be happily traversed.
> > +			 * The only thing we need are DMA addresses.
> > +			 */
> > +			sg_set_page(sg, NULL, PAGE_SIZE, 0);
> > +			sg_dma_address(sg) = in[offset + column];
> > +			sg_dma_len(sg) = PAGE_SIZE;
> > +			sg = sg_next(sg);
> Ok. But should be I915_GTT_PAGE_SIZE?

I suppose. And now I wonder what would happen on gen2 with its 2KiB
gtt pages. Probably nothing good.

>
> And yes, following that argument looks like rotation should be converted.

> > +		}
> > +		offset += stride;
> > +	}
> > +
> > +	return sg;
> > +}
> > +
> > +static noinline struct sg_table *
> > +intel_remap_pages(struct intel_remapped_info *rem_info,
> > +		  struct drm_i915_gem_object *obj)
> > +{
> > +	const unsigned long n_pages = obj->base.size / PAGE_SIZE;
> > +	unsigned int size = intel_remapped_info_size(rem_info);
> > +	struct sgt_iter sgt_iter;
> > +	dma_addr_t dma_addr;
> > +	unsigned long i;
> > +	dma_addr_t *page_addr_list;
> > +	struct sg_table *st;
> > +	struct scatterlist *sg;
> > +	int ret = -ENOMEM;
> > +
> > +	/* Allocate a temporary list of source pages for random access. */
> > +	page_addr_list = kvmalloc_array(n_pages,
> > +					sizeof(dma_addr_t),
> > +					GFP_KERNEL);
> > +	if (!page_addr_list)
> > +		return ERR_PTR(ret);
> > +
> > +	/* Allocate target SG list. */
> > +	st = kmalloc(sizeof(*st), GFP_KERNEL);
> > +	if (!st)
> > +		goto err_st_alloc;
> > +
> > +	ret = sg_alloc_table(st, size, GFP_KERNEL);
> > +	if (ret)
> > +		goto err_sg_alloc;
> > +
> > +	/* Populate source page list from the object. */
> > +	i = 0;
> > +	for_each_sgt_dma(dma_addr, sgt_iter, obj->mm.pages)
> > +		page_addr_list[i++] = dma_addr;
> You could use the i915_gem_object_get_dma_address() for this. We
> definitely will reuse the index (as the one object will likely be
> remapped into each CRTC). (Avoids having a temporary vmalloc here.)
>
> > +
> > +	GEM_BUG_ON(i != n_pages);
> > +	st->nents = 0;
> > +	sg = st->sgl;
> > +
> > +	for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) {
> > +		sg = remap_pages(page_addr_list, rem_info->plane[i].offset,
> > +				 rem_info->plane[i].width, rem_info->plane[i].height,
> > +				 rem_info->plane[i].stride, st, sg);
> > +	}
> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> > index 2a116a91420b..e15ac9f64c36 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> > @@ -160,6 +160,19 @@ typedef u64 gen8_ppgtt_pml4e_t;
> >  
> >  struct sg_table;
> >  
> > +struct intel_remapped_info {
> > +	struct intel_remapped_plane_info {
> > +		/* tiles */
> > +		unsigned int width, height, stride, offset;
> > +	} plane[2];
> > +	unsigned int unused;
> Tag it as mbz, since we do use it inside the compare. Hmm, I wonder if
> it actually is better if it doesn't exist if it isn't used, then it
> should be zero.. Hmm, not sure if that's defined at all, might have to
> say memset and don't rely on {} zeroing?
>
> Should work fine as a memcmp key for the rbtree.
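
A minimal sketch of the memset idea raised above, assuming the view is
built up in the display code before the vma lookup (the helper name is
made up purely for illustration):

	/*
	 * Hypothetical helper: zero the whole view, including any padding
	 * and the mbz/unused field, so a memcmp()-based lookup key is
	 * deterministic. memset() rather than "= {}" because zeroing of
	 * padding bytes by an empty initializer is not guaranteed.
	 * Caller guarantees num_planes <= ARRAY_SIZE(view->remapped.plane).
	 */
	static void fill_remapped_view(struct i915_ggtt_view *view,
				       const struct intel_remapped_plane_info *planes,
				       unsigned int num_planes)
	{
		unsigned int i;

		memset(view, 0, sizeof(*view));
		view->type = I915_GGTT_VIEW_REMAPPED;

		for (i = 0; i < num_planes; i++)
			view->remapped.plane[i] = planes[i];
	}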
This whole thing is a bit questionable the way I did it. When populating
the gtt_view I just poke at view->rotated and rely on matching layout
for view->remapped. To make it less magic maybe I should embed one
inside the other?

>
> > +} __packed;
> > +
> > +static inline void assert_intel_remapped_info_is_packed(void)
> > +{
> > +	BUILD_BUG_ON(sizeof(struct intel_remapped_info) != 10*sizeof(unsigned int));
> > +}
> > +
> >  struct intel_rotation_info {
> >  	struct intel_rotation_plane_info {
> >  		/* tiles */
> > @@ -186,6 +199,7 @@ enum i915_ggtt_view_type {
> >  	I915_GGTT_VIEW_NORMAL = 0,
> >  	I915_GGTT_VIEW_ROTATED = sizeof(struct intel_rotation_info),
> >  	I915_GGTT_VIEW_PARTIAL = sizeof(struct intel_partial_info),
> > +	I915_GGTT_VIEW_REMAPPED = sizeof(struct intel_remapped_info),
> >  };
> >  
> >  static inline void assert_i915_ggtt_view_type_is_unique(void)
> > @@ -197,6 +211,7 @@ static inline void assert_i915_ggtt_view_type_is_unique(void)
> >  	case I915_GGTT_VIEW_NORMAL:
> >  	case I915_GGTT_VIEW_PARTIAL:
> >  	case I915_GGTT_VIEW_ROTATED:
> > +	case I915_GGTT_VIEW_REMAPPED:
> >  		/* gcc complains if these are identical cases */
> >  		break;
> >  	}
> > @@ -208,6 +223,7 @@ struct i915_ggtt_view {
> >  		/* Members need to contain no holes/padding */
> >  		struct intel_partial_info partial;
> >  		struct intel_rotation_info rotated;
> > +		struct intel_remapped_info remapped;
> >  	};
> >  };
> >  
> > diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> > index 11d834f94220..eb54755ec25b 100644
> > --- a/drivers/gpu/drm/i915/i915_vma.c
> > +++ b/drivers/gpu/drm/i915/i915_vma.c
> > @@ -164,6 +164,9 @@ vma_create(struct drm_i915_gem_object *obj,
> >  	} else if (view->type == I915_GGTT_VIEW_ROTATED) {
> >  		vma->size = intel_rotation_info_size(&view->rotated);
> >  		vma->size <<= PAGE_SHIFT;
> > +	} else if (view->type == I915_GGTT_VIEW_REMAPPED) {
> > +		vma->size = intel_remapped_info_size(&view->remapped);
> > +		vma->size <<= PAGE_SHIFT;
> >  	}
> >  }
> >  
> > @@ -462,7 +465,8 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
> >  	 * Explicitly disable for rotated VMA since the display does not
> >  	 * need the fence and the VMA is not accessible to other users.
> >  	 */
> > -	if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
> > +	if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED ||
> > +	    vma->ggtt_view.type == I915_GGTT_VIEW_REMAPPED)
> >  		return;
> Not fenceable? I suppose, you only want the fence tracking from GGTT
> mmaps. Do we need to make sure FBC finally works on unfenced vma? I
> guess we do.

Might be a nice bonus. I keep putting off touching FBC in a significant
way though because I know it won't stop with the first patch.
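
Coming back to the "embed one inside the other" idea at the top of this
reply, one way to make the matching layout explicit rather than implicit
would be to share the plane info struct between the two views, roughly
like this (just a sketch of the idea, not part of this patch):

	struct intel_remapped_plane_info {
		/* tiles */
		unsigned int width, height, stride, offset;
	} __packed;

	struct intel_rotation_info {
		struct intel_remapped_plane_info plane[2];
	} __packed;

	struct intel_remapped_info {
		struct intel_remapped_plane_info plane[2];
		/* must be zero; only here so the sizeof()s (and thus the
		 * view types derived from them) stay unique */
		unsigned int unused_mbz;
	} __packed;

That way the code populating view->rotated and view->remapped would
operate on the same plane layout by construction instead of relying on
the two structs happening to match.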
>
> >  	fenceable = (vma->node.size >= vma->fence_size &&
> > diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> > index f06d66377107..19bb3115226c 100644
> > --- a/drivers/gpu/drm/i915/i915_vma.h
> > +++ b/drivers/gpu/drm/i915/i915_vma.h
> > @@ -257,8 +257,11 @@ i915_vma_compare(struct i915_vma *vma,
> >  	 */
> >  	BUILD_BUG_ON(I915_GGTT_VIEW_NORMAL >= I915_GGTT_VIEW_PARTIAL);
> >  	BUILD_BUG_ON(I915_GGTT_VIEW_PARTIAL >= I915_GGTT_VIEW_ROTATED);
> > +	BUILD_BUG_ON(I915_GGTT_VIEW_ROTATED >= I915_GGTT_VIEW_REMAPPED);
> >  	BUILD_BUG_ON(offsetof(typeof(*view), rotated) !=
> > -		     offsetof(typeof(*view), partial));
> > +		     offsetof(typeof(*view), partial) ||
> > +		     offsetof(typeof(*view), rotated) !=
> > +		     offsetof(typeof(*view), remapped));
> Two separate BUILD_BUG_ON() I think, one pair of offsetof() will be
> harder to interpret should it fail; two pairs never!

ack

-- 
Ville Syrjälä
Intel
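
A sketch of the two-BUILD_BUG_ON() split acked above (same checks as the
combined version, just reported independently if one of them ever trips):

	BUILD_BUG_ON(offsetof(typeof(*view), rotated) !=
		     offsetof(typeof(*view), partial));
	BUILD_BUG_ON(offsetof(typeof(*view), remapped) !=
		     offsetof(typeof(*view), partial));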