On 25/09/2018 20:37, Ville Syrjala wrote:
From: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx>
To overcome display engine stride limits we'll want to remap the
pages in the GTT. To that end we need a new gtt_view type which
is just like the "rotated" type except not rotated.
v2: Use intel_remapped_plane_info base type
s/unused/unused_mbz/ (Chris)
Separate BUILD_BUG_ON()s (Chris)
Use I915_GTT_PAGE_SIZE (Chris)
Signed-off-by: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx>
---
drivers/gpu/drm/i915/i915_debugfs.c | 12 ++++
drivers/gpu/drm/i915/i915_gem_gtt.c | 91 +++++++++++++++++++++++++++++++
drivers/gpu/drm/i915/i915_gem_gtt.h | 25 +++++++--
drivers/gpu/drm/i915/i915_vma.c | 6 +-
drivers/gpu/drm/i915/i915_vma.h | 3 +
drivers/gpu/drm/i915/intel_display.c | 11 ++++
drivers/gpu/drm/i915/intel_drv.h | 1 +
drivers/gpu/drm/i915/selftests/i915_vma.c | 6 +-
8 files changed, 147 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index b4744a68cd88..edb9f6f3c0cf 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -196,6 +196,18 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
vma->ggtt_view.rotated.plane[1].offset);
break;
+ case I915_GGTT_VIEW_REMAPPED:
+ seq_printf(m, ", remapped [(%ux%u, stride=%u, offset=%u), (%ux%u, stride=%u, offset=%u)]",
+ vma->ggtt_view.remapped.plane[0].width,
+ vma->ggtt_view.remapped.plane[0].height,
+ vma->ggtt_view.remapped.plane[0].stride,
+ vma->ggtt_view.remapped.plane[0].offset,
+ vma->ggtt_view.remapped.plane[1].width,
+ vma->ggtt_view.remapped.plane[1].height,
+ vma->ggtt_view.remapped.plane[1].stride,
+ vma->ggtt_view.remapped.plane[1].offset);
+ break;
+
default:
MISSING_CASE(vma->ggtt_view.type);
break;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index f6c7ab413081..e4d49c345c81 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3798,6 +3798,92 @@ intel_rotate_pages(struct intel_rotation_info *rot_info,
return ERR_PTR(ret);
}
+static struct scatterlist *
+remap_pages(const dma_addr_t *in, unsigned int offset,
+ unsigned int width, unsigned int height,
+ unsigned int stride,
+ struct sg_table *st, struct scatterlist *sg)
+{
+ unsigned int column, row;
+
+ for (row = 0; row < height; row++) {
+ for (column = 0; column < width; column++) {
+ st->nents++;
+ /* We don't need the pages, but need to initialize
+ * the entries so the sg list can be happily traversed.
+ * The only thing we need are DMA addresses.
+ */
+ sg_set_page(sg, NULL, I915_GTT_PAGE_SIZE, 0);
+ sg_dma_address(sg) = in[offset + column];
+ sg_dma_len(sg) = I915_GTT_PAGE_SIZE;
+ sg = sg_next(sg);
For this type of remapping you could I think potentially get some
decreased memory use benefits by coalescing the consecutive addresses
into single sg entries. If I understood correctly what it's doing at
least.. Since framebuffers could have a good chance to have been
initially allocated in larger than page size chunks.
If you decide to do that, then...
+ }
+ offset += stride;
+ }
+
+ return sg;
+}
+
+static noinline struct sg_table *
+intel_remap_pages(struct intel_remapped_info *rem_info,
+ struct drm_i915_gem_object *obj)
+{
+ const unsigned long n_pages = obj->base.size / I915_GTT_PAGE_SIZE;
+ unsigned int size = intel_remapped_info_size(rem_info);
+ struct sgt_iter sgt_iter;
+ dma_addr_t dma_addr;
+ unsigned long i;
+ dma_addr_t *page_addr_list;
+ struct sg_table *st;
+ struct scatterlist *sg;
+ int ret = -ENOMEM;
+
+ /* Allocate a temporary list of source pages for random access. */
+ page_addr_list = kvmalloc_array(n_pages,
+ sizeof(dma_addr_t),
+ GFP_KERNEL);
+ if (!page_addr_list)
+ return ERR_PTR(ret);
+
+ /* Allocate target SG list. */
+ st = kmalloc(sizeof(*st), GFP_KERNEL);
+ if (!st)
+ goto err_st_alloc;
+
+ ret = sg_alloc_table(st, size, GFP_KERNEL);
+ if (ret)
+ goto err_sg_alloc;
+
+ /* Populate source page list from the object. */
+ i = 0;
+ for_each_sgt_dma(dma_addr, sgt_iter, obj->mm.pages)
+ page_addr_list[i++] = dma_addr;
+
+ GEM_BUG_ON(i != n_pages);
+ st->nents = 0;
+ sg = st->sgl;
+
+ for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) {
+ sg = remap_pages(page_addr_list, rem_info->plane[i].offset,
+ rem_info->plane[i].width, rem_info->plane[i].height,
+ rem_info->plane[i].stride, st, sg);
+ }
+
+ kvfree(page_addr_list);
+
... put i915_sg_trim here to lose the trailing end of unused sg entries.
One more thing, do you really need random access for this
transformation? Or you could walk the sg list as it is? Just if you hit
a too long chunk you need to copy a trimmed version over and know where
to continue for the next row. If doable it would be better than having
to kvmalloc_array.
Regards,
Tvrtko
+ return st;
+
+err_sg_alloc:
+ kfree(st);
+err_st_alloc:
+ kvfree(page_addr_list);
+
+ DRM_DEBUG_DRIVER("Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n",
+ obj->base.size, rem_info->plane[0].width, rem_info->plane[0].height, size);
+
+ return ERR_PTR(ret);
+}
+
static noinline struct sg_table *
intel_partial_pages(const struct i915_ggtt_view *view,
struct drm_i915_gem_object *obj)
@@ -3874,6 +3960,11 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma)
intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj);
break;
+ case I915_GGTT_VIEW_REMAPPED:
+ vma->pages =
+ intel_remap_pages(&vma->ggtt_view.remapped, vma->obj);
+ break;
+
case I915_GGTT_VIEW_PARTIAL:
vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
break;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 7e2af5f4f39b..69a22f57e6ca 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -160,11 +160,18 @@ typedef u64 gen8_ppgtt_pml4e_t;
struct sg_table;
+struct intel_remapped_plane_info {
+ /* in gtt pages */
+ unsigned int width, height, stride, offset;
+} __packed;
+
+struct intel_remapped_info {
+ struct intel_remapped_plane_info plane[2];
+ unsigned int unused_mbz;
+} __packed;
+
struct intel_rotation_info {
- struct intel_rotation_plane_info {
- /* tiles */
- unsigned int width, height, stride, offset;
- } plane[2];
+ struct intel_remapped_plane_info plane[2];
} __packed;
struct intel_partial_info {
@@ -176,12 +183,20 @@ enum i915_ggtt_view_type {
I915_GGTT_VIEW_NORMAL = 0,
I915_GGTT_VIEW_ROTATED = sizeof(struct intel_rotation_info),
I915_GGTT_VIEW_PARTIAL = sizeof(struct intel_partial_info),
+ I915_GGTT_VIEW_REMAPPED = sizeof(struct intel_remapped_info),
};
static inline void assert_i915_gem_gtt_types(void)
{
BUILD_BUG_ON(sizeof(struct intel_rotation_info) != 8*sizeof(unsigned int));
BUILD_BUG_ON(sizeof(struct intel_partial_info) != sizeof(u64) + sizeof(unsigned int));
+ BUILD_BUG_ON(sizeof(struct intel_remapped_info) != 9*sizeof(unsigned int));
+
+ /* Check that rotation/remapped shares offsets for simplicity */
+ BUILD_BUG_ON(offsetof(struct intel_remapped_info, plane[0]) !=
+ offsetof(struct intel_rotation_info, plane[0]));
+ BUILD_BUG_ON(offsetofend(struct intel_remapped_info, plane[1]) !=
+ offsetofend(struct intel_rotation_info, plane[1]));
/* As we encode the size of each branch inside the union into its type,
* we have to be careful that each branch has a unique size.
@@ -190,6 +205,7 @@ static inline void assert_i915_gem_gtt_types(void)
case I915_GGTT_VIEW_NORMAL:
case I915_GGTT_VIEW_PARTIAL:
case I915_GGTT_VIEW_ROTATED:
+ case I915_GGTT_VIEW_REMAPPED:
/* gcc complains if these are identical cases */
break;
}
@@ -201,6 +217,7 @@ struct i915_ggtt_view {
/* Members need to contain no holes/padding */
struct intel_partial_info partial;
struct intel_rotation_info rotated;
+ struct intel_remapped_info remapped;
};
};
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 31efc971a3a8..1793c702ab94 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -164,6 +164,9 @@ vma_create(struct drm_i915_gem_object *obj,
} else if (view->type == I915_GGTT_VIEW_ROTATED) {
vma->size = intel_rotation_info_size(&view->rotated);
vma->size <<= PAGE_SHIFT;
+ } else if (view->type == I915_GGTT_VIEW_REMAPPED) {
+ vma->size = intel_remapped_info_size(&view->remapped);
+ vma->size <<= PAGE_SHIFT;
}
}
@@ -464,7 +467,8 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
* Explicitly disable for rotated VMA since the display does not
* need the fence and the VMA is not accessible to other users.
*/
- if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
+ if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED ||
+ vma->ggtt_view.type == I915_GGTT_VIEW_REMAPPED)
return;
fenceable = (vma->node.size >= vma->fence_size &&
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 4f7c1c7599f4..64cf029c028a 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -265,8 +265,11 @@ i915_vma_compare(struct i915_vma *vma,
*/
BUILD_BUG_ON(I915_GGTT_VIEW_NORMAL >= I915_GGTT_VIEW_PARTIAL);
BUILD_BUG_ON(I915_GGTT_VIEW_PARTIAL >= I915_GGTT_VIEW_ROTATED);
+ BUILD_BUG_ON(I915_GGTT_VIEW_ROTATED >= I915_GGTT_VIEW_REMAPPED);
BUILD_BUG_ON(offsetof(typeof(*view), rotated) !=
offsetof(typeof(*view), partial));
+ BUILD_BUG_ON(offsetof(typeof(*view), rotated) !=
+ offsetof(typeof(*view), remapped));
return memcmp(&vma->ggtt_view.partial, &view->partial, view->type);
}
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 1fe5cf3ed062..4f9f519ccd46 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2006,6 +2006,17 @@ unsigned int intel_rotation_info_size(const struct intel_rotation_info *rot_info
return size;
}
+unsigned int intel_remapped_info_size(const struct intel_remapped_info *rem_info)
+{
+ unsigned int size = 0;
+ int i;
+
+ for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++)
+ size += rem_info->plane[i].width * rem_info->plane[i].height;
+
+ return size;
+}
+
static void
intel_fill_fb_ggtt_view(struct i915_ggtt_view *view,
const struct drm_framebuffer *fb,
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 4fcba99d12c0..a88a16b74f24 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1503,6 +1503,7 @@ unsigned int intel_fb_xy_to_linear(int x, int y,
void intel_add_fb_offsets(int *x, int *y,
const struct intel_plane_state *state, int plane);
unsigned int intel_rotation_info_size(const struct intel_rotation_info *rot_info);
+unsigned int intel_remapped_info_size(const struct intel_remapped_info *rem_info);
bool intel_has_pending_fb_unpin(struct drm_i915_private *dev_priv);
void intel_mark_busy(struct drm_i915_private *dev_priv);
void intel_mark_idle(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
index ffa74290e054..4fc49c27f13c 100644
--- a/drivers/gpu/drm/i915/selftests/i915_vma.c
+++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
@@ -395,8 +395,8 @@ assert_rotated(struct drm_i915_gem_object *obj,
return sg;
}
-static unsigned int rotated_size(const struct intel_rotation_plane_info *a,
- const struct intel_rotation_plane_info *b)
+static unsigned int rotated_size(const struct intel_remapped_plane_info *a,
+ const struct intel_remapped_plane_info *b)
{
return a->width * a->height + b->width * b->height;
}
@@ -406,7 +406,7 @@ static int igt_vma_rotate(void *arg)
struct drm_i915_private *i915 = arg;
struct i915_address_space *vm = &i915->ggtt.vm;
struct drm_i915_gem_object *obj;
- const struct intel_rotation_plane_info planes[] = {
+ const struct intel_remapped_plane_info planes[] = {
{ .width = 1, .height = 1, .stride = 1 },
{ .width = 2, .height = 2, .stride = 2 },
{ .width = 4, .height = 4, .stride = 4 },
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx