Re: [PATCH v8 21/22] drm/i915/vm_bind: Properly build persistent map sg table

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Dec 12, 2022 at 06:17:01PM +0000, Matthew Auld wrote:
On 29/11/2022 07:26, Niranjana Vishwanathapura wrote:
Properly build the sg table for persistent mapping which can
be partial map of the underlying object. Ensure the sg pages
are properly set for page backed regions. The dump capture
support requires this for page backed regions.

Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@xxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_vma.c | 120 +++++++++++++++++++++++++++++++-
 1 file changed, 119 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 1b9033865768..68a9ac77b4f2 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -1298,6 +1298,120 @@ intel_partial_pages(const struct i915_gtt_view *view,
 	return ERR_PTR(ret);
 }
+static unsigned int
+intel_copy_dma_sg(struct sg_table *src_st, struct sg_table *dst_st,
+		  u64 offset, u64 length, bool dry_run)
+{
+	struct scatterlist *dst_sg, *src_sg;
+	unsigned int i, len, nents = 0;
+
+	dst_sg = dst_st->sgl;
+	for_each_sgtable_dma_sg(src_st, src_sg, i) {
+		if (sg_dma_len(src_sg) <= offset) {
+			offset -= sg_dma_len(src_sg);
+			continue;
+		}
+
+		nents++;
+		len = min(sg_dma_len(src_sg) - offset, length);
+		if (!dry_run) {
+			sg_dma_address(dst_sg) = sg_dma_address(src_sg) + offset;
+			sg_dma_len(dst_sg) = len;
+			dst_sg = sg_next(dst_sg);
+		}
+
+		length -= len;
+		offset = 0;
+		if (!length)
+			break;
+	}
+	WARN_ON_ONCE(length);
+
+	return nents;
+}
+
+static unsigned int
+intel_copy_sg(struct sg_table *src_st, struct sg_table *dst_st,
+	      u64 offset, u64 length, bool dry_run)
+{
+	struct scatterlist *dst_sg, *src_sg;
+	unsigned int i, len, nents = 0;
+
+	dst_sg = dst_st->sgl;
+	for_each_sgtable_sg(src_st, src_sg, i) {
+		if (src_sg->length <= offset) {
+			offset -= src_sg->length;
+			continue;
+		}
+
+		nents++;
+		len = min(src_sg->length - offset, length);
+		if (!dry_run) {
+			unsigned long pfn;
+
+			pfn = page_to_pfn(sg_page(src_sg)) + offset / PAGE_SIZE;
+			sg_set_page(dst_sg, pfn_to_page(pfn), len, 0);
+			dst_sg = sg_next(dst_sg);
+		}
+
+		length -= len;
+		offset = 0;
+		if (!length)
+			break;
+	}
+	WARN_ON_ONCE(length);
+
+	return nents;
+}
+
+static noinline struct sg_table *
+intel_persistent_partial_pages(const struct i915_gtt_view *view,
+			       struct drm_i915_gem_object *obj)
+{
+	u64 offset = view->partial.offset << PAGE_SHIFT;
+	struct sg_table *st, *obj_st = obj->mm.pages;
+	u64 length = view->partial.size << PAGE_SHIFT;
+	struct scatterlist *sg;
+	unsigned int nents;
+	int ret = -ENOMEM;
+
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (!st)
+		goto err_st_alloc;
+
+	/* Get required sg_table size */
+	nents = intel_copy_dma_sg(obj_st, st, offset, length, true);
+	if (i915_gem_object_has_struct_page(obj)) {
+		unsigned int pg_nents;
+
+		pg_nents = intel_copy_sg(obj_st, st, offset, length, true);
+		if (nents < pg_nents)
+			nents = pg_nents;
+	}
+
+	ret = sg_alloc_table(st, nents, GFP_KERNEL);
+	if (ret)
+		goto err_sg_alloc;
+
+	/* Build sg_table for specified <offset, length> section */
+	intel_copy_dma_sg(obj_st, st, offset, length, false);
+	if (i915_gem_object_has_struct_page(obj))
+		intel_copy_sg(obj_st, st, offset, length, false);
+
+	/* Mark last sg */
+	sg = st->sgl;
+	while (sg_next(sg))
+		sg = sg_next(sg);
+	sg_mark_end(sg);

Do we need this bit? The nents is exactly orig_nents, and sg_alloc_table will already mark the end for you.


Ok, looks like we don't need it as sg_alloc_table() sets it.
While looking at sg_alloc_table(), looks like it is possible for it
to return with -ENOMEM with partial built table, but we are not
cleaning it anywhere. Something to consider later may be.
https://elixir.bootlin.com/linux/latest/source/lib/scatterlist.c#L330

Is it not possible to re-use remap_contiguous_pages() somehow? Also do we need the dry_run bit if we use sg_trim()? Maybe something like:

dst = sg_alloc_table(partial.size);

remap_contigious_pages_sg(dst, src);
i915_sg_trim(dst);

dst->nents = 0;
sg = remap_contigious_pages_dma_sg(dst, src);


But then those remap_contiguous_pages[_dma]_sg would look just like
out intel_copy[_dma]_sg().
And the problem I have with i915_sg_trim() is that it uses _sg iterator
only and not _dma_sg iterator. I think at least in theory, it is possible
to have more number of dma_sg elements than the sg (page) elements. Right?
That is why I am doing a dry run of both above and getting the max of both.

Niranjana

+
+	return st;
+
+err_sg_alloc:
+	kfree(st);
+err_st_alloc:
+	return ERR_PTR(ret);
+}
+
 static int
 __i915_vma_get_pages(struct i915_vma *vma)
 {
@@ -1330,7 +1444,11 @@ __i915_vma_get_pages(struct i915_vma *vma)
 		break;
 	case I915_GTT_VIEW_PARTIAL:
-		pages = intel_partial_pages(&vma->gtt_view, vma->obj);
+		if (i915_vma_is_persistent(vma))
+			pages = intel_persistent_partial_pages(&vma->gtt_view,
+							       vma->obj);
+		else
+			pages = intel_partial_pages(&vma->gtt_view, vma->obj);
 		break;
 	}



[Index of Archives]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux