On Mon, Jan 09, 2017 at 12:40:07PM +0000, Tvrtko Ursulin wrote: > > On 09/01/2017 12:28, Chris Wilson wrote: > >On Mon, Jan 09, 2017 at 12:23:37PM +0000, Tvrtko Ursulin wrote: > >> > >>Hi, > >> > >>On 20/12/2016 13:42, Tvrtko Ursulin wrote: > >>>From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> > >>> > >>>Rather than freeing and re-allocating the pages when DMA mapping > >>>in large chunks fails, we can just rebuild the sg table with no > >>>coalescing. > >>> > >>>Also change back the page counter to unsigned int because that > >>>is what the sg API supports. > >>> > >>>Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> > >>>Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > >>>--- > >>>Only compile tested! > >>>--- > >>>drivers/gpu/drm/i915/i915_gem.c | 40 ++++++++++++++++++++++++++++++---------- > >>>1 file changed, 30 insertions(+), 10 deletions(-) > >>> > >>>diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > >>>index 5275f6248ce3..e73f9f5a5d23 100644 > >>>--- a/drivers/gpu/drm/i915/i915_gem.c > >>>+++ b/drivers/gpu/drm/i915/i915_gem.c > >>>@@ -2340,12 +2340,36 @@ static void i915_sg_trim(struct sg_table *orig_st) > >>> *orig_st = new_st; > >>>} > >>> > >>>+static void i915_sg_uncoalesce(struct sg_table *orig_st, unsigned long nents) > >>>+{ > >>>+ struct sg_table new_st; > >>>+ struct scatterlist *new_sg; > >>>+ struct sgt_iter sgt_iter; > >>>+ struct page *page; > >>>+ > >>>+ if (sg_alloc_table(&new_st, nents, GFP_KERNEL)) > >>>+ return; > >>>+ > >>>+ new_sg = new_st.sgl; > >>>+ for_each_sgt_page(page, sgt_iter, orig_st) { > >>>+ sg_set_page(new_sg, page, PAGE_SIZE, 0); > >>>+ /* called before being DMA mapped, no need to copy sg->dma_* */ > >>>+ new_sg = sg_next(new_sg); > >>>+ } > >>>+ > >>>+ GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */ > >>>+ > >>>+ sg_free_table(orig_st); > >>>+ > >>>+ *orig_st = new_st; > >>>+} > >>>+ > >>>static struct sg_table * > >>>i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) > >>>{ > >>> struct drm_i915_private *dev_priv = to_i915(obj->base.dev); > >>>- const unsigned long page_count = obj->base.size / PAGE_SIZE; > >>>- unsigned long i; > >>>+ const unsigned int page_count = obj->base.size / PAGE_SIZE; > >>>+ unsigned int i; > >>> struct address_space *mapping; > >>> struct sg_table *st; > >>> struct scatterlist *sg; > >>>@@ -2371,7 +2395,6 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) > >>> if (st == NULL) > >>> return ERR_PTR(-ENOMEM); > >>> > >>>-rebuild_st: > >>> if (sg_alloc_table(st, page_count, GFP_KERNEL)) { > >>> kfree(st); > >>> return ERR_PTR(-ENOMEM); > >>>@@ -2429,6 +2452,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) > >>> /* Trim unused sg entries to avoid wasting memory. */ > >>> i915_sg_trim(st); > >>> > >>>+prepare_gtt: > >>> ret = i915_gem_gtt_prepare_pages(obj, st); > >>> if (ret) { > >>> /* DMA remapping failed? One possible cause is that > >>>@@ -2436,16 +2460,12 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) > >>> * for PAGE_SIZE chunks instead may be helpful. > >>> */ > >>> if (max_segment > PAGE_SIZE) { > >>>- for_each_sgt_page(page, sgt_iter, st) > >>>- put_page(page); > >>>- sg_free_table(st); > >>>- > >>>+ i915_sg_uncoalesce(st, page_count); > >>> max_segment = PAGE_SIZE; > >>>- goto rebuild_st; > >>>+ goto prepare_gtt; > >>> } else { > >>> dev_warn(&dev_priv->drm.pdev->dev, > >>>- "Failed to DMA remap %lu pages\n", > >>>- page_count); > >>>+ "Failed to DMA remap %u pages\n", page_count); > >>> goto err_pages; > >>> } > >>> } > >>> > >> > >>Are you still against this? As a reminder it saves a > >>put_page/allocate page-from-shmemfs cycle on dma mapping failures. > > > >I still regard it as incomplete. Why bother saving that trip and not the > >actual page allocation itself. > > Page allocation is exactly what it avoids re-doing!? It is not. The shmemfs pages are not being freed nor reallocated, just their use count being manipulated. > You meant sg_alloc_table maybe? We could move the trim to be last > and do the uncoalesce in place but it would be so much more complex > that I am not sure it is worth it for this edge case. Only the sg_table is being reallocated and doing actual kfree/kmalloc. > The patch above was fast and simple improvement to the dma remapping > retry you added. I didn't value it as being a significant improvement and thought you have a more complete solution in mind. For starters we can reduce the duplication here with diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ed3cd1a5f9bb..1bfec811f9b7 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2199,20 +2199,12 @@ void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj) invalidate_mapping_pages(mapping, 0, (loff_t)-1); } -static void -i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj, - struct sg_table *pages) +static void free_sg_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) { struct sgt_iter sgt_iter; struct page *page; - __i915_gem_object_release_shmem(obj, pages, true); - - i915_gem_gtt_finish_pages(obj, pages); - - if (i915_gem_object_needs_bit17_swizzle(obj)) - i915_gem_object_save_bit_17_swizzle(obj, pages); - for_each_sgt_page(page, sgt_iter, pages) { if (obj->mm.dirty) set_page_dirty(page); @@ -2222,9 +2214,23 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj, put_page(page); } + sg_free_table(pages); + obj->mm.dirty = false; +} - sg_free_table(pages); +static void +i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + __i915_gem_object_release_shmem(obj, pages, true); + + i915_gem_gtt_finish_pages(obj, pages); + + if (i915_gem_object_needs_bit17_swizzle(obj)) + i915_gem_object_save_bit_17_swizzle(obj, pages); + + free_sg_pages(obj, pages); kfree(pages); } @@ -2322,7 +2328,6 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) struct address_space *mapping; struct sg_table *st; struct scatterlist *sg; - struct sgt_iter sgt_iter; struct page *page; unsigned long last_pfn = 0; /* suppress gcc warning */ unsigned int max_segment; @@ -2409,10 +2414,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) * for PAGE_SIZE chunks instead may be helpful. */ if (max_segment > PAGE_SIZE) { - for_each_sgt_page(page, sgt_iter, st) - put_page(page); - sg_free_table(st); - + free_sg_pages(obj, st); max_segment = PAGE_SIZE; goto rebuild_st; } else { @@ -2431,9 +2433,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) err_sg: sg_mark_end(sg); err_pages: - for_each_sgt_page(page, sgt_iter, st) - put_page(page); - sg_free_table(st); + free_sg_pages(obj, st); kfree(st); /* shmemfs first checks if there is enough memory to allocate the page -- Chris Wilson, Intel Open Source Technology Centre _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx