Re: [PATCH v3] drm/i915/gtt: Avoid calling kcalloc in a loop when allocating temp bitmaps

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 9/1/2015 10:06 AM, Michał Winiarski wrote:
On each call to gen8_alloc_va_range_3lvl we're allocating temporary
bitmaps needed for error handling. Unfortunately, when we increase
address space size (48b ppgtt) we do additional (512 - 4) calls to
kcalloc, increasing latency between exec and actual start of execution
on the GPU. Let's just do a single kcalloc, we can also drop the size
from free_gen8_temp_bitmaps since it's no longer used.

v2: Use GFP_TEMPORARY to make the allocations reclaimable.
v3: Drop the 2D array, just allocate a single block.

Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx>
Cc: Michel Thierry <michel.thierry@xxxxxxxxx>
Signed-off-by: Michał Winiarski <michal.winiarski@xxxxxxxxx>

Unless Chris thinks otherwise, I see Michał already addressed his comments.

Reviewed-by: Michel Thierry <michel.thierry@xxxxxxxxx>

---
  drivers/gpu/drm/i915/i915_gem_gtt.c | 45 +++++++++++++------------------------
  1 file changed, 16 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 4a76807..f810762 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1126,13 +1126,8 @@ unwind_out:
  }

  static void
-free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long **new_pts,
-		       uint32_t pdpes)
+free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts)
  {
-	int i;
-
-	for (i = 0; i < pdpes; i++)
-		kfree(new_pts[i]);
  	kfree(new_pts);
  	kfree(new_pds);
  }
@@ -1142,29 +1137,20 @@ free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long **new_pts,
   */
  static
  int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
-					 unsigned long ***new_pts,
+					 unsigned long **new_pts,
  					 uint32_t pdpes)
  {
-	int i;
  	unsigned long *pds;
-	unsigned long **pts;
+	unsigned long *pts;

-	pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_KERNEL);
+	pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY);
  	if (!pds)
  		return -ENOMEM;

-	pts = kcalloc(pdpes, sizeof(unsigned long *), GFP_KERNEL);
-	if (!pts) {
-		kfree(pds);
-		return -ENOMEM;
-	}
-
-	for (i = 0; i < pdpes; i++) {
-		pts[i] = kcalloc(BITS_TO_LONGS(I915_PDES),
-				 sizeof(unsigned long), GFP_KERNEL);
-		if (!pts[i])
-			goto err_out;
-	}
+	pts = kcalloc(pdpes * BITS_TO_LONGS(I915_PDES),
+			sizeof(unsigned long), GFP_TEMPORARY);
+	if (!pts)
+		goto err_out;

  	*new_pds = pds;
  	*new_pts = pts;
@@ -1172,7 +1158,7 @@ int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
  	return 0;

  err_out:
-	free_gen8_temp_bitmaps(pds, pts, pdpes);
+	free_gen8_temp_bitmaps(pds, pts);
  	return -ENOMEM;
  }

@@ -1193,7 +1179,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
  {
  	struct i915_hw_ppgtt *ppgtt =
  		container_of(vm, struct i915_hw_ppgtt, base);
-	unsigned long *new_page_dirs, **new_page_tables;
+	unsigned long *new_page_dirs, *new_page_tables;
  	struct drm_device *dev = vm->dev;
  	struct i915_page_directory *pd;
  	const uint64_t orig_start = start;
@@ -1220,14 +1206,14 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
  	ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length,
  						new_page_dirs);
  	if (ret) {
-		free_gen8_temp_bitmaps(new_page_dirs, new_page_tables, pdpes);
+		free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
  		return ret;
  	}

  	/* For every page directory referenced, allocate page tables */
  	gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
  		ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length,
-						new_page_tables[pdpe]);
+						new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES));
  		if (ret)
  			goto err_out;
  	}
@@ -1278,20 +1264,21 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
  		gen8_setup_page_directory(ppgtt, pdp, pd, pdpe);
  	}

-	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables, pdpes);
+	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
  	mark_tlbs_dirty(ppgtt);
  	return 0;

  err_out:
  	while (pdpe--) {
-		for_each_set_bit(temp, new_page_tables[pdpe], I915_PDES)
+		for_each_set_bit(temp, new_page_tables + pdpe *
+				BITS_TO_LONGS(I915_PDES), I915_PDES)
  			free_pt(dev, pdp->page_directory[pdpe]->page_table[temp]);
  	}

  	for_each_set_bit(pdpe, new_page_dirs, pdpes)
  		free_pd(dev, pdp->page_directory[pdpe]);

-	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables, pdpes);
+	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
  	mark_tlbs_dirty(ppgtt);
  	return ret;
  }

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux