Re: [PATCH v4 09/24] drm/i915: Track GEN6 page table usage

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2/20/2015 4:41 PM, Mika Kuoppala wrote:
Michel Thierry <michel.thierry@xxxxxxxxx> writes:

From: Ben Widawsky <benjamin.widawsky@xxxxxxxxx>

Instead of implementing the full tracking + dynamic allocation, this
patch does a bit less than half of the work, by tracking and warning on
unexpected conditions. The tracking itself follows which PTEs within a
page table are currently being used for objects. The next patch will
modify this to actually allocate the page tables only when necessary.

With the current patch there isn't much in the way of making a gen
agnostic range allocation function. However, in the next patch we'll add
more specificity which makes having separate functions a bit easier to
manage.

One important change introduced here is that DMA mappings are
created/destroyed at the same page directories/tables are
allocated/deallocated.

Notice that aliasing PPGTT is not managed here. The patch which actually
begins dynamic allocation/teardown explains the reasoning for this.

v2: s/pdp.page_directory/pdp.page_directorys
Make a scratch page allocation helper

v3: Rebase and expand commit message.

v4: Allocate required pagetables only when it is needed, _bind_to_vm
instead of bind_vma (Daniel).

v5: Rebased to remove the unnecessary noise in the diff, also:
  - PDE mask is GEN agnostic, renamed GEN6_PDE_MASK to I915_PDE_MASK.
  - Removed unnecessary checks in gen6_alloc_va_range.
  - Changed map/unmap_px_single macros to use dma functions directly and
    be part of a static inline function instead.
  - Moved drm_device plumbing through page tables operation to its own
    patch.
  - Moved allocate/teardown_va_range calls until they are fully
    implemented (in subsequent patch).
  - Merged pt and scratch_pt unmap_and_free path.
  - Moved scratch page allocator helper to the patch that will use it.

v6: Reduce complexity by not tearing down pagetables dynamically, the
same can be achieved while freeing empty vms. (Daniel)

Cc: Daniel Vetter <daniel@xxxxxxxx>
Signed-off-by: Ben Widawsky <ben@xxxxxxxxxxxx>
Signed-off-by: Michel Thierry <michel.thierry@xxxxxxxxx> (v3+)
---
  drivers/gpu/drm/i915/i915_gem_gtt.c | 191 +++++++++++++++++++++++++-----------
  drivers/gpu/drm/i915/i915_gem_gtt.h |  75 ++++++++++++++
  2 files changed, 206 insertions(+), 60 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index e2bcd10..760585e 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -274,29 +274,88 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
  	return pte;
  }
-static void unmap_and_free_pt(struct i915_page_table_entry *pt, struct drm_device *dev)
+#define i915_dma_unmap_single(px, dev) \
+	__i915_dma_unmap_single((px)->daddr, dev)
+
+static inline void __i915_dma_unmap_single(dma_addr_t daddr,
+					struct drm_device *dev)
+{
+	struct device *device = &dev->pdev->dev;
+
+	dma_unmap_page(device, daddr, 4096, PCI_DMA_BIDIRECTIONAL);
+}
+
+/**
+ * i915_dma_map_px_single() - Create a dma mapping for a page table/dir/etc.
+ * @px:		Page table/dir/etc to get a DMA map for
+ * @dev:	drm device
+ *
+ * Page table allocations are unified across all gens. They always require a
+ * single 4k allocation, as well as a DMA mapping. If we keep the structs
+ * symmetric here, the simple macro covers us for every page table type.
+ *
+ * Return: 0 if success.
+ */
+#define i915_dma_map_px_single(px, dev) \
+	i915_dma_map_page_single((px)->page, (dev), &(px)->daddr)
+
If this is symmetrical to i915_dma_unmap_single() is the _px_ needed?

+static inline int i915_dma_map_page_single(struct page *page,
+					   struct drm_device *dev,
+					   dma_addr_t *daddr)
+{
+	struct device *device = &dev->pdev->dev;
+
+	*daddr = dma_map_page(device, page, 0, 4096, PCI_DMA_BIDIRECTIONAL);
+	return dma_mapping_error(device, *daddr);
+}
+
+static void unmap_and_free_pt(struct i915_page_table_entry *pt,
+			       struct drm_device *dev)
  {
  	if (WARN_ON(!pt->page))
  		return;
+
+	i915_dma_unmap_single(pt, dev);
  	__free_page(pt->page);
+	kfree(pt->used_ptes);
  	kfree(pt);
  }
static struct i915_page_table_entry *alloc_pt_single(struct drm_device *dev)
  {
  	struct i915_page_table_entry *pt;
+	const size_t count = INTEL_INFO(dev)->gen >= 8 ?
+		GEN8_PTES_PER_PAGE : I915_PPGTT_PT_ENTRIES;
+	int ret = -ENOMEM;
pt = kzalloc(sizeof(*pt), GFP_KERNEL);
  	if (!pt)
  		return ERR_PTR(-ENOMEM);
+ pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes),
+				GFP_KERNEL);
+
+	if (!pt->used_ptes)
+		goto fail_bitmap;
+
  	pt->page = alloc_page(GFP_KERNEL | __GFP_ZERO);
-	if (!pt->page) {
-		kfree(pt);
-		return ERR_PTR(-ENOMEM);
-	}
+	if (!pt->page)
+		goto fail_page;
+
+	ret = i915_dma_map_px_single(pt, dev);
+	if (ret)
+		goto fail_dma;
return pt;
+
+fail_dma:
+	__free_page(pt->page);
+fail_page:
+	kfree(pt->used_ptes);
+fail_bitmap:
+	kfree(pt);
+
+	return ERR_PTR(ret);
  }
/**
@@ -836,26 +895,36 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
  	}
  }
-static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt)
+/* Write pde (index) from the page directory @pd to the page table @pt */
+static void gen6_write_pdes(struct i915_page_directory_entry *pd,
For me it seems that you will write only one pde entry so s/pdes/pde ?

+			    const int pde, struct i915_page_table_entry *pt)
  {
-	struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
-	gen6_gtt_pte_t __iomem *pd_addr;
-	uint32_t pd_entry;
-	int i;
+	struct i915_hw_ppgtt *ppgtt =
+		container_of(pd, struct i915_hw_ppgtt, pd);
+	u32 pd_entry;
- WARN_ON(ppgtt->pd.pd_offset & 0x3f);
-	pd_addr = (gen6_gtt_pte_t __iomem*)dev_priv->gtt.gsm +
-		ppgtt->pd.pd_offset / sizeof(gen6_gtt_pte_t);
-	for (i = 0; i < ppgtt->num_pd_entries; i++) {
-		dma_addr_t pt_addr;
+	pd_entry = GEN6_PDE_ADDR_ENCODE(pt->daddr);
+	pd_entry |= GEN6_PDE_VALID;
- pt_addr = ppgtt->pd.page_tables[i]->daddr;
-		pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
-		pd_entry |= GEN6_PDE_VALID;
+	writel(pd_entry, ppgtt->pd_addr + pde);
- writel(pd_entry, pd_addr + i);
-	}
-	readl(pd_addr);
+	/* XXX: Caller needs to make sure the write completes if necessary */
+}
Move this comment on top of the function and lift the XXX:

+
+/* Write all the page tables found in the ppgtt structure to incrementing page
+ * directories. */
+static void gen6_write_page_range(struct drm_i915_private *dev_priv,
+				struct i915_page_directory_entry *pd, uint32_t start, uint32_t length)
+{
+	struct i915_page_table_entry *pt;
+	uint32_t pde, temp;
+
+	gen6_for_each_pde(pt, pd, start, length, temp, pde)
+		gen6_write_pdes(pd, pde, pt);
+
+	/* Make sure write is complete before other code can use this page
+	 * table. Also require for WC mapped PTEs */
+	readl(dev_priv->gtt.gsm);
  }
static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
@@ -1071,6 +1140,28 @@ static void gen6_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
  			       4096, PCI_DMA_BIDIRECTIONAL);
  }
+static int gen6_alloc_va_range(struct i915_address_space *vm,
+			       uint64_t start, uint64_t length)
+{
+	struct i915_hw_ppgtt *ppgtt =
+				container_of(vm, struct i915_hw_ppgtt, base);
+	struct i915_page_table_entry *pt;
+	uint32_t pde, temp;
+
+	gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) {
+		DECLARE_BITMAP(tmp_bitmap, I915_PPGTT_PT_ENTRIES);
+
+		bitmap_zero(tmp_bitmap, I915_PPGTT_PT_ENTRIES);
+		bitmap_set(tmp_bitmap, gen6_pte_index(start),
+			   gen6_pte_count(start, length));
+
+		bitmap_or(pt->used_ptes, pt->used_ptes, tmp_bitmap,
+				I915_PPGTT_PT_ENTRIES);
+	}
+
+	return 0;
+}
+
  static void gen6_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
  {
  	int i;
@@ -1117,20 +1208,24 @@ alloc:
  					       0, dev_priv->gtt.base.total,
  					       0);
  		if (ret)
-			return ret;
+			goto err_out;
retried = true;
  		goto alloc;
  	}
if (ret)
-		return ret;
+		goto err_out;
+
if (ppgtt->node.start < dev_priv->gtt.mappable_end)
  		DRM_DEBUG("Forced to use aperture for PDEs\n");
ppgtt->num_pd_entries = GEN6_PPGTT_PD_ENTRIES;
  	return 0;
+
+err_out:
+	return ret;
  }
static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
@@ -1152,30 +1247,6 @@ static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
  	return 0;
  }
-static int gen6_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt)
-{
-	struct drm_device *dev = ppgtt->base.dev;
-	int i;
-
-	for (i = 0; i < ppgtt->num_pd_entries; i++) {
-		struct page *page;
-		dma_addr_t pt_addr;
-
-		page = ppgtt->pd.page_tables[i]->page;
-		pt_addr = pci_map_page(dev->pdev, page, 0, 4096,
-				       PCI_DMA_BIDIRECTIONAL);
-
-		if (pci_dma_mapping_error(dev->pdev, pt_addr)) {
-			gen6_ppgtt_unmap_pages(ppgtt);
-			return -EIO;
-		}
-
-		ppgtt->pd.page_tables[i]->daddr = pt_addr;
-	}
-
-	return 0;
-}
-
  static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
  {
  	struct drm_device *dev = ppgtt->base.dev;
@@ -1196,12 +1267,7 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
  	if (ret)
  		return ret;
- ret = gen6_ppgtt_setup_page_tables(ppgtt);
-	if (ret) {
-		gen6_ppgtt_free(ppgtt);
-		return ret;
-	}
-
+	ppgtt->base.allocate_va_range = gen6_alloc_va_range;
  	ppgtt->base.clear_range = gen6_ppgtt_clear_range;
  	ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
  	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
@@ -1212,13 +1278,17 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
  	ppgtt->pd.pd_offset =
  		ppgtt->node.start / PAGE_SIZE * sizeof(gen6_gtt_pte_t);
+ ppgtt->pd_addr = (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm +
+		ppgtt->pd.pd_offset / sizeof(gen6_gtt_pte_t);
+
  	ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);
+ gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total);
+
  	DRM_DEBUG_DRIVER("Allocated pde space (%ldM) at GTT entry: %lx\n",
  			 ppgtt->node.size >> 20,
  			 ppgtt->node.start / PAGE_SIZE);
- gen6_write_pdes(ppgtt);
  	DRM_DEBUG("Adding PPGTT at offset %x\n",
  		  ppgtt->pd.pd_offset << 10);
@@ -1491,13 +1561,14 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
  		/* TODO: Perhaps it shouldn't be gen6 specific */
-		if (i915_is_ggtt(vm)) {
-			if (dev_priv->mm.aliasing_ppgtt)
-				gen6_write_pdes(dev_priv->mm.aliasing_ppgtt);
-			continue;
-		}
- gen6_write_pdes(container_of(vm, struct i915_hw_ppgtt, base));
+		struct i915_hw_ppgtt *ppgtt =
+			container_of(vm, struct i915_hw_ppgtt, base);
+
+		if (i915_is_ggtt(vm))
+			ppgtt = dev_priv->mm.aliasing_ppgtt;
If we have ggtt but aliasing is not enabled we get NULL here and oops
over in next function?

-Mika

I applied the changes in v7.

Thanks for the review.

-Michel

Attachment: smime.p7s
Description: S/MIME Cryptographic Signature

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux