[PATCH 21/34] drm/i915: Enlarge vma->pin_count

Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> · Mon, 21 Jan 2019 22:21:04 +0000

Previously we only accommodated having a vma pinned by a small number of
users, with the maximum being pinned for use by the display engine. As
such, we used a small bitfield only large enough to allow the vma to
be pinned twice (for back/front buffers) in each scanout plane. Keeping
the maximum permissible pin_count small allows us to quickly catch a
potential leak. However, as we want to split a 4096B page into 64
different cachelines and pin each cacheline for use by a different
timeline, we will exceed the current maximum permissible vma->pin_count
and so time has come to enlarge it.

Whilst we are here, try to pull together the similar bits:

Address/layout specification:
 - bias, mappable, zone_4g: address limit specifiers
 - fixed: address override, limits still apply though
 - high: not strictly an address limit, but an address direction to search

Search controls:
 - nonblock, nonfault, noevict

v2: Rewrite the guideline comment on bit consumption.

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Reviewed-by: John Harrison <john.C.Harrison@xxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_gem_gtt.h | 26 ++++++++---------
 drivers/gpu/drm/i915/i915_vma.h     | 45 +++++++++++++++++++----------
 2 files changed, 42 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index bd679c8c56dd..03ade71b8d9a 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -642,19 +642,19 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
 
 /* Flags used by pin/bind&friends. */
 #define PIN_NONBLOCK		BIT_ULL(0)
-#define PIN_MAPPABLE		BIT_ULL(1)
-#define PIN_ZONE_4G		BIT_ULL(2)
-#define PIN_NONFAULT		BIT_ULL(3)
-#define PIN_NOEVICT		BIT_ULL(4)
-
-#define PIN_MBZ			BIT_ULL(5) /* I915_VMA_PIN_OVERFLOW */
-#define PIN_GLOBAL		BIT_ULL(6) /* I915_VMA_GLOBAL_BIND */
-#define PIN_USER		BIT_ULL(7) /* I915_VMA_LOCAL_BIND */
-#define PIN_UPDATE		BIT_ULL(8)
-
-#define PIN_HIGH		BIT_ULL(9)
-#define PIN_OFFSET_BIAS		BIT_ULL(10)
-#define PIN_OFFSET_FIXED	BIT_ULL(11)
+#define PIN_NONFAULT		BIT_ULL(1)
+#define PIN_NOEVICT		BIT_ULL(2)
+#define PIN_MAPPABLE		BIT_ULL(3)
+#define PIN_ZONE_4G		BIT_ULL(4)
+#define PIN_HIGH		BIT_ULL(5)
+#define PIN_OFFSET_BIAS		BIT_ULL(6)
+#define PIN_OFFSET_FIXED	BIT_ULL(7)
+
+#define PIN_MBZ			BIT_ULL(8) /* I915_VMA_PIN_OVERFLOW */
+#define PIN_GLOBAL		BIT_ULL(9) /* I915_VMA_GLOBAL_BIND */
+#define PIN_USER		BIT_ULL(10) /* I915_VMA_LOCAL_BIND */
+#define PIN_UPDATE		BIT_ULL(11)
+
 #define PIN_OFFSET_MASK		(-I915_GTT_PAGE_SIZE)
 
 #endif
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 7252abc73d3e..5793abe509a2 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -71,29 +71,42 @@ struct i915_vma {
 	unsigned int open_count;
 	unsigned long flags;
 	/**
-	 * How many users have pinned this object in GTT space. The following
-	 * users can each hold at most one reference: pwrite/pread, execbuffer
-	 * (objects are not allowed multiple times for the same batchbuffer),
-	 * and the framebuffer code. When switching/pageflipping, the
-	 * framebuffer code has at most two buffers pinned per crtc.
+	 * How many users have pinned this object in GTT space.
 	 *
-	 * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3
-	 * bits with absolutely no headroom. So use 4 bits.
+	 * This is a tightly bound, fairly small number of users, so we
+	 * stuff inside the flags field so that we can both check for overflow
+	 * and detect a no-op i915_vma_pin() in a single check, while also
+	 * pinning the vma.
+	 *
+	 * The worst case display setup would have the same vma pinned for
+	 * use on each plane on each crtc, while also building the next atomic
+	 * state and holding a pin for the length of the cleanup queue. In the
+	 * future, the flip queue may be increased from 1.
+	 * Estimated worst case: 3 [qlen] * 4 [max crtcs] * 7 [max planes] = 84
+	 *
+	 * For GEM, the number of concurrent users for pwrite/pread is
+	 * unbounded. For execbuffer, it is currently one but will in future
+	 * be extended to allow multiple clients to pin vma concurrently.
+	 *
+	 * We also use suballocated pages, with each suballocation claiming
+	 * its own pin on the shared vma. At present, this is limited to
+	 * exclusive cachelines of a single page, so a maximum of 64 possible
+	 * users.
 	 */
-#define I915_VMA_PIN_MASK 0xf
-#define I915_VMA_PIN_OVERFLOW	BIT(5)
+#define I915_VMA_PIN_MASK 0xff
+#define I915_VMA_PIN_OVERFLOW	BIT(8)
 
 	/** Flags and address space this VMA is bound to */
-#define I915_VMA_GLOBAL_BIND	BIT(6)
-#define I915_VMA_LOCAL_BIND	BIT(7)
+#define I915_VMA_GLOBAL_BIND	BIT(9)
+#define I915_VMA_LOCAL_BIND	BIT(10)
 #define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW)
 
-#define I915_VMA_GGTT		BIT(8)
-#define I915_VMA_CAN_FENCE	BIT(9)
-#define I915_VMA_CLOSED		BIT(10)
-#define I915_VMA_USERFAULT_BIT	11
+#define I915_VMA_GGTT		BIT(11)
+#define I915_VMA_CAN_FENCE	BIT(12)
+#define I915_VMA_CLOSED		BIT(13)
+#define I915_VMA_USERFAULT_BIT	14
 #define I915_VMA_USERFAULT	BIT(I915_VMA_USERFAULT_BIT)
-#define I915_VMA_GGTT_WRITE	BIT(12)
+#define I915_VMA_GGTT_WRITE	BIT(15)
 
 	unsigned int active_count;
 	struct rb_root active;
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx