Thanks,
Andi
Changelog
=========
v5 -> v6:
- restore the original alignment of guard so that it's aligned
coherently with the vma area's alignment.
v4 -> v5:
- remove again the GEM_BUG_ON()
- fix an oversight where the rounding was called without assigning the
value to the guard.
v1 -> v4:
- refer to the cover letter.
[*] https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_110720v5/fi-rkl-11600/igt@i915_module_load@xxxxxxxxx#dmesg-warnings300
drivers/gpu/drm/i915/gt/intel_ggtt.c | 14 +++++---
drivers/gpu/drm/i915/i915_gem_gtt.h | 3 +-
drivers/gpu/drm/i915/i915_vma.c | 43 ++++++++++++++++++++----
drivers/gpu/drm/i915/i915_vma.h | 5 +--
drivers/gpu/drm/i915/i915_vma_resource.c | 4 +--
drivers/gpu/drm/i915/i915_vma_resource.h | 7 +++-
drivers/gpu/drm/i915/i915_vma_types.h | 1 +
7 files changed, 60 insertions(+), 17 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 7644738b9cdbe..784d4a8c43ba9 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -296,8 +296,11 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
*/
gte = (gen8_pte_t __iomem *)ggtt->gsm;
- gte += vma_res->start / I915_GTT_PAGE_SIZE;
- end = gte + vma_res->node_size / I915_GTT_PAGE_SIZE;
+ gte += (vma_res->start - vma_res->guard) / I915_GTT_PAGE_SIZE;
+ end = gte + vma_res->guard / I915_GTT_PAGE_SIZE;
+ while (gte < end)
+ gen8_set_pte(gte++, vm->scratch[0]->encode);
+ end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE;
for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
gen8_set_pte(gte++, pte_encode | addr);
@@ -347,9 +350,12 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
dma_addr_t addr;
gte = (gen6_pte_t __iomem *)ggtt->gsm;
- gte += vma_res->start / I915_GTT_PAGE_SIZE;
- end = gte + vma_res->node_size / I915_GTT_PAGE_SIZE;
+ gte += (vma_res->start - vma_res->guard) / I915_GTT_PAGE_SIZE;
+ end = gte + vma_res->guard / I915_GTT_PAGE_SIZE;
+ while (gte < end)
+ iowrite32(vm->scratch[0]->encode, gte++);
+ end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE;
for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
iowrite32(vm->pte_encode(addr, level, flags), gte++);
GEM_BUG_ON(gte > end);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 8c2f57eb5ddaa..2434197830523 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -44,7 +44,8 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
#define PIN_HIGH BIT_ULL(5)
#define PIN_OFFSET_BIAS BIT_ULL(6)
#define PIN_OFFSET_FIXED BIT_ULL(7)
-#define PIN_VALIDATE BIT_ULL(8) /* validate placement only, no need to call unpin() */
+#define PIN_OFFSET_GUARD BIT_ULL(8)
+#define PIN_VALIDATE BIT_ULL(9) /* validate placement only, no need to call unpin() */
#define PIN_GLOBAL BIT_ULL(10) /* I915_VMA_GLOBAL_BIND */
#define PIN_USER BIT_ULL(11) /* I915_VMA_LOCAL_BIND */
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index fefee5fef38d3..34f0e6c923c26 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -419,7 +419,7 @@ i915_vma_resource_init_from_vma(struct i915_vma_resource *vma_res,
obj->mm.rsgt, i915_gem_object_is_readonly(obj),
i915_gem_object_is_lmem(obj), obj->mm.region,
vma->ops, vma->private, __i915_vma_offset(vma),
- __i915_vma_size(vma), vma->size);
+ __i915_vma_size(vma), vma->size, vma->guard);
}
/**
@@ -677,6 +677,10 @@ bool i915_vma_misplaced(const struct i915_vma *vma,
i915_vma_offset(vma) != (flags & PIN_OFFSET_MASK))
return true;
+ if (flags & PIN_OFFSET_GUARD &&
+ vma->guard < (flags & PIN_OFFSET_MASK))
+ return true;
+
return false;
}
@@ -749,15 +753,16 @@ static int
i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
u64 size, u64 alignment, u64 flags)
{
- unsigned long color;
+ unsigned long color, guard;
u64 start, end;
int ret;
GEM_BUG_ON(i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
+ GEM_BUG_ON(hweight64(flags & (PIN_OFFSET_GUARD | PIN_OFFSET_FIXED | PIN_OFFSET_BIAS)) > 1);
size = max(size, vma->size);
- alignment = max(alignment, vma->display_alignment);
+ alignment = max_t(typeof(alignment), alignment, vma->display_alignment);
if (flags & PIN_MAPPABLE) {
size = max_t(typeof(size), size, vma->fence_size);
alignment = max_t(typeof(alignment),
@@ -768,6 +773,18 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
GEM_BUG_ON(!IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT));
GEM_BUG_ON(!is_power_of_2(alignment));
+ guard = vma->guard; /* retain guard across rebinds */
+ if (flags & PIN_OFFSET_GUARD) {
+ GEM_BUG_ON(overflows_type(flags & PIN_OFFSET_MASK, u32));
+ guard = max_t(u32, guard, flags & PIN_OFFSET_MASK);
+ }
+ /*
+ * As we align the node upon insertion, but the hardware gets
+ * node.start + guard, the easiest way to make that work is
+ * to make the guard a multiple of the alignment size.
+ */
+ guard = ALIGN(guard, alignment);
+
start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
GEM_BUG_ON(!IS_ALIGNED(start, I915_GTT_PAGE_SIZE));
@@ -780,11 +797,12 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
alignment = max(alignment, i915_vm_obj_min_alignment(vma->vm, vma->obj));
- /* If binding the object/GGTT view requires more space than the entire
+ /*
+ * If binding the object/GGTT view requires more space than the entire
* aperture has, reject it early before evicting everything in a vain
* attempt to find space.
*/
- if (size > end) {
+ if (size > end - 2 * guard) {
drm_dbg(&to_i915(vma->obj->base.dev)->drm,
"Attempting to bind an object larger than the aperture: request=%llu > %s aperture=%llu\n",
size, flags & PIN_MAPPABLE ? "mappable" : "total", end);
@@ -801,13 +819,23 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
if (!IS_ALIGNED(offset, alignment) ||
range_overflows(offset, size, end))
return -EINVAL;
+ /*
+ * The caller knows not of the guard added by others and
+ * requests for the offset of the start of its buffer
+ * to be fixed, which may not be the same as the position
+ * of the vma->node due to the guard pages.
+ */
+ if (offset < guard || offset + size > end - guard)
+ return -ENOSPC;
ret = i915_gem_gtt_reserve(vma->vm, ww, &vma->node,
- size, offset, color,
- flags);
+ size + 2 * guard,
+ offset - guard,
+ color, flags);
if (ret)
return ret;
} else {
+ size += 2 * guard;
/*
* We only support huge gtt pages through the 48b PPGTT,
* however we also don't want to force any alignment for
@@ -855,6 +883,7 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, color));
list_move_tail(&vma->vm_link, &vma->vm->bound_list);
+ vma->guard = guard;
return 0;
}
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 3fd4512b1f65f..ed5c9d682a1b2 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -128,7 +128,7 @@ static inline bool i915_vma_is_closed(const struct i915_vma *vma)
/* Internal use only. */
static inline u64 __i915_vma_size(const struct i915_vma *vma)
{
- return vma->node.size;
+ return vma->node.size - 2 * vma->guard;
}
/**
@@ -150,7 +150,8 @@ static inline u64 i915_vma_size(const struct i915_vma *vma)
/* Internal use only. */
static inline u64 __i915_vma_offset(const struct i915_vma *vma)
{
- return vma->node.start;
+ /* The actual start of the vma->pages is after the guard pages. */
+ return vma->node.start + vma->guard;
}
/**
diff --git a/drivers/gpu/drm/i915/i915_vma_resource.c b/drivers/gpu/drm/i915/i915_vma_resource.c
index de1342dbfa128..6ba7a7feceba1 100644
--- a/drivers/gpu/drm/i915/i915_vma_resource.c
+++ b/drivers/gpu/drm/i915/i915_vma_resource.c
@@ -34,8 +34,8 @@ static struct kmem_cache *slab_vma_resources;
* and removal of fences increases as O(ln(pending_unbinds)) instead of
* O(1) for a single fence without interval tree.
*/
-#define VMA_RES_START(_node) ((_node)->start)
-#define VMA_RES_LAST(_node) ((_node)->start + (_node)->node_size - 1)
+#define VMA_RES_START(_node) ((_node)->start - (_node)->guard)
+#define VMA_RES_LAST(_node) ((_node)->start + (_node)->node_size + (_node)->guard - 1)
INTERVAL_TREE_DEFINE(struct i915_vma_resource, rb,
u64, __subtree_last,
VMA_RES_START, VMA_RES_LAST, static, vma_res_itree);
diff --git a/drivers/gpu/drm/i915/i915_vma_resource.h b/drivers/gpu/drm/i915/i915_vma_resource.h
index 54edf3739ca0b..c1864e3d0b43e 100644
--- a/drivers/gpu/drm/i915/i915_vma_resource.h
+++ b/drivers/gpu/drm/i915/i915_vma_resource.h
@@ -57,6 +57,7 @@ struct i915_page_sizes {
* @node_size: Size of the allocated range manager node with padding
* subtracted.
* @vma_size: Bind size.
+ * @guard: The size of guard area preceding and trailing the bind.
* @page_sizes_gtt: Resulting page sizes from the bind operation.
* @bound_flags: Flags indicating binding status.
* @allocated: Backend private data. TODO: Should move into @private.
@@ -115,6 +116,7 @@ struct i915_vma_resource {
u64 start;
u64 node_size;
u64 vma_size;
+ u32 guard;
u32 page_sizes_gtt;
u32 bound_flags;
@@ -179,6 +181,7 @@ static inline void i915_vma_resource_put(struct i915_vma_resource *vma_res)
* @start: Offset into the address space of bind range start after padding.
* @node_size: Size of the allocated range manager node minus padding.
* @size: Bind size.
+ * @guard: The size of the guard area preceding and trailing the bind.
*
* Initializes a vma resource allocated using i915_vma_resource_alloc().
* The reason for having separate allocate and initialize function is that
@@ -197,7 +200,8 @@ static inline void i915_vma_resource_init(struct i915_vma_resource *vma_res,
void *private,
u64 start,
u64 node_size,
- u64 size)
+ u64 size,
+ u32 guard)
{
__i915_vma_resource_init(vma_res);
vma_res->vm = vm;
@@ -215,6 +219,7 @@ static inline void i915_vma_resource_init(struct i915_vma_resource *vma_res,
vma_res->start = start;
vma_res->node_size = node_size;
vma_res->vma_size = size;
+ vma_res->guard = guard;
}
static inline void i915_vma_resource_fini(struct i915_vma_resource *vma_res)
diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h
index 0375812792b9c..77fda2244d161 100644
--- a/drivers/gpu/drm/i915/i915_vma_types.h
+++ b/drivers/gpu/drm/i915/i915_vma_types.h
@@ -202,6 +202,7 @@ struct i915_vma {
/* mmap-offset associated with fencing for this vma */
struct i915_mmap_offset *mmo;
+ u32 guard; /* padding allocated around vma->pages within the node */
u32 fence_size;
u32 fence_alignment;
u32 display_alignment;