On Fri, Jun 03, 2016 at 05:55:21PM +0100, Chris Wilson wrote: > Our GPUs impose certain requirements upon buffers that depend upon how > exactly they are used. Typically this is expressed as that they require > a larger surface than would be naively computed by pitch * height. > Normally such requirements are hidden away in the userspace driver, but > when we accept pointers from strangers and later impose extra conditions > on them, the original client allocator has no idea about the > monstrosities in the GPU and we require the userspace driver to inform > the kernel how many padding pages are required beyond the client > allocation. > > v2: Long time, no see > v3: Try an anonymous union for uapi struct compatability > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> > Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> Hm, where's the userspace for this? Commit message should elaborate imo a bit more on what's going on here ... -Daniel > --- > drivers/gpu/drm/i915/i915_drv.h | 6 ++- > drivers/gpu/drm/i915/i915_gem.c | 82 +++++++++++++++--------------- > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +++++- > include/uapi/drm/i915_drm.h | 8 ++- > 4 files changed, 65 insertions(+), 47 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index a065325580d8..9520adba33f6 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -2945,11 +2945,13 @@ void i915_gem_free_object(struct drm_gem_object *obj); > int __must_check > i915_gem_object_pin(struct drm_i915_gem_object *obj, > struct i915_address_space *vm, > + uint64_t size, > uint32_t alignment, > uint64_t flags); > int __must_check > i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, > const struct i915_ggtt_view *view, > + uint64_t size, > uint32_t alignment, > uint64_t flags); > > @@ -3209,8 +3211,8 @@ i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj, > struct drm_i915_private *dev_priv = to_i915(obj->base.dev); > struct i915_ggtt *ggtt = &dev_priv->ggtt; > > - return i915_gem_object_pin(obj, &ggtt->base, > - alignment, flags | PIN_GLOBAL); > + return i915_gem_object_pin(obj, &ggtt->base, 0, alignment, > + flags | PIN_GLOBAL); > } > > void i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj, > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 19b8d2ea7698..0f0101300b2b 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -1438,7 +1438,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) > } > > /* Now pin it into the GTT if needed */ > - ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE); > + ret = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE); > if (ret) > goto unlock; > > @@ -2678,21 +2678,20 @@ static struct i915_vma * > i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, > struct i915_address_space *vm, > const struct i915_ggtt_view *ggtt_view, > + uint64_t size, > unsigned alignment, > uint64_t flags) > { > struct drm_device *dev = obj->base.dev; > struct drm_i915_private *dev_priv = to_i915(dev); > - struct i915_ggtt *ggtt = &dev_priv->ggtt; > - u32 fence_alignment, unfenced_alignment; > - u32 search_flag, alloc_flag; > u64 start, end; > - u64 size, fence_size; > + u32 search_flag, alloc_flag; > struct i915_vma *vma; > int ret; > > if (i915_is_ggtt(vm)) { > - u32 view_size; > + u32 fence_size, fence_alignment, unfenced_alignment; > + u64 view_size; > > if (WARN_ON(!ggtt_view)) > return ERR_PTR(-EINVAL); > @@ -2710,48 +2709,39 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, > view_size, > obj->tiling_mode, > false); > - size = flags & PIN_MAPPABLE ? fence_size : view_size; > + size = max(size, view_size); > + if (flags & PIN_MAPPABLE) > + size = max_t(u64, size, fence_size); > + > + if (alignment == 0) > + alignment = flags & PIN_MAPPABLE ? fence_alignment : > + unfenced_alignment; > + if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) { > + DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n", > + ggtt_view ? ggtt_view->type : 0, > + alignment); > + return ERR_PTR(-EINVAL); > + } > } else { > - fence_size = i915_gem_get_gtt_size(dev, > - obj->base.size, > - obj->tiling_mode); > - fence_alignment = i915_gem_get_gtt_alignment(dev, > - obj->base.size, > - obj->tiling_mode, > - true); > - unfenced_alignment = > - i915_gem_get_gtt_alignment(dev, > - obj->base.size, > - obj->tiling_mode, > - false); > - size = flags & PIN_MAPPABLE ? fence_size : obj->base.size; > + size = max_t(u64, size, obj->base.size); > + alignment = 4096; > } > > start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; > end = vm->total; > if (flags & PIN_MAPPABLE) > - end = min_t(u64, end, ggtt->mappable_end); > + end = min_t(u64, end, dev_priv->ggtt.mappable_end); > if (flags & PIN_ZONE_4G) > end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE); > > - if (alignment == 0) > - alignment = flags & PIN_MAPPABLE ? fence_alignment : > - unfenced_alignment; > - if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) { > - DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n", > - ggtt_view ? ggtt_view->type : 0, > - alignment); > - return ERR_PTR(-EINVAL); > - } > - > /* If binding the object/GGTT view requires more space than the entire > * aperture has, reject it early before evicting everything in a vain > * attempt to find space. > */ > if (size > end) { > - DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n", > + DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n", > ggtt_view ? ggtt_view->type : 0, > - size, > + size, obj->base.size, > flags & PIN_MAPPABLE ? "mappable" : "total", > end); > return ERR_PTR(-E2BIG); > @@ -3243,7 +3233,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, > * (e.g. libkms for the bootup splash), we have to ensure that we > * always use map_and_fenceable for all scanout buffers. > */ > - ret = i915_gem_object_ggtt_pin(obj, view, alignment, > + ret = i915_gem_object_ggtt_pin(obj, view, 0, alignment, > view->type == I915_GGTT_VIEW_NORMAL ? > PIN_MAPPABLE : 0); > if (ret) > @@ -3393,12 +3383,17 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) > } > > static bool > -i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags) > +i915_vma_misplaced(struct i915_vma *vma, > + uint64_t size, > + uint32_t alignment, > + uint64_t flags) > { > struct drm_i915_gem_object *obj = vma->obj; > > - if (alignment && > - vma->node.start & (alignment - 1)) > + if (vma->node.size < size) > + return true; > + > + if (alignment && vma->node.start & (alignment - 1)) > return true; > > if (flags & PIN_MAPPABLE && !obj->map_and_fenceable) > @@ -3442,6 +3437,7 @@ static int > i915_gem_object_do_pin(struct drm_i915_gem_object *obj, > struct i915_address_space *vm, > const struct i915_ggtt_view *ggtt_view, > + uint64_t size, > uint32_t alignment, > uint64_t flags) > { > @@ -3469,7 +3465,7 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj, > if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) > return -EBUSY; > > - if (i915_vma_misplaced(vma, alignment, flags)) { > + if (i915_vma_misplaced(vma, size, alignment, flags)) { > WARN(vma->pin_count, > "bo is already pinned in %s with incorrect alignment:" > " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d," > @@ -3490,8 +3486,8 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj, > > bound = vma ? vma->bound : 0; > if (vma == NULL || !drm_mm_node_allocated(&vma->node)) { > - vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment, > - flags); > + vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, > + size, alignment, flags); > if (IS_ERR(vma)) > return PTR_ERR(vma); > } else { > @@ -3513,17 +3509,19 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj, > int > i915_gem_object_pin(struct drm_i915_gem_object *obj, > struct i915_address_space *vm, > + uint64_t size, > uint32_t alignment, > uint64_t flags) > { > return i915_gem_object_do_pin(obj, vm, > i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL, > - alignment, flags); > + size, alignment, flags); > } > > int > i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, > const struct i915_ggtt_view *view, > + uint64_t size, > uint32_t alignment, > uint64_t flags) > { > @@ -3534,7 +3532,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, > BUG_ON(!view); > > return i915_gem_object_do_pin(obj, &ggtt->base, view, > - alignment, flags | PIN_GLOBAL); > + size, alignment, flags | PIN_GLOBAL); > } > > void > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > index 40937a09855d..c1e7ee212e7e 100644 > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > @@ -652,10 +652,14 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, > flags |= PIN_HIGH; > } > > - ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags); > + ret = i915_gem_object_pin(obj, vma->vm, > + entry->pad_to_size, > + entry->alignment, > + flags); > if ((ret == -ENOSPC || ret == -E2BIG) && > only_mappable_for_reloc(entry->flags)) > ret = i915_gem_object_pin(obj, vma->vm, > + entry->pad_to_size, > entry->alignment, > flags & ~PIN_MAPPABLE); > if (ret) > @@ -718,6 +722,9 @@ eb_vma_misplaced(struct i915_vma *vma) > vma->node.start & (entry->alignment - 1)) > return true; > > + if (vma->node.size < entry->pad_to_size) > + return true; > + > if (entry->flags & EXEC_OBJECT_PINNED && > vma->node.start != entry->offset) > return true; > @@ -1058,6 +1065,13 @@ validate_exec_list(struct drm_device *dev, > if (exec[i].alignment && !is_power_of_2(exec[i].alignment)) > return -EINVAL; > > + /* pad_to_size was once a reserved field, so sanitize it */ > + if (exec[i].flags & EXEC_OBJECT_PAD_TO_SIZE) { > + if (offset_in_page(exec[i].pad_to_size)) > + return -EINVAL; > + } else > + exec[i].pad_to_size = 0; > + > /* First check for malicious input causing overflow in > * the worst case where we need to allocate the entire > * relocation tree as a single array. > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index d6c668e58426..3b861746ba7a 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -701,10 +701,14 @@ struct drm_i915_gem_exec_object2 { > #define EXEC_OBJECT_WRITE (1<<2) > #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3) > #define EXEC_OBJECT_PINNED (1<<4) > -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1) > +#define EXEC_OBJECT_PAD_TO_SIZE (1<<5) > +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1) > __u64 flags; > > - __u64 rsvd1; > + union { > + __u64 rsvd1; > + __u64 pad_to_size; > + }; > __u64 rsvd2; > }; > > -- > 2.8.1 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx