On Mon, Sep 15, 2014 at 01:33:44PM +0100, Chris Wilson wrote: > When constructing a batchbuffer, it is sometimes crucial to know the > largest hole into which we can fit a fenceable buffer (for example when > handling very large objects on gen2 and gen3). This depends on the > fragmentation of pinned buffers inside the aperture, a question only the > kernel can easily answer. > > This patch extends the current DRM_I915_GEM_GET_APERTURE ioctl to > include a couple of new fields in its reply to userspace - the total > amount of space available in the mappable region of the aperture and > also the single largest block available. > > This is not quite what userspace wants to answer the question of whether > this batch will fit as fences are also required to meet severe alignment > constraints within the batch. For this purpose, a third conservative > estimate of largest fence available is also provided. For when userspace > needs more than one batch, we also provide the culmulative space > available for fences such that it has some additional guidance to how > much space it could allocate to fences. Conservatism still wins. > > The patch also adds a debugfs file for convenient testing and reporting. 
> > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_debugfs.c | 28 +++++++++ > drivers/gpu/drm/i915/i915_gem.c | 111 ++++++++++++++++++++++++++++++++++-- > include/uapi/drm/i915_drm.h | 20 +++++++ > 3 files changed, 155 insertions(+), 4 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c > index 9e63ccbea52e..41d92f29aef1 100644 > --- a/drivers/gpu/drm/i915/i915_debugfs.c > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > @@ -534,6 +534,33 @@ static int obj_rank_by_ggtt(void *priv, struct list_head *A, struct list_head *B > return i915_gem_obj_ggtt_offset(a) - i915_gem_obj_ggtt_offset(b); > } > > +static int i915_gem_aperture_info(struct seq_file *m, void *data) > +{ > + struct drm_info_node *node = m->private; > + struct drm_device *dev = node->minor->dev; > + struct drm_i915_gem_get_aperture arg; > + int ret; > + > + ret = i915_gem_get_aperture_ioctl(dev, &arg, NULL); > + if (ret) > + return ret; > + > + seq_printf(m, "Total size of the GTT: %llu bytes\n", > + arg.aper_size); > + seq_printf(m, "Available space in the GTT: %llu bytes\n", > + arg.aper_available_size); > + seq_printf(m, "Available space in the mappable aperture: %u bytes\n", > + arg.map_available_size); > + seq_printf(m, "Single largest space in the mappable aperture: %u bytes\n", > + arg.map_largest_size); > + seq_printf(m, "Available space for fences: %u bytes\n", > + arg.fence_available_size); > + seq_printf(m, "Single largest fence available: %u bytes\n", > + arg.fence_largest_size); > + > + return 0; > +} > + > static int i915_gem_gtt_info(struct seq_file *m, void *data) > { > struct drm_info_node *node = m->private; > @@ -4198,6 +4225,7 @@ static int i915_debugfs_create(struct dentry *root, > static const struct drm_info_list i915_debugfs_list[] = { > {"i915_capabilities", i915_capabilities, 0}, > {"i915_gem_objects", i915_gem_object_info, 0}, > + {"i915_gem_aperture", i915_gem_aperture_info, 0}, > 
{"i915_gem_gtt", i915_gem_gtt_info, 0}, > {"i915_gem_pinned", i915_gem_gtt_info, 0, (void *) PINNED_LIST}, > {"i915_gem_active", i915_gem_object_list_info, 0, (void *) ACTIVE_LIST}, > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 4b9de297b967..4b75086a1dc9 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -31,6 +31,7 @@ > #include "i915_drv.h" > #include "i915_trace.h" > #include "intel_drv.h" > +#include <linux/list_sort.h> > #include <linux/oom.h> > #include <linux/shmem_fs.h> > #include <linux/slab.h> > @@ -260,6 +261,49 @@ i915_gem_init_ioctl(struct drm_device *dev, void *data, > return 0; > } > > +static int obj_rank_by_ggtt(void *priv, > + struct list_head *A, > + struct list_head *B) > +{ > + struct drm_i915_gem_object *a = list_entry(A,typeof(*a), obj_exec_link); > + struct drm_i915_gem_object *b = list_entry(B,typeof(*b), obj_exec_link); > + > + return i915_gem_obj_ggtt_offset(a) - i915_gem_obj_ggtt_offset(b); > +} > + > +static u32 __fence_size(struct drm_i915_private *dev_priv, u32 start, u32 end) > +{ > + u32 size = end - start; > + u32 fence_size; > + > + if (INTEL_INFO(dev_priv)->gen < 4) { > + u32 fence_max; > + u32 fence_next; > + > + if (IS_GEN3(dev_priv)) { > + fence_max = I830_FENCE_MAX_SIZE_VAL << 20; > + fence_next = 1024*1024; > + } else { > + fence_max = I830_FENCE_MAX_SIZE_VAL << 19; > + fence_next = 512*1024; > + } > + > + fence_max = min(fence_max, size); > + fence_size = 0; > + while (fence_next <= fence_max) { > + u32 base = ALIGN(start, fence_next); > + if (base + fence_next > end) > + break; > + > + fence_size = fence_next; > + fence_next <<= 1; > + } > + } else > + fence_size = size; > + > + return fence_size; > +} > + > int > i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, > struct drm_file *file) > @@ -267,17 +311,76 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, > struct drm_i915_private *dev_priv = 
dev->dev_private; > struct drm_i915_gem_get_aperture *args = data; > struct drm_i915_gem_object *obj; > - size_t pinned; > + struct list_head map_list; > + const u32 map_limit = dev_priv->gtt.mappable_end; > + size_t pinned, map_space, map_largest, fence_space, fence_largest; > + u32 last, size; > + > + INIT_LIST_HEAD(&map_list); > > pinned = 0; > + map_space = map_largest = 0; > + fence_space = fence_largest = 0; > + > mutex_lock(&dev->struct_mutex); > - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) > - if (i915_gem_obj_is_pinned(obj)) > - pinned += i915_gem_obj_ggtt_size(obj); > + list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { > + struct i915_vma *vma = i915_gem_obj_to_ggtt(obj); > + > + if (vma == NULL || !vma->pin_count) > + continue; > + > + pinned += vma->node.size; > + > + if (vma->node.start < map_limit) > + list_add(&obj->obj_exec_link, &map_list); > + } > + > + last = ~0; > + list_sort(NULL, &map_list, obj_rank_by_ggtt); > + while (!list_empty(&map_list)) { > + struct i915_vma *vma; > + > + obj = list_first_entry(&map_list, typeof(*obj), obj_exec_link); > + list_del_init(&obj->obj_exec_link); > + > + vma = i915_gem_obj_to_ggtt(obj); > + if (last == ~0) > + goto skip_first; > + > + size = vma->node.start - last; > + if (size > map_largest) > + map_largest = size; > + map_space += size; > + > + size = __fence_size(dev_priv, last, vma->node.start); > + if (size > fence_largest) > + fence_largest = size; > + fence_space += size; > + > +skip_first: > + last = vma->node.start + vma->node.size; > + } > + if (last == ~0U) > + last = 0; > + if (last < map_limit) { > + size = map_limit - last; > + if (size > map_largest) > + map_largest = size; > + map_space += size; > + > + size = __fence_size(dev_priv, last, map_limit); > + if (size > fence_largest) > + fence_largest = size; > + fence_space += size; > + } > mutex_unlock(&dev->struct_mutex); > > args->aper_size = dev_priv->gtt.base.total; > args->aper_available_size = 
args->aper_size - pinned; > + args->map_available_size = map_space; > + args->map_largest_size = map_largest; > + args->fence_available_size = fence_space; > + args->fence_largest_size = fence_largest; > > return 0; > } > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index 422513ba9e59..35308691cf8d 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -941,6 +941,26 @@ struct drm_i915_gem_get_aperture { > * bytes > */ > __u64 aper_available_size; > + > + /** > + * Available space in the mappable region of the aperture, in bytes > + */ > + __u32 map_available_size; > + > + /** > + * Single largest available region inside the mappable region, in bytes. > + */ > + __u32 map_largest_size; > + > + /** > + * Culmulative space available for fences, in bytes > + */ > + __u32 fence_available_size; > + > + /** > + * Single largest fenceable region, in bytes. > + */ > + __u32 fence_largest_size; > }; So what will happen when an old usermode program (built with the old, shorter structure) calls the ioctl? I believe the memory which happens to be located after the structure will be corrupted, or am I missing some magic there? I.e., the question is why this patch does not break the ABI. > > struct drm_i915_get_pipe_from_crtc_id { > -- > 2.1.0 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Attachment:
pgpqPIXL8t4Oa.pgp
Description: PGP signature
_______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx