On Fri, Jul 05, 2013 at 02:42:04PM +0100, Chris Wilson wrote: > Despite being a unified memory architecture (UMA) some bits of memory > are more equal than others. In particular we have the thorny issue of > stolen memory, memory stolen from the system by the BIOS and reserved > for igfx use. Stolen memory is required for some functions of the GPU > and display engine, but in general it goes wasted. Whilst we cannot > return it back to the system, we need to find some other method for > utilising it. As we do not support direct access to the physical address > in the stolen region, it behaves like a different class of memory, > closer in kin to local GPU memory. This strongly suggests that we need a > placement model like TTM if we are to fully utilize these discrete > chunks of differing memory. > > This new create ioctl therefore exists to allow the user to create these > second class buffer objects from stolen memory. At the moment direct > access by the CPU through mmaps and pread/pwrite are verboten on the > objects, and so the user must be aware of the limitations of the objects > created. Yet, those limitations rarely reduce the desired functionality > in many use cases and so the user should be able to easily fill the > stolen memory and so help to reduce overall memory pressure. > > The most obvious use case for stolen memory is for the creation of objects > for the display engine which already have very similar restrictions on > access. However, we want a reasonably general ioctl in order to cater > for diverse scenarios beyond the author's imagination. > > Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk> Looks like a sane idea. But I think we should call the new placement flags placaments, not domains, since imo they are a rather different concept than what we currently call domains. I also wonder whether we should have some for of initial domain (for people who want to do the clflushing up-front, or in case we ever get a clflushed page-cache), but the spare bits in flags should be good enough. Or maybe leave the domain ioctl struct member and move the placement defines to the high bits. And I want i-g-t tests, both basic functionality tests and checking whether all the verboten stuff is indeed forbidden. -Daniel > --- > drivers/gpu/drm/i915/i915_dma.c | 5 +- > drivers/gpu/drm/i915/i915_drv.h | 15 +++-- > drivers/gpu/drm/i915/i915_gem.c | 69 +++++++++++++++++++-- > drivers/gpu/drm/i915/i915_gem_tiling.c | 107 ++++++++++++++++++--------------- > include/uapi/drm/i915_drm.h | 55 +++++++++++++++++ > 5 files changed, 194 insertions(+), 57 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c > index 0e22142..0c7299d 100644 > --- a/drivers/gpu/drm/i915/i915_dma.c > +++ b/drivers/gpu/drm/i915/i915_dma.c > @@ -1876,8 +1876,8 @@ struct drm_ioctl_desc i915_ioctls[] = { > DRM_IOCTL_DEF_DRV(I915_GEM_MMAP_GTT, i915_gem_mmap_gtt_ioctl, DRM_UNLOCKED), > DRM_IOCTL_DEF_DRV(I915_GEM_SET_DOMAIN, i915_gem_set_domain_ioctl, DRM_UNLOCKED), > DRM_IOCTL_DEF_DRV(I915_GEM_SW_FINISH, i915_gem_sw_finish_ioctl, DRM_UNLOCKED), > - DRM_IOCTL_DEF_DRV(I915_GEM_SET_TILING, i915_gem_set_tiling, DRM_UNLOCKED), > - DRM_IOCTL_DEF_DRV(I915_GEM_GET_TILING, i915_gem_get_tiling, DRM_UNLOCKED), > + DRM_IOCTL_DEF_DRV(I915_GEM_SET_TILING, i915_gem_set_tiling_ioctl, DRM_UNLOCKED), > + DRM_IOCTL_DEF_DRV(I915_GEM_GET_TILING, i915_gem_get_tiling_ioctl, DRM_UNLOCKED), > DRM_IOCTL_DEF_DRV(I915_GEM_GET_APERTURE, i915_gem_get_aperture_ioctl, DRM_UNLOCKED), > DRM_IOCTL_DEF_DRV(I915_GET_PIPE_FROM_CRTC_ID, intel_get_pipe_from_crtc_id, DRM_UNLOCKED), > DRM_IOCTL_DEF_DRV(I915_GEM_MADVISE, i915_gem_madvise_ioctl, DRM_UNLOCKED), > @@ -1889,6 +1889,7 @@ struct drm_ioctl_desc i915_ioctls[] = { > DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE, i915_gem_context_create_ioctl, DRM_UNLOCKED), > DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_UNLOCKED), > DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED), > + DRM_IOCTL_DEF_DRV(I915_GEM_CREATE2, i915_gem_create2_ioctl, DRM_AUTH|DRM_UNLOCKED), > }; > > int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls); > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index dc7475d..cb54a66 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1646,6 +1646,8 @@ int i915_gem_init_ioctl(struct drm_device *dev, void *data, > struct drm_file *file_priv); > int i915_gem_create_ioctl(struct drm_device *dev, void *data, > struct drm_file *file_priv); > +int i915_gem_create2_ioctl(struct drm_device *dev, void *data, > + struct drm_file *file_priv); > int i915_gem_pread_ioctl(struct drm_device *dev, void *data, > struct drm_file *file_priv); > int i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, > @@ -1680,10 +1682,10 @@ int i915_gem_entervt_ioctl(struct drm_device *dev, void *data, > struct drm_file *file_priv); > int i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, > struct drm_file *file_priv); > -int i915_gem_set_tiling(struct drm_device *dev, void *data, > - struct drm_file *file_priv); > -int i915_gem_get_tiling(struct drm_device *dev, void *data, > - struct drm_file *file_priv); > +int i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data, > + struct drm_file *file_priv); > +int i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data, > + struct drm_file *file_priv); > int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, > struct drm_file *file_priv); > int i915_gem_wait_ioctl(struct drm_device *dev, void *data, > @@ -1797,6 +1799,8 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj); > int __must_check i915_gem_object_set_domain(struct drm_i915_gem_object *obj, > uint32_t read_domains, > uint32_t write_domain); > +int __must_check i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, > + int tiling_mode, int pitch); > int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj); > int __must_check i915_gem_init(struct drm_device *dev); > int __must_check i915_gem_init_hw(struct drm_device *dev); > @@ -1832,6 +1836,9 @@ void i915_gem_detach_phys_object(struct drm_device *dev, > void i915_gem_free_all_phys_object(struct drm_device *dev); > void i915_gem_release(struct drm_device *dev, struct drm_file *file); > > +bool > +i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode); > + > uint32_t > i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode); > uint32_t > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 3ea54c8..ac24bfb 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -242,8 +242,7 @@ i915_gem_dumb_create(struct drm_file *file, > /* have to work out size/pitch and return them */ > args->pitch = ALIGN(args->width * ((args->bpp + 7) / 8), 64); > args->size = args->pitch * args->height; > - return i915_gem_create(file, dev, > - args->size, &args->handle); > + return i915_gem_create(file, dev, args->size, &args->handle); > } > > int i915_gem_dumb_destroy(struct drm_file *file, > @@ -261,9 +260,71 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data, > struct drm_file *file) > { > struct drm_i915_gem_create *args = data; > + return i915_gem_create(file, dev, args->size, &args->handle); > +} > + > +int > +i915_gem_create2_ioctl(struct drm_device *dev, void *data, > + struct drm_file *file) > +{ > + struct drm_i915_gem_create2 *args = data; > + struct drm_i915_gem_object *obj; > + unsigned cache_level; > + int ret; > + > + if (args->flags & ~(I915_CREATE_FLAG_INEXACT_DOMAIN)) > + return -EINVAL; > + > + if (!i915_tiling_ok(dev, > + args->stride, args->size, args->tiling_mode)) > + return -EINVAL; > + > + switch (args->caching) { > + case I915_CACHING_NONE: > + cache_level = I915_CACHE_NONE; > + break; > + case I915_CACHING_CACHED: > + cache_level = I915_CACHE_LLC; > + break; > + default: > + return -EINVAL; > + } > + > + if (args->size == 0 || args->size & 4095) > + return -EINVAL; > + > + obj = NULL; > + switch (args->domain) { > + case I915_CREATE_DOMAIN_SYSTEM: > + obj = i915_gem_alloc_object(dev, args->size); > + break; > + case I915_CREATE_DOMAIN_STOLEN: > + obj = i915_gem_object_create_stolen(dev, args->size); > + break; > + default: > + return -EINVAL; > + } > + if (obj == NULL) > + return -ENOMEM; > + > + mutex_lock(&dev->struct_mutex); > + ret = i915_gem_object_set_cache_level(obj, cache_level); > + if (ret) > + goto out; > > - return i915_gem_create(file, dev, > - args->size, &args->handle); > + ret = i915_gem_object_set_tiling(obj, args->tiling_mode, args->stride); > + if (ret) > + goto out; > + > + ret = drm_gem_handle_create(file, &obj->base, &args->handle); > + if (ret) > + goto out; > + > + trace_i915_gem_object_create(obj); > +out: > + drm_gem_object_unreference(&obj->base); > + mutex_unlock(&dev->struct_mutex); > + return ret; > } > > static inline int > diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c > index 92a8d27..cbd89f8 100644 > --- a/drivers/gpu/drm/i915/i915_gem_tiling.c > +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c > @@ -201,7 +201,7 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev) > } > > /* Check pitch constriants for all chips & tiling formats */ > -static bool > +bool > i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode) > { > int tile_width; > @@ -285,13 +285,67 @@ i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode) > return true; > } > > +int > +i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, > + int tiling_mode, int stride) > +{ > + struct drm_i915_private *dev_priv = obj->base.dev->dev_private; > + int ret; > + > + if (tiling_mode == obj->tiling_mode && stride == obj->stride) > + return 0; > + > + /* We need to rebind the object if its current allocation > + * no longer meets the alignment restrictions for its new > + * tiling mode. Otherwise we can just leave it alone, but > + * need to ensure that any fence register is updated before > + * the next fenced (either through the GTT or by the BLT unit > + * on older GPUs) access. > + * > + * After updating the tiling parameters, we then flag whether > + * we need to update an associated fence register. Note this > + * has to also include the unfenced register the GPU uses > + * whilst executing a fenced command for an untiled object. > + */ > + > + obj->map_and_fenceable = > + !i915_gem_obj_ggtt_bound(obj) || > + (i915_gem_obj_ggtt_offset(obj) + obj->base.size <= dev_priv->gtt.mappable_end && > + i915_gem_object_fence_ok(obj, tiling_mode)); > + > + /* Rebind if we need a change of alignment */ > + ret = 0; > + if (!obj->map_and_fenceable) { > + u32 unfenced_alignment = > + i915_gem_get_gtt_alignment(dev_priv->dev, > + obj->base.size, tiling_mode, > + false); > + if (i915_gem_obj_ggtt_offset(obj) & (unfenced_alignment - 1)) > + ret = i915_gem_object_unbind(obj); > + } > + > + if (ret == 0) { > + obj->fence_dirty = > + obj->fenced_gpu_access || > + obj->fence_reg != I915_FENCE_REG_NONE; > + > + obj->tiling_mode = tiling_mode; > + obj->stride = stride; > + > + /* Force the fence to be reacquired for GTT access */ > + i915_gem_release_mmap(obj); > + } > + > + return ret; > +} > + > /** > * Sets the tiling mode of an object, returning the required swizzling of > * bit 6 of addresses in the object. > */ > int > -i915_gem_set_tiling(struct drm_device *dev, void *data, > - struct drm_file *file) > +i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data, > + struct drm_file *file) > { > struct drm_i915_gem_set_tiling *args = data; > drm_i915_private_t *dev_priv = dev->dev_private; > @@ -343,48 +397,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, > } > > mutex_lock(&dev->struct_mutex); > - if (args->tiling_mode != obj->tiling_mode || > - args->stride != obj->stride) { > - /* We need to rebind the object if its current allocation > - * no longer meets the alignment restrictions for its new > - * tiling mode. Otherwise we can just leave it alone, but > - * need to ensure that any fence register is updated before > - * the next fenced (either through the GTT or by the BLT unit > - * on older GPUs) access. > - * > - * After updating the tiling parameters, we then flag whether > - * we need to update an associated fence register. Note this > - * has to also include the unfenced register the GPU uses > - * whilst executing a fenced command for an untiled object. > - */ > - > - obj->map_and_fenceable = > - !i915_gem_obj_ggtt_bound(obj) || > - (i915_gem_obj_ggtt_offset(obj) + obj->base.size <= dev_priv->gtt.mappable_end && > - i915_gem_object_fence_ok(obj, args->tiling_mode)); > - > - /* Rebind if we need a change of alignment */ > - if (!obj->map_and_fenceable) { > - u32 unfenced_alignment = > - i915_gem_get_gtt_alignment(dev, obj->base.size, > - args->tiling_mode, > - false); > - if (i915_gem_obj_ggtt_offset(obj) & (unfenced_alignment - 1)) > - ret = i915_gem_object_unbind(obj); > - } > - > - if (ret == 0) { > - obj->fence_dirty = > - obj->fenced_gpu_access || > - obj->fence_reg != I915_FENCE_REG_NONE; > - > - obj->tiling_mode = args->tiling_mode; > - obj->stride = args->stride; > - > - /* Force the fence to be reacquired for GTT access */ > - i915_gem_release_mmap(obj); > - } > - } > + ret = i915_gem_object_set_tiling(obj, args->tiling_mode, args->stride); > /* we have to maintain this existing ABI... */ > args->stride = obj->stride; > args->tiling_mode = obj->tiling_mode; > @@ -410,8 +423,8 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, > * Returns the current tiling mode and required bit 6 swizzling for the object. > */ > int > -i915_gem_get_tiling(struct drm_device *dev, void *data, > - struct drm_file *file) > +i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data, > + struct drm_file *file) > { > struct drm_i915_gem_get_tiling *args = data; > drm_i915_private_t *dev_priv = dev->dev_private; > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index 923ed7f..b3b21fb 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -198,6 +198,7 @@ typedef struct _drm_i915_sarea { > #define DRM_I915_GEM_SET_CACHING 0x2f > #define DRM_I915_GEM_GET_CACHING 0x30 > #define DRM_I915_REG_READ 0x31 > +#define DRM_I915_GEM_CREATE2 0x32 > > #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) > #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH) > @@ -228,6 +229,7 @@ typedef struct _drm_i915_sarea { > #define DRM_IOCTL_I915_GEM_ENTERVT DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_ENTERVT) > #define DRM_IOCTL_I915_GEM_LEAVEVT DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_LEAVEVT) > #define DRM_IOCTL_I915_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE, struct drm_i915_gem_create) > +#define DRM_IOCTL_I915_GEM_CREATE2 DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE2, struct drm_i915_gem_create2) > #define DRM_IOCTL_I915_GEM_PREAD DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PREAD, struct drm_i915_gem_pread) > #define DRM_IOCTL_I915_GEM_PWRITE DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PWRITE, struct drm_i915_gem_pwrite) > #define DRM_IOCTL_I915_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct drm_i915_gem_mmap) > @@ -407,6 +409,59 @@ struct drm_i915_gem_create { > __u32 pad; > }; > > +struct drm_i915_gem_create2 { > + /** > + * Requested size for the object. > + * > + * The (page-aligned) allocated size for the object will be returned. > + */ > + __u64 size; > + > + /** > + * Requested placement or memory domain > + * > + * You can request that the object be created from special memory > + * rather than regular system pages. Such irregular objects may > + * have certain restrictions (such as CPU access to a stolen > + * object is verboten). > + */ > + __u32 domain; > +#define I915_CREATE_DOMAIN_SYSTEM 0 > +#define I915_CREATE_DOMAIN_STOLEN 1 /* Cannot use CPU mmaps or pread/pwrite */ > + /** > + * Requested cache level. > + * > + * See DRM_IOCTL_I915_GEM_SET_CACHING > + */ > + __u32 caching; > + > + /** > + * Requested tiling mode. > + * > + * See DRM_IOCTL_I915_GEM_SET_TILING > + */ > + __u32 tiling_mode; > + /** > + * Requested stride for tiling. > + * > + * See DRM_IOCTL_I915_GEM_SET_TILING > + */ > + __u32 stride; > + > + /** > + * Additional miscellaneous flags > + * > + */ > + __u32 flags; > + > + /** > + * Returned handle for the object. > + * > + * Object handles are nonzero. > + */ > + __u32 handle; > +}; > + > struct drm_i915_gem_pread { > /** Handle for the object being read. */ > __u32 handle; > -- > 1.8.3.2 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx at lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch