On Tue, Sep 11, 2018 at 04:14:39PM +0100, Chris Wilson wrote: > Whilst reviewing another new user of stolen memory, Ville made the > observation that we should try to ensure that all permanent allocations > within stolen memory are clustered together at either end of the stolen > region, in order to reduce fragmentation. In the depths of > i915_gem_stolen.c it is not always clear what manner of allocation we > need, so expose the drm_mm search parameter and push the decision to our > callers. > > Suggested-by: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx> > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Cc: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_drv.h | 15 +++++++++------ > drivers/gpu/drm/i915/i915_gem_stolen.c | 19 ++++++++++++------- > drivers/gpu/drm/i915/intel_engine_cs.c | 3 ++- > drivers/gpu/drm/i915/intel_fbc.c | 13 ++++++++----- > drivers/gpu/drm/i915/intel_fbdev.c | 3 ++- > drivers/gpu/drm/i915/intel_overlay.c | 3 ++- > drivers/gpu/drm/i915/intel_pm.c | 3 ++- > drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- > 8 files changed, 38 insertions(+), 23 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 7ea442033a57..e68102141067 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -3302,19 +3302,22 @@ static inline void i915_gem_chipset_flush(struct drm_i915_private *dev_priv) > > /* i915_gem_stolen.c */ > int i915_gem_stolen_insert_node(struct drm_i915_private *dev_priv, > - struct drm_mm_node *node, u64 size, > - unsigned alignment); > + struct drm_mm_node *node, > + u64 size, unsigned int alignment, > + unsigned int search); > int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv, > - struct drm_mm_node *node, u64 size, > - unsigned alignment, u64 start, > - u64 end); > + struct drm_mm_node *node, > + u64 size, unsigned int alignment, > + u64 start, u64 end, > + unsigned int search); > void 
i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv, > struct drm_mm_node *node); > int i915_gem_init_stolen(struct drm_i915_private *dev_priv); > void i915_gem_cleanup_stolen(struct drm_device *dev); > struct drm_i915_gem_object * > i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, > - resource_size_t size); > + resource_size_t size, > + unsigned int search); > struct drm_i915_gem_object * > i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv, > resource_size_t stolen_offset, > diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c > index 53440bf87650..ed440e280dd0 100644 > --- a/drivers/gpu/drm/i915/i915_gem_stolen.c > +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c > @@ -43,8 +43,10 @@ > */ > > int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv, > - struct drm_mm_node *node, u64 size, > - unsigned alignment, u64 start, u64 end) > + struct drm_mm_node *node, > + u64 size, unsigned int alignment, > + u64 start, u64 end, > + unsigned int search) > { > int ret; > > @@ -58,7 +60,7 @@ int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv, > mutex_lock(&dev_priv->mm.stolen_lock); > ret = drm_mm_insert_node_in_range(&dev_priv->mm.stolen, node, > size, alignment, 0, > - start, end, DRM_MM_INSERT_BEST); > + start, end, search); > mutex_unlock(&dev_priv->mm.stolen_lock); > > return ret; > @@ -66,10 +68,12 @@ int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv, > > int i915_gem_stolen_insert_node(struct drm_i915_private *dev_priv, > struct drm_mm_node *node, u64 size, > - unsigned alignment) > + unsigned int alignment, > + unsigned int search) > { > return i915_gem_stolen_insert_node_in_range(dev_priv, node, size, > - alignment, 0, U64_MAX); > + alignment, 0, U64_MAX, > + search); > } > > void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv, > @@ -591,7 +595,8 @@ _i915_gem_object_create_stolen(struct 
drm_i915_private *dev_priv, > > struct drm_i915_gem_object * > i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, > - resource_size_t size) > + resource_size_t size, > + unsigned int search) > { > struct drm_i915_gem_object *obj; > struct drm_mm_node *stolen; > @@ -607,7 +612,7 @@ i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, > if (!stolen) > return NULL; > > - ret = i915_gem_stolen_insert_node(dev_priv, stolen, size, 4096); > + ret = i915_gem_stolen_insert_node(dev_priv, stolen, size, 4096, search); > if (ret) { > kfree(stolen); > return NULL; > diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c > index 10cd051ba29e..c945a9fb54ae 100644 > --- a/drivers/gpu/drm/i915/intel_engine_cs.c > +++ b/drivers/gpu/drm/i915/intel_engine_cs.c > @@ -499,7 +499,8 @@ int intel_engine_create_scratch(struct intel_engine_cs *engine, > > WARN_ON(engine->scratch); > > - obj = i915_gem_object_create_stolen(engine->i915, size); > + obj = i915_gem_object_create_stolen(engine->i915, > + size, DRM_MM_INSERT_LOW); > if (!obj) > obj = i915_gem_object_create_internal(engine->i915, size); > if (IS_ERR(obj)) { > diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c > index 01d1d2088f04..bf62d3d08e82 100644 > --- a/drivers/gpu/drm/i915/intel_fbc.c > +++ b/drivers/gpu/drm/i915/intel_fbc.c > @@ -457,8 +457,9 @@ static int find_compression_threshold(struct drm_i915_private *dev_priv, > */ > > /* Try to over-allocate to reduce reallocations and fragmentation. 
*/ > - ret = i915_gem_stolen_insert_node_in_range(dev_priv, node, size <<= 1, > - 4096, 0, end); > + ret = i915_gem_stolen_insert_node_in_range(dev_priv, node, > + size <<= 1, 4096, > + 0, end, DRM_MM_INSERT_BEST); > if (ret == 0) > return compression_threshold; > > @@ -468,8 +469,9 @@ static int find_compression_threshold(struct drm_i915_private *dev_priv, > (fb_cpp == 2 && compression_threshold == 2)) > return 0; > > - ret = i915_gem_stolen_insert_node_in_range(dev_priv, node, size >>= 1, > - 4096, 0, end); > + ret = i915_gem_stolen_insert_node_in_range(dev_priv, node, > + size >>= 1, 4096, > + 0, end, DRM_MM_INSERT_BEST); > if (ret && INTEL_GEN(dev_priv) <= 4) { > return 0; > } else if (ret) { > @@ -513,7 +515,8 @@ static int intel_fbc_alloc_cfb(struct intel_crtc *crtc) > goto err_fb; > > ret = i915_gem_stolen_insert_node(dev_priv, compressed_llb, > - 4096, 4096); > + 4096, 4096, > + DRM_MM_INSERT_LOW);
We seem to allocate/free the line length buffer alongside the CFB, so should this use DRM_MM_INSERT_BEST instead?
> if (ret) > goto err_fb; > > diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c > index fb2f9fce34cd..0e1ddbf1c5a0 100644 > --- a/drivers/gpu/drm/i915/intel_fbdev.c > +++ b/drivers/gpu/drm/i915/intel_fbdev.c > @@ -140,7 +140,8 @@ static int intelfb_alloc(struct drm_fb_helper *helper, > * features.
*/ > obj = NULL; > if (size * 2 < dev_priv->stolen_usable_size) > - obj = i915_gem_object_create_stolen(dev_priv, size); > + obj = i915_gem_object_create_stolen(dev_priv, > + size, DRM_MM_INSERT_LOW); > if (obj == NULL) > obj = i915_gem_object_create(dev_priv, size); > if (IS_ERR(obj)) { > diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c > index 72eb7e48e8bc..b134b9cabf93 100644 > --- a/drivers/gpu/drm/i915/intel_overlay.c > +++ b/drivers/gpu/drm/i915/intel_overlay.c > @@ -1306,7 +1306,8 @@ static int get_registers(struct intel_overlay *overlay, bool use_phys) > struct i915_vma *vma; > int err; > > - obj = i915_gem_object_create_stolen(overlay->i915, PAGE_SIZE); > + obj = i915_gem_object_create_stolen(overlay->i915, > + PAGE_SIZE, DRM_MM_INSERT_LOW); > if (obj == NULL) > obj = i915_gem_object_create_internal(overlay->i915, PAGE_SIZE); > if (IS_ERR(obj)) > diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c > index d99e5fabe93c..5d18301ba079 100644 > --- a/drivers/gpu/drm/i915/intel_pm.c > +++ b/drivers/gpu/drm/i915/intel_pm.c > @@ -7382,7 +7382,8 @@ static void valleyview_setup_pctx(struct drm_i915_private *dev_priv) > * overlap with other ranges, such as the frame buffer, protected > * memory, or any other relevant ranges. > */ > - pctx = i915_gem_object_create_stolen(dev_priv, pctx_size); > + pctx = i915_gem_object_create_stolen(dev_priv, > + pctx_size, DRM_MM_INSERT_LOW);
I guess there was no special requirement for the placement of this. AFAIK the BIOS always allocates it just below the WOPCM, but I suppose it doesn't matter if we take a different approach.
> if (!pctx) { > DRM_DEBUG("not enough stolen space for PCTX, disabling\n"); > goto out; > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c > index 472939f5c18f..e6a23a241cf3 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c > @@ -1104,7 +1104,7 @@ intel_ring_create_vma(struct drm_i915_private *dev_priv, int size) > struct drm_i915_gem_object *obj; > struct i915_vma *vma; > > - obj = i915_gem_object_create_stolen(dev_priv, size); > + obj = i915_gem_object_create_stolen(dev_priv, size, DRM_MM_INSERT_BEST);
Should these use DRM_MM_INSERT_LOW instead? We never reallocate them, right?
> if (!obj) > obj = i915_gem_object_create_internal(dev_priv, size); > if (IS_ERR(obj)) > -- > 2.19.0.rc2 -- Ville Syrjälä Intel _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx