On Tue, May 20, 2014 at 08:28:43AM +0100, Chris Wilson wrote: > Before the process killer is invoked, oom-notifiers are executed for one > last try at recovering pages. We can hook into this callback to be sure > that everything that can be is purged from our page lists, and to give a > summary of how much memory is still pinned by the GPU in the case of an > oom. > > References: https://bugs.freedesktop.org/show_bug.cgi?id=72742 > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Tested-by: lu hua <huax.lu@xxxxxxxxx> Dave, can you please have a look at this and ack it from a core mm perspective? I'd just like your ack since you've worked together with Chris on these issues. Thanks, Daniel > --- > drivers/gpu/drm/i915/i915_drv.h | 1 + > drivers/gpu/drm/i915/i915_gem.c | 74 +++++++++++++++++++++++++++++++++++++++-- > 2 files changed, 72 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index e69cb51de738..389204d44431 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1057,6 +1057,7 @@ struct i915_gem_mm { > /** PPGTT used for aliasing the PPGTT with the GTT */ > struct i915_hw_ppgtt *aliasing_ppgtt; > > + struct notifier_block oom_notifier; > struct shrinker shrinker; > bool shrinker_no_lock_stealing; > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index ea93898d51bc..dc8e1ef50bfb 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -31,6 +31,7 @@ > #include "i915_drv.h" > #include "i915_trace.h" > #include "intel_drv.h" > +#include <linux/oom.h> > #include <linux/shmem_fs.h> > #include <linux/slab.h> > #include <linux/swap.h> > @@ -61,6 +62,9 @@ static unsigned long i915_gem_shrinker_count(struct shrinker *shrinker, > struct shrink_control *sc); > static unsigned long i915_gem_shrinker_scan(struct shrinker *shrinker, > struct shrink_control *sc); > +static int 
i915_gem_shrinker_oom(struct notifier_block *nb, > + unsigned long event, > + void *ptr); > static unsigned long i915_gem_purge(struct drm_i915_private *dev_priv, long target); > static unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv); > > @@ -4759,6 +4763,9 @@ i915_gem_load(struct drm_device *dev) > dev_priv->mm.shrinker.count_objects = i915_gem_shrinker_count; > dev_priv->mm.shrinker.seeks = DEFAULT_SEEKS; > register_shrinker(&dev_priv->mm.shrinker); > + > + dev_priv->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom; > + register_oom_notifier(&dev_priv->mm.oom_notifier); > } > > /* > @@ -5154,15 +5161,76 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) > freed += __i915_gem_shrink(dev_priv, > sc->nr_to_scan - freed, > false); > - if (freed < sc->nr_to_scan) > - freed += i915_gem_shrink_all(dev_priv); > - > if (unlock) > mutex_unlock(&dev->struct_mutex); > > return freed; > } > > +static int > +i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) > +{ > + struct drm_i915_private *dev_priv = > + container_of(nb, struct drm_i915_private, mm.oom_notifier); > + struct drm_device *dev = dev_priv->dev; > + struct drm_i915_gem_object *obj; > + unsigned long timeout = msecs_to_jiffies(5000) + 1; > + unsigned long pinned, bound, unbound, freed; > + bool was_interruptible; > + bool unlock; > + > + while (!i915_gem_shrinker_lock(dev, &unlock) && --timeout) > + schedule_timeout_killable(1); > + if (timeout == 0) { > + pr_err("Unable to purge GPU memory due lock contention.\n"); > + return NOTIFY_DONE; > + } > + > + was_interruptible = dev_priv->mm.interruptible; > + dev_priv->mm.interruptible = false; > + > + freed = i915_gem_shrink_all(dev_priv); > + > + dev_priv->mm.interruptible = was_interruptible; > + > + /* Because we may be allocating inside our own driver, we cannot > + * assert that there are no objects with pinned pages that are not > + * being pointed to by hardware. 
> + */ > + unbound = bound = pinned = 0; > + list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) { > + if (!obj->base.filp) /* not backed by a freeable object */ > + continue; > + > + if (obj->pages_pin_count) > + pinned += obj->base.size; > + else > + unbound += obj->base.size; > + } > + list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { > + if (!obj->base.filp) > + continue; > + > + if (obj->pages_pin_count) > + pinned += obj->base.size; > + else > + bound += obj->base.size; > + } > + > + if (unlock) > + mutex_unlock(&dev->struct_mutex); > + > + pr_info("Purging GPU memory, %lu bytes freed, %lu bytes still pinned.\n", > + freed, pinned); > + if (unbound | bound) > + pr_err("%lu and %lu bytes still available in the " > + "bound and unbound GPU page lists.\n", > + bound, unbound); > + > + *(unsigned long *)ptr += freed; > + return NOTIFY_DONE; > +} > + > struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj) > { > struct i915_vma *vma; > -- > 2.0.0.rc2 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx