On Thu, Jan 28, 2021 at 02:16:02PM +0100, Christian König wrote: > TTM implements a rather extensive accounting of allocated memory. > > There are two reasons for this: > 1. It tries to block userspace allocating a huge number of very small > BOs without accounting for the kmalloced memory. > > 2. Make sure we don't over allocate and run into an OOM situation > during swapout while trying to handle the memory shortage. > > This is only partially a good idea. First of all it is perfectly > valid for an application to use all of system memory, limiting it to > 50% is not really acceptable. > > What we need to take care of is that the application is held > accountable for the memory it allocated. This is what control > mechanisms like memcg and the normal Linux page accounting already do. > > Making sure that we don't run into an OOM situation while trying to > cope with a memory shortage is still a good idea, but this is also > not very well implemented since it means another opportunity of > recursion from the driver back into TTM. > > So start to rework all of this by implementing a shrinker callback which > allows for TT object to be swapped out if necessary. > > v2: Switch from limit to shrinker callback. > v3: fix gfp mask handling, use atomic for swapable_pages, add debugfs > > Signed-off-by: Christian König <christian.koenig@xxxxxxx> > --- > drivers/gpu/drm/ttm/ttm_bo.c | 4 +- > drivers/gpu/drm/ttm/ttm_memory.c | 7 +- > drivers/gpu/drm/ttm/ttm_tt.c | 111 ++++++++++++++++++++++++++-- > drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2 +- > include/drm/ttm/ttm_bo_api.h | 2 +- > include/drm/ttm/ttm_tt.h | 6 +- > 6 files changed, 117 insertions(+), 15 deletions(-) > > diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c > index 20256797f3a6..643befc1a6f2 100644 > --- a/drivers/gpu/drm/ttm/ttm_bo.c > +++ b/drivers/gpu/drm/ttm/ttm_bo.c > @@ -1219,7 +1219,7 @@ EXPORT_SYMBOL(ttm_bo_wait); > * A buffer object shrink method that tries to swap out the first > * buffer object on the bo_global::swap_lru list. > */ > -int ttm_bo_swapout(struct ttm_operation_ctx *ctx) > +int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags) > { > struct ttm_global *glob = &ttm_glob; > struct ttm_buffer_object *bo; > @@ -1302,7 +1302,7 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx) > if (bo->bdev->funcs->swap_notify) > bo->bdev->funcs->swap_notify(bo); > > - ret = ttm_tt_swapout(bo->bdev, bo->ttm); > + ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags); > out: > > /** > diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c > index a3bfbd9cea68..634a85c2dc4c 100644 > --- a/drivers/gpu/drm/ttm/ttm_memory.c > +++ b/drivers/gpu/drm/ttm/ttm_memory.c > @@ -37,6 +37,7 @@ > #include <linux/slab.h> > #include <linux/swap.h> > #include <drm/ttm/ttm_pool.h> > +#include <drm/ttm/ttm_tt.h> > > #include "ttm_module.h" > > @@ -276,9 +277,9 @@ static void ttm_shrink(struct ttm_mem_global *glob, bool from_wq, > > while (ttm_zones_above_swap_target(glob, from_wq, extra)) { > spin_unlock(&glob->lock); > - ret = ttm_bo_swapout(ctx); > + ret = ttm_bo_swapout(ctx, GFP_KERNEL); > spin_lock(&glob->lock); > - if (unlikely(ret != 0)) > + if (unlikely(ret < 0)) > break; > } > > @@ -453,6 +454,7 @@ int ttm_mem_global_init(struct ttm_mem_global *glob) > zone->name, (unsigned long long)zone->max_mem >> 10); > } > ttm_pool_mgr_init(glob->zone_kernel->max_mem/(2*PAGE_SIZE)); > + ttm_tt_mgr_init(); > return 0; > out_no_zone: > ttm_mem_global_release(glob); > @@ -466,6 +468,7 @@ void ttm_mem_global_release(struct ttm_mem_global *glob) > > /* let the page allocator first stop the shrink work. */ > ttm_pool_mgr_fini(); > + ttm_tt_mgr_fini(); > > flush_workqueue(glob->swap_queue); > destroy_workqueue(glob->swap_queue); > diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c > index 7782d5393c7c..b67795de228d 100644 > --- a/drivers/gpu/drm/ttm/ttm_tt.c > +++ b/drivers/gpu/drm/ttm/ttm_tt.c > @@ -38,6 +38,11 @@ > #include <drm/drm_cache.h> > #include <drm/ttm/ttm_bo_driver.h> > > +#include "ttm_module.h" > + > +static struct shrinker mm_shrinker; > +static atomic_long_t swapable_pages; > + > /* > * Allocates a ttm structure for the given BO. > */ > @@ -223,32 +228,41 @@ int ttm_tt_swapin(struct ttm_tt *ttm) > return ret; > } > > -int ttm_tt_swapout(struct ttm_device *bdev, struct ttm_tt *ttm) > +/** > + * ttm_tt_swapout - swap out tt object > + * > + * @bdev: TTM device structure. > + * @ttm: The struct ttm_tt. > + * @gfp_flags: Flags to use for memory allocation. > + * > + * Swapout a TT object to a shmem_file, return number of pages swapped out or > + * negative error code. > + */ > +int ttm_tt_swapout(struct ttm_device *bdev, struct ttm_tt *ttm, > + gfp_t gfp_flags) > { > + loff_t size = (loff_t)ttm->num_pages << PAGE_SHIFT; > struct address_space *swap_space; > struct file *swap_storage; > struct page *from_page; > struct page *to_page; > - gfp_t gfp_mask; > int i, ret; > > - swap_storage = shmem_file_setup("ttm swap", > - ttm->num_pages << PAGE_SHIFT, > - 0); > + swap_storage = shmem_file_setup("ttm swap", size, 0); > if (IS_ERR(swap_storage)) { > pr_err("Failed allocating swap storage\n"); > return PTR_ERR(swap_storage); > } > > swap_space = swap_storage->f_mapping; > - gfp_mask = mapping_gfp_mask(swap_space); > + gfp_flags &= mapping_gfp_mask(swap_space); > > for (i = 0; i < ttm->num_pages; ++i) { > from_page = ttm->pages[i]; > if (unlikely(from_page == NULL)) > continue; > > - to_page = shmem_read_mapping_page_gfp(swap_space, i, gfp_mask); > + to_page = shmem_read_mapping_page_gfp(swap_space, i, gfp_flags); > if (IS_ERR(to_page)) { > ret = PTR_ERR(to_page); > goto out_err; > @@ -263,7 +277,7 @@ int ttm_tt_swapout(struct ttm_device *bdev, struct ttm_tt *ttm) > ttm->swap_storage = swap_storage; > ttm->page_flags |= TTM_PAGE_FLAG_SWAPPED; > > - return 0; > + return ttm->num_pages; > > out_err: > fput(swap_storage); > @@ -280,6 +294,8 @@ static void ttm_tt_add_mapping(struct ttm_device *bdev, struct ttm_tt *ttm) > > for (i = 0; i < ttm->num_pages; ++i) > ttm->pages[i]->mapping = bdev->dev_mapping; > + > + atomic_long_add(ttm->num_pages, &swapable_pages); > } > > int ttm_tt_populate(struct ttm_device *bdev, > @@ -326,6 +342,8 @@ static void ttm_tt_clear_mapping(struct ttm_tt *ttm) > (*page)->mapping = NULL; > (*page++)->index = 0; > } > + > + atomic_long_sub(ttm->num_pages, &swapable_pages); > } > > void ttm_tt_unpopulate(struct ttm_device *bdev, > @@ -341,3 +359,80 @@ void ttm_tt_unpopulate(struct ttm_device *bdev, > ttm_pool_free(&bdev->pool, ttm); > ttm->page_flags &= ~TTM_PAGE_FLAG_PRIV_POPULATED; > } > + > +/* As long as pages are available make sure to release at least one */ > +static unsigned long ttm_tt_shrinker_scan(struct shrinker *shrink, > + struct shrink_control *sc) > +{ > + struct ttm_operation_ctx ctx = { > + .no_wait_gpu = false > + }; > + int ret; > + > + if (!(sc->gfp_mask & __GFP_FS)) > + return SHRINK_EMPTY; These two checks here still look like cargo cult to me. I thought the gfp_mask you're getting is for numa/zone-aware shrinking, which we're not doing. __GFP_FS in the shrinker is a bug. Maybe convert to WARN_ON to convince yourself, test, then remove? If you ever get __GFP_FS context in a shrinker lockdep will start screaming real fast :-) With that addressed: Reviewed-by: Daniel Vetter <daniel.vetter@xxxxxxxx> > + > + ret = ttm_bo_swapout(&ctx, GFP_NOFS); > + return ret < 0 ? SHRINK_EMPTY : ret; > +} > + > +/* Return the number of pages available or SHRINK_EMPTY if we have none */ > +static unsigned long ttm_tt_shrinker_count(struct shrinker *shrink, > + struct shrink_control *sc) > +{ > + unsigned long num_pages; > + > + if (!(sc->gfp_mask & __GFP_FS)) > + return SHRINK_EMPTY; > + > + num_pages = atomic_long_read(&swapable_pages); > + return num_pages ? num_pages : SHRINK_EMPTY; > +} > + > +#ifdef CONFIG_DEBUG_FS > + > +/* Test the shrinker functions and dump the result */ > +static int ttm_tt_debugfs_shrink_show(struct seq_file *m, void *data) > +{ > + struct shrink_control sc = { .gfp_mask = GFP_KERNEL }; > + > + fs_reclaim_acquire(GFP_KERNEL); > + seq_printf(m, "%lu/%lu\n", ttm_tt_shrinker_count(&mm_shrinker, &sc), > + ttm_tt_shrinker_scan(&mm_shrinker, &sc)); > + fs_reclaim_release(GFP_KERNEL); > + > + return 0; > +} > +DEFINE_SHOW_ATTRIBUTE(ttm_tt_debugfs_shrink); > + > +#endif > + > + > + > +/** > + * ttm_tt_mgr_init - register with the MM shrinker > + * > + * Register with the MM shrinker for swapping out BOs. > + */ > +int ttm_tt_mgr_init(void) > +{ > +#ifdef CONFIG_DEBUG_FS > + debugfs_create_file("tt_shrink", 0400, ttm_debugfs_root, NULL, > + &ttm_tt_debugfs_shrink_fops); > +#endif > + > + mm_shrinker.count_objects = ttm_tt_shrinker_count; > + mm_shrinker.scan_objects = ttm_tt_shrinker_scan; > + mm_shrinker.seeks = 1; > + return register_shrinker(&mm_shrinker); > +} > + > +/** > + * ttm_tt_mgr_fini - unregister our MM shrinker > + * > + * Unregisters the MM shrinker. > + */ > +void ttm_tt_mgr_fini(void) > +{ > + unregister_shrinker(&mm_shrinker); > +} > diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c > index b454d80c273e..710ba5169a74 100644 > --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c > +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c > @@ -1383,7 +1383,7 @@ static int vmw_pm_freeze(struct device *kdev) > vmw_execbuf_release_pinned_bo(dev_priv); > vmw_resource_evict_all(dev_priv); > vmw_release_device_early(dev_priv); > - while (ttm_bo_swapout(&ctx) == 0); > + while (ttm_bo_swapout(&ctx, GFP_KERNEL) > 0); > if (dev_priv->enable_fb) > vmw_fifo_resource_dec(dev_priv); > if (atomic_read(&dev_priv->num_fifo_resources) != 0) { > diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h > index 62734db0b421..1297a8fb7ccb 100644 > --- a/include/drm/ttm/ttm_bo_api.h > +++ b/include/drm/ttm/ttm_bo_api.h > @@ -569,7 +569,7 @@ ssize_t ttm_bo_io(struct ttm_device *bdev, struct file *filp, > const char __user *wbuf, char __user *rbuf, > size_t count, loff_t *f_pos, bool write); > > -int ttm_bo_swapout(struct ttm_operation_ctx *ctx); > +int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags); > > /** > * ttm_bo_uses_embedded_gem_object - check if the given bo uses the > diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h > index 0020a0588985..cce57fb49e2c 100644 > --- a/include/drm/ttm/ttm_tt.h > +++ b/include/drm/ttm/ttm_tt.h > @@ -135,7 +135,8 @@ void ttm_tt_destroy_common(struct ttm_device *bdev, struct ttm_tt *ttm); > * Swap in a previously swap out ttm_tt. > */ > int ttm_tt_swapin(struct ttm_tt *ttm); > -int ttm_tt_swapout(struct ttm_device *bdev, struct ttm_tt *ttm); > +int ttm_tt_swapout(struct ttm_device *bdev, struct ttm_tt *ttm, > + gfp_t gfp_flags); > > /** > * ttm_tt_populate - allocate pages for a ttm > @@ -155,6 +156,9 @@ int ttm_tt_populate(struct ttm_device *bdev, struct ttm_tt *ttm, struct ttm_oper > */ > void ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm); > > +int ttm_tt_mgr_init(void); > +void ttm_tt_mgr_fini(void); > + > #if IS_ENABLED(CONFIG_AGP) > #include <linux/agp_backend.h> > > -- > 2.25.1 > -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/dri-devel