On Wed, Mar 24, 2021 at 02:48:45PM +0100, Christian König wrote: > The shrinker based approach still has some flaws. Especially that we need > temporary pages to free up the pages allocated to the driver is problematic > in a shrinker. > > Signed-off-by: Christian König <christian.koenig@xxxxxxx> > --- > drivers/gpu/drm/ttm/ttm_device.c | 14 ++-- > drivers/gpu/drm/ttm/ttm_tt.c | 112 ++++++++++++------------------- > include/drm/ttm/ttm_tt.h | 3 +- > 3 files changed, 53 insertions(+), 76 deletions(-) > > diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c > index 95e1b7b1f2e6..388da2a7f0bb 100644 > --- a/drivers/gpu/drm/ttm/ttm_device.c > +++ b/drivers/gpu/drm/ttm/ttm_device.c > @@ -53,7 +53,6 @@ static void ttm_global_release(void) > goto out; > > ttm_pool_mgr_fini(); > - ttm_tt_mgr_fini(); > > __free_page(glob->dummy_read_page); > memset(glob, 0, sizeof(*glob)); > @@ -64,7 +63,7 @@ static void ttm_global_release(void) > static int ttm_global_init(void) > { > struct ttm_global *glob = &ttm_glob; > - unsigned long num_pages; > + unsigned long num_pages, num_dma32; > struct sysinfo si; > int ret = 0; > unsigned i; > @@ -79,8 +78,15 @@ static int ttm_global_init(void) > * system memory. > */ > num_pages = ((u64)si.totalram * si.mem_unit) >> PAGE_SHIFT; > - ttm_pool_mgr_init(num_pages * 50 / 100); > - ttm_tt_mgr_init(); > + num_pages /= 2; > + > + /* But for DMA32 we limit ourself to only use 2GiB maximum. */ > + num_dma32 = (u64)(si.totalram - si.totalhigh) * si.mem_unit > + >> PAGE_SHIFT; > + num_dma32 = min(num_dma32, 2UL << (30 - PAGE_SHIFT)); > + > + ttm_pool_mgr_init(num_pages); > + ttm_tt_mgr_init(num_pages, num_dma32); > > spin_lock_init(&glob->lru_lock); > glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32); > diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c > index 2f0833c98d2c..5d8820725b75 100644 > --- a/drivers/gpu/drm/ttm/ttm_tt.c > +++ b/drivers/gpu/drm/ttm/ttm_tt.c > @@ -40,8 +40,18 @@ > > #include "ttm_module.h" > > -static struct shrinker mm_shrinker; > -static atomic_long_t swapable_pages; > +static unsigned long ttm_pages_limit; > + > +MODULE_PARM_DESC(pages_limit, "Limit for the allocated pages"); > +module_param_named(pages_limit, ttm_pages_limit, ulong, 0644); > + > +static unsigned long ttm_dma32_pages_limit; > + > +MODULE_PARM_DESC(dma32_pages_limit, "Limit for the allocated DMA32 pages"); > +module_param_named(dma32_pages_limit, ttm_dma32_pages_limit, ulong, 0644); > + > +static atomic_long_t ttm_pages_allocated; > +static atomic_long_t ttm_dma32_pages_allocated; Making this configurable looks an awful lot like "job done, move on". Just the revert to hardcoded 50% (or I guess just revert the shrinker patch at that point) for -fixes is imo better. Then I guess retry again for 5.14 or so. -Daniel > > /* > * Allocates a ttm structure for the given BO. > @@ -294,8 +304,6 @@ static void ttm_tt_add_mapping(struct ttm_device *bdev, struct ttm_tt *ttm) > > for (i = 0; i < ttm->num_pages; ++i) > ttm->pages[i]->mapping = bdev->dev_mapping; > - > - atomic_long_add(ttm->num_pages, &swapable_pages); > } > > int ttm_tt_populate(struct ttm_device *bdev, > @@ -309,12 +317,25 @@ int ttm_tt_populate(struct ttm_device *bdev, > if (ttm_tt_is_populated(ttm)) > return 0; > > + atomic_long_add(ttm->num_pages, &ttm_pages_allocated); > + if (bdev->pool.use_dma32) > + atomic_long_add(ttm->num_pages, &ttm_dma32_pages_allocated); > + > + while (atomic_long_read(&ttm_pages_allocated) > ttm_pages_limit || > + atomic_long_read(&ttm_dma32_pages_allocated) > > + ttm_dma32_pages_limit) { > + > + ret = ttm_bo_swapout(ctx, GFP_KERNEL); > + if (ret) > + goto error; > + } > + > if (bdev->funcs->ttm_tt_populate) > ret = bdev->funcs->ttm_tt_populate(bdev, ttm, ctx); > else > ret = ttm_pool_alloc(&bdev->pool, ttm, ctx); > if (ret) > - return ret; > + goto error; > > ttm_tt_add_mapping(bdev, ttm); > ttm->page_flags |= TTM_PAGE_FLAG_PRIV_POPULATED; > @@ -327,6 +348,12 @@ int ttm_tt_populate(struct ttm_device *bdev, > } > > return 0; > + > +error: > + atomic_long_sub(ttm->num_pages, &ttm_pages_allocated); > + if (bdev->pool.use_dma32) > + atomic_long_sub(ttm->num_pages, &ttm_dma32_pages_allocated); > + return ret; > } > EXPORT_SYMBOL(ttm_tt_populate); > > @@ -342,12 +369,9 @@ static void ttm_tt_clear_mapping(struct ttm_tt *ttm) > (*page)->mapping = NULL; > (*page++)->index = 0; > } > - > - atomic_long_sub(ttm->num_pages, &swapable_pages); > } > > -void ttm_tt_unpopulate(struct ttm_device *bdev, > - struct ttm_tt *ttm) > +void ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm) > { > if (!ttm_tt_is_populated(ttm)) > return; > @@ -357,76 +381,24 @@ void ttm_tt_unpopulate(struct ttm_device *bdev, > bdev->funcs->ttm_tt_unpopulate(bdev, ttm); > else > ttm_pool_free(&bdev->pool, ttm); > - ttm->page_flags &= ~TTM_PAGE_FLAG_PRIV_POPULATED; > -} > - > -/* As long as pages are available make sure to release at least one */ > -static unsigned long ttm_tt_shrinker_scan(struct shrinker *shrink, > - struct shrink_control *sc) > -{ > - struct ttm_operation_ctx ctx = { > - .no_wait_gpu = false > - }; > - int ret; > - > - ret = ttm_bo_swapout(&ctx, GFP_NOFS); > - return ret < 0 ? SHRINK_EMPTY : ret; > -} > - > -/* Return the number of pages available or SHRINK_EMPTY if we have none */ > -static unsigned long ttm_tt_shrinker_count(struct shrinker *shrink, > - struct shrink_control *sc) > -{ > - unsigned long num_pages; > - > - num_pages = atomic_long_read(&swapable_pages); > - return num_pages ? num_pages : SHRINK_EMPTY; > -} > > -#ifdef CONFIG_DEBUG_FS > + atomic_long_sub(ttm->num_pages, &ttm_pages_allocated); > + if (bdev->pool.use_dma32) > + atomic_long_sub(ttm->num_pages, &ttm_dma32_pages_allocated); > > -/* Test the shrinker functions and dump the result */ > -static int ttm_tt_debugfs_shrink_show(struct seq_file *m, void *data) > -{ > - struct shrink_control sc = { .gfp_mask = GFP_KERNEL }; > - > - fs_reclaim_acquire(GFP_KERNEL); > - seq_printf(m, "%lu/%lu\n", ttm_tt_shrinker_count(&mm_shrinker, &sc), > - ttm_tt_shrinker_scan(&mm_shrinker, &sc)); > - fs_reclaim_release(GFP_KERNEL); > - > - return 0; > + ttm->page_flags &= ~TTM_PAGE_FLAG_PRIV_POPULATED; > } > -DEFINE_SHOW_ATTRIBUTE(ttm_tt_debugfs_shrink); > - > -#endif > - > - > > /** > * ttm_tt_mgr_init - register with the MM shrinker > * > * Register with the MM shrinker for swapping out BOs. > */ > -int ttm_tt_mgr_init(void) > +void ttm_tt_mgr_init(unsigned long num_pages, unsigned long num_dma32_pages) > { > -#ifdef CONFIG_DEBUG_FS > - debugfs_create_file("tt_shrink", 0400, ttm_debugfs_root, NULL, > - &ttm_tt_debugfs_shrink_fops); > -#endif > - > - mm_shrinker.count_objects = ttm_tt_shrinker_count; > - mm_shrinker.scan_objects = ttm_tt_shrinker_scan; > - mm_shrinker.seeks = 1; > - return register_shrinker(&mm_shrinker); > -} > + if (!ttm_pages_limit) > + ttm_pages_limit = num_pages; > > -/** > - * ttm_tt_mgr_fini - unregister our MM shrinker > - * > - * Unregisters the MM shrinker. > - */ > -void ttm_tt_mgr_fini(void) > -{ > - unregister_shrinker(&mm_shrinker); > + if (!ttm_dma32_pages_limit) > + ttm_dma32_pages_limit = num_dma32_pages; > } > diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h > index 069f8130241a..134d09ef7766 100644 > --- a/include/drm/ttm/ttm_tt.h > +++ b/include/drm/ttm/ttm_tt.h > @@ -157,8 +157,7 @@ int ttm_tt_populate(struct ttm_device *bdev, struct ttm_tt *ttm, struct ttm_oper > */ > void ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm); > > -int ttm_tt_mgr_init(void); > -void ttm_tt_mgr_fini(void); > +void ttm_tt_mgr_init(unsigned long num_pages, unsigned long num_dma32_pages); > > #if IS_ENABLED(CONFIG_AGP) > #include <linux/agp_backend.h> > -- > 2.25.1 > -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch