On Mon, 2021-09-27 at 12:41 +0100, Matthew Auld wrote:
> For cached objects we can allocate our pages directly in shmem. This
> should make it possible (in a later patch) to utilise the existing
> i915-gem shrinker code for such objects. For now this is still
> disabled.

Some minor comments below. With those either fixed or deemed
unnecessary,

Reviewed-by: Thomas Hellström <thomas.hellstrom@xxxxxxxxxxxxxxx>

> 
> v2(Thomas):
>   - Add optional try_to_writeback hook for objects. Importantly we
>     need to check if the object is even still shrinkable; in between
>     us dropping the shrinker LRU lock and acquiring the object lock
>     it could for example have been moved. Also we need to
>     differentiate between "lazy" shrinking and the immediate
>     writeback mode. Also later we need to handle objects which don't
>     even have mm.pages, so bundling this into put_pages() would
>     require somehow handling that edge case, hence just letting the
>     ttm backend handle everything in try_to_writeback doesn't seem
>     too bad.
> v3(Thomas):
>   - Likely a bad idea to touch the object from the unpopulate hook,
>     since it's not possible to hold a reference without also creating
>     a circular dependency, so likely this is too fragile. For now
>     just ensure we at least mark the pages as dirty/accessed when
>     called from the shrinker on WILLNEED objects.
>   - s/try_to_writeback/shrinker_release_pages/, since this can do
>     more than just writeback.
>   - Get rid of the do_backup boolean and just set the SWAPPED flag
>     prior to calling unpopulate.
>   - Keep shmem_tt as lowest priority for the TTM LRU bo_swapout walk,
>     since these just get skipped anyway. We can try to come up with
>     something better later.
> 
> Signed-off-by: Matthew Auld <matthew.auld@xxxxxxxxx>
> Cc: Thomas Hellström <thomas.hellstrom@xxxxxxxxxxxxxxx>
> Cc: Christian König <christian.koenig@xxxxxxx>
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_object.h    |   8 +
>  .../gpu/drm/i915/gem/i915_gem_object_types.h  |   2 +
>  drivers/gpu/drm/i915/gem/i915_gem_shmem.c     |  14 +-
>  drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  |  17 +-
>  drivers/gpu/drm/i915/gem/i915_gem_ttm.c       | 240 ++++++++++++++++--
>  5 files changed, 245 insertions(+), 36 deletions(-)
> 
...
> +
> +	err = dma_map_sg_attrs(i915_tt->dev,
> +			       st->sgl, st->nents,
> +			       PCI_DMA_BIDIRECTIONAL,

nit: Since this is a dma api call, should we use DMA_BIDIRECTIONAL
instead of PCI_DMA_BIDIRECTIONAL? DMA_BIDIRECTIONAL is already used
elsewhere in this file, though IIRC not consistently throughout the
driver.

> +			       DMA_ATTR_SKIP_CPU_SYNC |
> +			       DMA_ATTR_NO_KERNEL_MAPPING |
> +			       DMA_ATTR_NO_WARN);
> +	if (err <= 0) {
> +		err = -EINVAL;
> +		goto err_free_st;
> +	}
> +
> +	i = 0;
> +	for_each_sgt_page(page, sgt_iter, st)
> +		ttm->pages[i++] = page;
> +
> +	if (ttm->page_flags & TTM_TT_FLAG_SWAPPED)
> +		ttm->page_flags &= ~TTM_TT_FLAG_SWAPPED;
> +
> +	i915_tt->cached_st = st;
> +	return 0;
> +
> +err_free_st:
> +	shmem_free_st(st, filp->f_mapping, false, false);
> +	return err;
> +}
> +
> +static void i915_ttm_tt_shmem_unpopulate(struct ttm_tt *ttm)
> +{
> +	struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
> +	bool backup = ttm->page_flags & TTM_TT_FLAG_SWAPPED;
> +
> +	dma_unmap_sg(i915_tt->dev, i915_tt->cached_st->sgl,
> +		     i915_tt->cached_st->nents,
> +		     PCI_DMA_BIDIRECTIONAL);

Same here.
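Untested sketch of the suggested change at both call sites (only the
direction constant changes; DMA_BIDIRECTIONAL is the generic enum
dma_data_direction value from <linux/dma-mapping.h>):

	/* Map the shmem-backed pages, using the DMA API direction. */
	err = dma_map_sg_attrs(i915_tt->dev, st->sgl, st->nents,
			       DMA_BIDIRECTIONAL,
			       DMA_ATTR_SKIP_CPU_SYNC |
			       DMA_ATTR_NO_KERNEL_MAPPING |
			       DMA_ATTR_NO_WARN);

and the matching unmap in the unpopulate path:

	/* Same direction value on unmap. */
	dma_unmap_sg(i915_tt->dev, i915_tt->cached_st->sgl,
		     i915_tt->cached_st->nents, DMA_BIDIRECTIONAL);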
> +
> +	shmem_free_st(fetch_and_zero(&i915_tt->cached_st),
> +		      file_inode(i915_tt->filp)->i_mapping,
> +		      backup, backup);
> +}
> +
>  static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo,
>  					 uint32_t page_flags)
>  {
>  	struct ttm_resource_manager *man =
>  		ttm_manager_type(bo->bdev, bo->resource->mem_type);
>  	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
> +	enum ttm_caching caching = i915_ttm_select_tt_caching(obj);
>  	struct i915_ttm_tt *i915_tt;
>  	int ret;
> 
> @@ -196,36 +279,62 @@ static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo,
>  	    man->use_tt)
>  		page_flags |= TTM_TT_FLAG_ZERO_ALLOC;
> 
> -	ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags,
> -			  i915_ttm_select_tt_caching(obj));
> -	if (ret) {
> -		kfree(i915_tt);
> -		return NULL;
> +	if (i915_gem_object_is_shrinkable(obj) && caching == ttm_cached) {
> +		page_flags |= TTM_TT_FLAG_EXTERNAL |
> +			      TTM_TT_FLAG_EXTERNAL_MAPPABLE;
> +		i915_tt->is_shmem = true;
>  	}
> 
> +	ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags, caching);
> +	if (ret)
> +		goto err_free;
> +
>  	i915_tt->dev = obj->base.dev->dev;
> 
>  	return &i915_tt->ttm;
> +
> +err_free:
> +	kfree(i915_tt);
> +	return NULL;
> +}
> +
> +static int i915_ttm_tt_populate(struct ttm_device *bdev,
> +				struct ttm_tt *ttm,
> +				struct ttm_operation_ctx *ctx)
> +{
> +	struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
> +
> +	if (i915_tt->is_shmem)
> +		return i915_ttm_tt_shmem_populate(bdev, ttm, ctx);
> +
> +	return ttm_pool_alloc(&bdev->pool, ttm, ctx);
>  }
> 
>  static void i915_ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm)
>  {
>  	struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
> 
> -	if (i915_tt->cached_st) {
> -		dma_unmap_sgtable(i915_tt->dev, i915_tt->cached_st,
> -				  DMA_BIDIRECTIONAL, 0);
> -		sg_free_table(i915_tt->cached_st);
> -		kfree(i915_tt->cached_st);
> -		i915_tt->cached_st = NULL;
> +	if (i915_tt->is_shmem) {
> +		i915_ttm_tt_shmem_unpopulate(ttm);
> +	} else {
> +		if (i915_tt->cached_st) {
> +			dma_unmap_sgtable(i915_tt->dev, i915_tt->cached_st,
> +					  DMA_BIDIRECTIONAL, 0);
> +			sg_free_table(i915_tt->cached_st);
> +			kfree(i915_tt->cached_st);
> +			i915_tt->cached_st = NULL;
> +		}
> +		ttm_pool_free(&bdev->pool, ttm);
>  	}
> -	ttm_pool_free(&bdev->pool, ttm);
>  }
> 
>  static void i915_ttm_tt_destroy(struct ttm_device *bdev, struct ttm_tt *ttm)
>  {
>  	struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
> 
> +	if (i915_tt->filp)
> +		fput(i915_tt->filp);
> +
>  	ttm_tt_fini(ttm);
>  	kfree(i915_tt);
>  }
> @@ -235,6 +344,14 @@ static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo,
>  {
>  	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
> 
> +	/*
> +	 * EXTERNAL objects should never be swapped out by TTM, instead we need
> +	 * to handle that ourselves. TTM will already skip such objects for us,
> +	 * but we would like to avoid grabbing locks for no good reason.
> +	 */
> +	if (bo->ttm && bo->ttm->page_flags & TTM_TT_FLAG_EXTERNAL)
> +		return -EBUSY;
> +
>  	/* Will do for now. Our pinned objects are still on TTM's LRU lists */
>  	return i915_gem_object_evictable(obj);
>  }
> @@ -328,9 +445,11 @@ static void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj)
>  	i915_gem_object_set_cache_coherency(obj, cache_level);
>  }
> 
> -static void i915_ttm_purge(struct drm_i915_gem_object *obj)
> +static int __i915_ttm_purge(struct drm_i915_gem_object *obj)
>  {
>  	struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
> +	struct i915_ttm_tt *i915_tt =
> +		container_of(bo->ttm, typeof(*i915_tt), ttm);
>  	struct ttm_operation_ctx ctx = {
>  		.interruptible = true,
>  		.no_wait_gpu = false,
> @@ -339,17 +458,79 @@ static void i915_ttm_purge(struct drm_i915_gem_object *obj)
>  	int ret;
> 
>  	if (obj->mm.madv == __I915_MADV_PURGED)
> -		return;
> +		return 0;
> 
> -	/* TTM's purge interface. Note that we might be reentering. */
>  	ret = ttm_bo_validate(bo, &place, &ctx);
> -	if (!ret) {
> -		obj->write_domain = 0;
> -		obj->read_domains = 0;
> -		i915_ttm_adjust_gem_after_move(obj);
> -		i915_ttm_free_cached_io_st(obj);
> -		obj->mm.madv = __I915_MADV_PURGED;
> +	if (ret)
> +		return ret;
> +
> +	if (bo->ttm && i915_tt->filp) {
> +		/*
> +		 * The below fput (which eventually calls shmem_truncate)
> +		 * might be delayed by worker, so when directly called to
> +		 * purge the pages (like by the shrinker) we should try to be
> +		 * more aggressive and release the pages immediately.
> +		 */
> +		shmem_truncate_range(file_inode(i915_tt->filp),
> +				     0, (loff_t)-1);
> +		fput(fetch_and_zero(&i915_tt->filp));
> +	}
> +
> +	obj->write_domain = 0;
> +	obj->read_domains = 0;
> +	i915_ttm_adjust_gem_after_move(obj);
> +	i915_ttm_free_cached_io_st(obj);
> +	obj->mm.madv = __I915_MADV_PURGED;
> +	return 0;
> +}
> +
> +static void i915_ttm_purge(struct drm_i915_gem_object *obj)
> +{
> +	__i915_ttm_purge(obj);

Do we need a comment here as to why we choose to ignore the return
value? I typically use a void cast,

	(void)__i915_ttm_purge(obj);

to indicate that ignoring the return value is intentional. Not sure
if that's common practice in i915?
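To be concrete, an (untested) sketch of what I have in mind; the
comment wording is of course up to you:

static void i915_ttm_purge(struct drm_i915_gem_object *obj)
{
	/*
	 * Purging is best-effort here; on failure the object simply
	 * keeps its pages, so deliberately discard the return value.
	 */
	(void)__i915_ttm_purge(obj);
}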
> +}
> +
> +static int i915_ttm_shrinker_release_pages(struct drm_i915_gem_object *obj,
> +					   bool should_writeback)
> +{
> +	struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
> +	struct i915_ttm_tt *i915_tt =
> +		container_of(bo->ttm, typeof(*i915_tt), ttm);
> +	struct ttm_operation_ctx ctx = {
> +		.interruptible = true,
> +		.no_wait_gpu = false,
> +	};
> +	struct ttm_placement place = {};
> +	int ret;
> +
> +	if (!bo->ttm || bo->resource->mem_type != TTM_PL_SYSTEM)
> +		return 0;
> +
> +	GEM_BUG_ON(!i915_tt->is_shmem);
> +
> +	if (!i915_tt->filp)
> +		return 0;
> +
> +	switch (obj->mm.madv) {
> +	case I915_MADV_DONTNEED:
> +		return __i915_ttm_purge(obj);
> +	case __I915_MADV_PURGED:
> +		return 0;
> +	}
> +
> +	if (bo->ttm->page_flags & TTM_TT_FLAG_SWAPPED)
> +		return 0;
> +
> +	bo->ttm->page_flags |= TTM_TT_FLAG_SWAPPED;
> +	ret = ttm_bo_validate(bo, &place, &ctx);
> +	if (ret) {
> +		bo->ttm->page_flags &= ~TTM_TT_FLAG_SWAPPED;
> +		return ret;
>  	}
> +
> +	if (should_writeback)
> +		__shmem_writeback(obj->base.size, i915_tt->filp->f_mapping);
> +
> +	return 0;
>  }
> 
>  static void i915_ttm_swap_notify(struct ttm_buffer_object *bo)
> @@ -618,6 +799,7 @@ static unsigned long i915_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
> 
>  static struct ttm_device_funcs i915_ttm_bo_driver = {
>  	.ttm_tt_create = i915_ttm_tt_create,
> +	.ttm_tt_populate = i915_ttm_tt_populate,
>  	.ttm_tt_unpopulate = i915_ttm_tt_unpopulate,
>  	.ttm_tt_destroy = i915_ttm_tt_destroy,
>  	.eviction_valuable = i915_ttm_eviction_valuable,
> @@ -685,12 +867,17 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj,
>  	}
> 
>  	if (!i915_gem_object_has_pages(obj)) {
> +		struct i915_ttm_tt *i915_tt =
> +			container_of(bo->ttm, typeof(*i915_tt), ttm);
> +
>  		/* Object either has a page vector or is an iomem object */
>  		st = bo->ttm ? i915_ttm_tt_get_st(bo->ttm) : obj->ttm.cached_io_st;
>  		if (IS_ERR(st))
>  			return PTR_ERR(st);
> 
>  		__i915_gem_object_set_pages(obj, st, i915_sg_dma_sizes(st->sgl));
> +		if (!bo->ttm || !i915_tt->is_shmem)
> +			i915_gem_object_make_unshrinkable(obj);
>  	}
> 
>  	return ret;
> @@ -770,6 +957,8 @@ static void i915_ttm_put_pages(struct drm_i915_gem_object *obj,
>  static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
>  {
>  	struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
> +	struct i915_ttm_tt *i915_tt =
> +		container_of(bo->ttm, typeof(*i915_tt), ttm);
> 
>  	/*
>  	 * Don't manipulate the TTM LRUs while in TTM bo destruction.
> @@ -782,7 +971,10 @@ static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
>  	 * Put on the correct LRU list depending on the MADV status
>  	 */
>  	spin_lock(&bo->bdev->lru_lock);
> -	if (obj->mm.madv != I915_MADV_WILLNEED) {
> +	if (bo->ttm && i915_tt->filp) {
> +		/* Try to keep shmem_tt from being considered for shrinking. */
> +		bo->priority = TTM_MAX_BO_PRIORITY - 1;
> +	} else if (obj->mm.madv != I915_MADV_WILLNEED) {
>  		bo->priority = I915_TTM_PRIO_PURGE;
>  	} else if (!i915_gem_object_has_pages(obj)) {
>  		if (bo->priority < I915_TTM_PRIO_HAS_PAGES)
> @@ -887,9 +1079,12 @@ static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = {
>  	.get_pages = i915_ttm_get_pages,
>  	.put_pages = i915_ttm_put_pages,
>  	.truncate = i915_ttm_purge,
> +	.shrinker_release_pages = i915_ttm_shrinker_release_pages,
> +
>  	.adjust_lru = i915_ttm_adjust_lru,
>  	.delayed_free = i915_ttm_delayed_free,
>  	.migrate = i915_ttm_migrate,
> +
>  	.mmap_offset = i915_ttm_mmap_offset,
>  	.mmap_ops = &vm_ops_ttm,
>  };
> @@ -937,7 +1132,6 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem,
>  	drm_gem_private_object_init(&i915->drm, &obj->base, size);
>  	i915_gem_object_init(obj, &i915_gem_ttm_obj_ops, &lock_class, flags);
>  	i915_gem_object_init_memory_region(obj, mem);
> -	i915_gem_object_make_unshrinkable(obj);
>  	INIT_RADIX_TREE(&obj->ttm.get_io_page.radix, GFP_KERNEL | __GFP_NOWARN);
>  	mutex_init(&obj->ttm.get_io_page.lock);
>  	bo_type = (obj->flags & I915_BO_ALLOC_USER) ? ttm_bo_type_device :