On Mon, Nov 02, 2020 at 01:58:07PM +0100, Christian König wrote: > During eviction we do want to trigger the OOM killer. > > Only while doing new allocations we should try to avoid that and > return -ENOMEM to the application. > > Signed-off-by: Christian König <christian.koenig@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 ++ > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 3 --- > drivers/gpu/drm/ttm/ttm_pool.c | 2 +- > drivers/gpu/drm/ttm/ttm_tt.c | 7 ------- > include/drm/ttm/ttm_bo_api.h | 2 ++ > include/drm/ttm/ttm_bo_driver.h | 3 --- > 6 files changed, 5 insertions(+), 14 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > index 1aa516429c80..52041f48e1c9 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > @@ -516,6 +516,8 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, > struct ttm_operation_ctx ctx = { > .interruptible = (bp->type != ttm_bo_type_kernel), > .no_wait_gpu = bp->no_wait_gpu, > + /* We opt to avoid OOM on system pages allocations */ > + .retry_mayfail = true, Hm this is a bit confusingly named imo, but it comes from the gfp flags. So maybe call it gfp_retry_mayfail, to make it clear where it's from and that it's only for system allocations. Personally I'd have called this limited_retry, but I think better to stay consistent. Also I think we should __GFP_NOWARN for this, to avoid splats in dmesg since we expect to handle this. With the bikesheds addressed somehow. Reviewed-by: Daniel Vetter <daniel.vetter@xxxxxxxx> > .resv = bp->resv, > .flags = bp->type != ttm_bo_type_kernel ? > TTM_OPT_FLAG_ALLOW_RES_EVICT : 0 > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > index bd6e6641c3fc..c01c060e4ac5 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > @@ -1914,9 +1914,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) > } > adev->mman.initialized = true; > > - /* We opt to avoid OOM on system pages allocations */ > - adev->mman.bdev.no_retry = true; > - > /* Initialize VRAM pool with all of VRAM divided into pages */ > r = amdgpu_vram_mgr_init(adev); > if (r) { > diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c > index 1e50deefb5d5..7100fd0e9e88 100644 > --- a/drivers/gpu/drm/ttm/ttm_pool.c > +++ b/drivers/gpu/drm/ttm/ttm_pool.c > @@ -367,7 +367,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt, > if (tt->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC) > gfp_flags |= __GFP_ZERO; > > - if (tt->page_flags & TTM_PAGE_FLAG_NO_RETRY) > + if (ctx->retry_mayfail) > gfp_flags |= __GFP_RETRY_MAYFAIL; > > if (pool->use_dma32) > diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c > index 8861a74ac335..cfd633c7e764 100644 > --- a/drivers/gpu/drm/ttm/ttm_tt.c > +++ b/drivers/gpu/drm/ttm/ttm_tt.c > @@ -51,9 +51,6 @@ int ttm_tt_create(struct ttm_buffer_object *bo, bool zero_alloc) > if (bo->ttm) > return 0; > > - if (bdev->no_retry) > - page_flags |= TTM_PAGE_FLAG_NO_RETRY; > - > switch (bo->type) { > case ttm_bo_type_device: > if (zero_alloc) > @@ -211,8 +208,6 @@ int ttm_tt_swapin(struct ttm_tt *ttm) > > swap_space = swap_storage->f_mapping; > gfp_mask = mapping_gfp_mask(swap_space); > - if (ttm->page_flags & TTM_PAGE_FLAG_NO_RETRY) > - gfp_mask |= __GFP_RETRY_MAYFAIL; > > for (i = 0; i < ttm->num_pages; ++i) { > from_page = shmem_read_mapping_page_gfp(swap_space, i, > @@ -260,8 +255,6 @@ int ttm_tt_swapout(struct ttm_bo_device *bdev, struct ttm_tt *ttm) > > swap_space = swap_storage->f_mapping; > gfp_mask = mapping_gfp_mask(swap_space); > - if (ttm->page_flags & TTM_PAGE_FLAG_NO_RETRY) > - gfp_mask |= __GFP_RETRY_MAYFAIL; > > for (i = 0; i < ttm->num_pages; ++i) { > from_page = ttm->pages[i]; > diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h > index 37102e45e496..4c7c2d574db6 100644 > --- a/include/drm/ttm/ttm_bo_api.h > +++ b/include/drm/ttm/ttm_bo_api.h > @@ -195,6 +195,7 @@ struct ttm_bo_kmap_obj { > * > * @interruptible: Sleep interruptible if sleeping. > * @no_wait_gpu: Return immediately if the GPU is busy. > + * @retry_mayfail: Set the __GFP_RETRY_MAYFAIL when allocation pages. > * @resv: Reservation object to allow reserved evictions with. > * @flags: Including the following flags > * > @@ -204,6 +205,7 @@ struct ttm_bo_kmap_obj { > struct ttm_operation_ctx { > bool interruptible; > bool no_wait_gpu; > + bool retry_mayfail; > struct dma_resv *resv; > uint64_t bytes_moved; > uint32_t flags; > diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h > index e9f683fa72dc..da8208f43378 100644 > --- a/include/drm/ttm/ttm_bo_driver.h > +++ b/include/drm/ttm/ttm_bo_driver.h > @@ -276,7 +276,6 @@ extern struct ttm_bo_global { > * @dev_mapping: A pointer to the struct address_space representing the > * device address space. > * @wq: Work queue structure for the delayed delete workqueue. > - * @no_retry: Don't retry allocation if it fails > * > */ > > @@ -314,8 +313,6 @@ struct ttm_bo_device { > */ > > struct delayed_work wq; > - > - bool no_retry; > }; > > static inline struct ttm_resource_manager *ttm_manager_type(struct ttm_bo_device *bdev, > -- > 2.25.1 > > _______________________________________________ > dri-devel mailing list > dri-devel@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/dri-devel -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/dri-devel