RE: [PATCH] drm/ttm: check if free mem space is under the lower limit

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




-----Original Message-----
From: Koenig, Christian 
Sent: Thursday, February 22, 2018 7:28 PM
To: He, Roger <Hongbo.He@xxxxxxx>; dri-devel@xxxxxxxxxxxxxxxxxxxxx
Subject: Re: [PATCH] drm/ttm: check if free mem space is under the lower limit

Am 22.02.2018 um 11:10 schrieb Roger He:
> the free mem space and the lower limit both include two parts:
> system memory and swap space.
>
> For the OOM triggered by TTM, that is the case as below:
> first swap space is full of swapped out pages and soon system memory 
> also is filled up with ttm pages. and then any memory allocation 
> request will run into OOM.
>
> to cover two cases:
> a. if no swap disk at all or free swap space is under swap mem
>     limit but available system mem is bigger than sys mem limit,
>     allow TTM allocation;
>
> b. if the available system mem is less than sys mem limit but
>     free swap space is bigger than swap mem limit, allow TTM
>     allocation.
>
> v2: merge two memory limit(swap and system) into one
> v3: keep original behavior except ttm_opt_ctx->flags with
>      TTM_OPT_FLAG_FORCE_ALLOC
> v4: always set force_alloc as tx->flags & TTM_OPT_FLAG_FORCE_ALLOC
> v5: add an attribute for lower_mem_limit
>
> Signed-off-by: Roger He <Hongbo.He@xxxxxxx>
> ---
>   drivers/gpu/drm/ttm/ttm_memory.c         | 94 ++++++++++++++++++++++++++++++++
>   drivers/gpu/drm/ttm/ttm_page_alloc.c     |  3 +
>   drivers/gpu/drm/ttm/ttm_page_alloc_dma.c |  3 +
>   include/drm/ttm/ttm_memory.h             |  5 ++
>   4 files changed, 105 insertions(+)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_memory.c 
> b/drivers/gpu/drm/ttm/ttm_memory.c
> index aa0c381..d015e39 100644
> --- a/drivers/gpu/drm/ttm/ttm_memory.c
> +++ b/drivers/gpu/drm/ttm/ttm_memory.c
> @@ -36,6 +36,7 @@
>   #include <linux/mm.h>
>   #include <linux/module.h>
>   #include <linux/slab.h>
> +#include <linux/swap.h>
>   
>   #define TTM_MEMORY_ALLOC_RETRIES 4
>   
> @@ -166,6 +167,52 @@ static struct kobj_type ttm_mem_zone_kobj_type = {
>   	.default_attrs = ttm_mem_zone_attrs,
>   };
>   
> +static struct attribute ttm_mem_global_lower_mem_limit = {
> +	.name = "lower_mem_limit",
> +	.mode = S_IRUGO | S_IWUSR
> +};
> +
> +static ssize_t ttm_mem_global_show(struct kobject *kobj,
> +				 struct attribute *attr,
> +				 char *buffer)
> +{
> +	struct ttm_mem_global *glob =
> +		container_of(kobj, struct ttm_mem_global, kobj);
> +	uint64_t val = 0;
> +
> +	spin_lock(&glob->lock);
> +	val = glob->lower_mem_limit;
> +	spin_unlock(&glob->lock);
> +
> +	return snprintf(buffer, PAGE_SIZE, "%llu\n",
> +			(unsigned long long) val << 2);

	What is that shift good for?

Because the unit of lower_mem_limit is 4KB, so (val << 2) can get KB for consistent with other parameters as below(all are showed with KB bytes.):

static struct attribute *ttm_mem_zone_attrs[] = {
	&ttm_mem_sys,
	&ttm_mem_emer,
	&ttm_mem_max,
	&ttm_mem_swap,
	&ttm_mem_used,
	NULL
};
 
> +}
> +
> +static ssize_t ttm_mem_global_store(struct kobject *kobj,
> +				  struct attribute *attr,
> +				  const char *buffer,
> +				  size_t size)
> +{
> +	int chars;
> +	uint64_t val64;
> +	unsigned long val;
> +	struct ttm_mem_global *glob =
> +		container_of(kobj, struct ttm_mem_global, kobj);
> +
> +	chars = sscanf(buffer, "%lu", &val);
> +	if (chars == 0)
> +		return size;
> +
> +	val64 = val;
> +	val64 >>= 2;

	Dito?
Covert from KB to 4K page size here.

> +
> +	spin_lock(&glob->lock);
> +	glob->lower_mem_limit = val64;
> +	spin_unlock(&glob->lock);
> +
> +	return size;
> +}
> +
>   static void ttm_mem_global_kobj_release(struct kobject *kobj)
>   {
>   	struct ttm_mem_global *glob =
> @@ -174,8 +221,20 @@ static void ttm_mem_global_kobj_release(struct kobject *kobj)
>   	kfree(glob);
>   }
>   
> +static struct attribute *ttm_mem_global_attrs[] = {
> +	&ttm_mem_global_lower_mem_limit,
> +	NULL
> +};
> +
> +static const struct sysfs_ops ttm_mem_global_ops = {
> +	.show = &ttm_mem_global_show,
> +	.store = &ttm_mem_global_store,
> +};
> +
>   static struct kobj_type ttm_mem_glob_kobj_type = {
>   	.release = &ttm_mem_global_kobj_release,
> +	.sysfs_ops = &ttm_mem_global_ops,
> +	.default_attrs = ttm_mem_global_attrs,
>   };
>   
>   static bool ttm_zones_above_swap_target(struct ttm_mem_global *glob, 
> @@ -375,6 +434,11 @@ int ttm_mem_global_init(struct ttm_mem_global 
> *glob)
>   
>   	si_meminfo(&si);
>   
> +	/* lower limit of swap space and 256MB is enough */
> +	glob->lower_mem_limit = 256 << 8;
> +	/* lower limit of ram and keep consistent with each zone->emer_mem */
> +	glob->lower_mem_limit += si.totalram >> 2;
> +

	Mhm, I fear we need to set this to zero by default.

Do you mean:  glob->lower_mem_limit = 0  ?
I don't get your point here. Then how amdgpu set it with above value to prevent OOM?

Thanks
Roger(Hongbo.He)
>   	ret = ttm_mem_init_kernel_zone(glob, &si);
>   	if (unlikely(ret != 0))
>   		goto out_no_zone;
> @@ -469,6 +533,36 @@ void ttm_mem_global_free(struct ttm_mem_global *glob,
>   }
>   EXPORT_SYMBOL(ttm_mem_global_free);
>   
> +/*
> + * check if the available mem is under lower memory limit
> + *
> + * a. if no swap disk at all or free swap space is under 
> +swap_mem_limit
> + * but available system mem is bigger than sys_mem_limit, allow TTM
> + * allocation;
> + *
> + * b. if the available system mem is less than sys_mem_limit but free
> + * swap disk is bigger than swap_mem_limit, allow TTM allocation.
> + */
> +bool
> +ttm_check_under_lowerlimit(struct ttm_mem_global *glob,
> +			uint64_t num_pages,
> +			struct ttm_operation_ctx *ctx)
> +{
> +	bool ret = false;
> +	int64_t available;
> +
> +	if (ctx->flags & TTM_OPT_FLAG_FORCE_ALLOC)
> +		return false;
> +
> +	available = get_nr_swap_pages() + si_mem_available();
> +	available -= num_pages;
> +	if (available < glob->lower_mem_limit)
> +		ret = true;
> +
> +	return ret;

Don't use a local variable, just use "return true" / "return false;".

Regards,
Christian.

> +}
> +EXPORT_SYMBOL(ttm_check_under_lowerlimit);
> +
>   static int ttm_mem_global_reserve(struct ttm_mem_global *glob,
>   				  struct ttm_mem_zone *single_zone,
>   				  uint64_t amount, bool reserve) diff --git 
> a/drivers/gpu/drm/ttm/ttm_page_alloc.c 
> b/drivers/gpu/drm/ttm/ttm_page_alloc.c
> index 5edcd89..c9d8903 100644
> --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
> +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
> @@ -1100,6 +1100,9 @@ int ttm_pool_populate(struct ttm_tt *ttm, struct ttm_operation_ctx *ctx)
>   	if (ttm->state != tt_unpopulated)
>   		return 0;
>   
> +	if (ttm_check_under_lowerlimit(mem_glob, ttm->num_pages, ctx))
> +		return -ENOMEM;
> +
>   	ret = ttm_get_pages(ttm->pages, ttm->num_pages, ttm->page_flags,
>   			    ttm->caching_state);
>   	if (unlikely(ret != 0)) {
> diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c 
> b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
> index b122f6e..37e03d9 100644
> --- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
> +++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
> @@ -940,6 +940,9 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev,
>   	if (ttm->state != tt_unpopulated)
>   		return 0;
>   
> +	if (ttm_check_under_lowerlimit(mem_glob, num_pages, ctx))
> +		return -ENOMEM;
> +
>   	INIT_LIST_HEAD(&ttm_dma->pages_list);
>   	i = 0;
>   
> diff --git a/include/drm/ttm/ttm_memory.h 
> b/include/drm/ttm/ttm_memory.h index 8936285..737b5fe 100644
> --- a/include/drm/ttm/ttm_memory.h
> +++ b/include/drm/ttm/ttm_memory.h
> @@ -49,6 +49,8 @@
>    * @work: The workqueue callback for the shrink queue.
>    * @lock: Lock to protect the @shrink - and the memory accounting members,
>    * that is, essentially the whole structure with some exceptions.
> + * @lower_mem_limit: include lower limit of swap space and lower 
> + limit of
> + * system memory.
>    * @zones: Array of pointers to accounting zones.
>    * @num_zones: Number of populated entries in the @zones array.
>    * @zone_kernel: Pointer to the kernel zone.
> @@ -67,6 +69,7 @@ struct ttm_mem_global {
>   	struct workqueue_struct *swap_queue;
>   	struct work_struct work;
>   	spinlock_t lock;
> +	uint64_t lower_mem_limit;
>   	struct ttm_mem_zone *zones[TTM_MEM_MAX_ZONES];
>   	unsigned int num_zones;
>   	struct ttm_mem_zone *zone_kernel;
> @@ -90,4 +93,6 @@ extern void ttm_mem_global_free_page(struct ttm_mem_global *glob,
>   				     struct page *page, uint64_t size);
>   extern size_t ttm_round_pot(size_t size);
>   extern uint64_t ttm_get_kernel_zone_memory_size(struct 
> ttm_mem_global *glob);
> +extern bool ttm_check_under_lowerlimit(struct ttm_mem_global *glob,
> +			uint64_t num_pages, struct ttm_operation_ctx *ctx);
>   #endif

_______________________________________________
dri-devel mailing list
dri-devel@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/dri-devel




[Index of Archives]     [Linux DRI Users]     [Linux Intel Graphics]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [XFree86]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux