This is an experimental patch for dropping pages in an empty cgroup. Comments? == An experimental patch. Drop all pages in a memcontrol cgroup if the cgroup has no tasks. Please run "sync" before trying to drop; without a sync, -EBUSY may be returned. Problem: mlocked pages are not handled yet. Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx> --- include/linux/memcontrol.h | 13 ++++- mm/memcontrol.c | 113 +++++++++++++++++++++++++++++++++++++++++++++ mm/vmscan.c | 16 +++++- 3 files changed, 140 insertions(+), 2 deletions(-) Index: linux-2.6.23-rc8-mm1/mm/memcontrol.c =================================================================== --- linux-2.6.23-rc8-mm1.orig/mm/memcontrol.c +++ linux-2.6.23-rc8-mm1/mm/memcontrol.c @@ -63,6 +63,7 @@ struct mem_cgroup { */ spinlock_t lru_lock; unsigned long control_type; /* control RSS or RSS+Pagecache */ + unsigned long force_drop; }; /* @@ -135,6 +136,31 @@ static inline int page_cgroup_locked(str &page->page_cgroup); } +static inline unsigned long long mem_cgroup_usage(struct mem_cgroup *mem) +{ + return mem->res.usage; +} + +int mem_cgroup_reclaim_end(struct mem_cgroup *mem) +{ + if (!mem) + return 0; + if (mem_cgroup_usage(mem) == 0) + return 1; + return 0; +} + +int mem_cgroup_force_reclaim(struct mem_cgroup *mem) +{ + if (!mem) + return 0; + /* Need more precise check if LRU is separated. */ + if (mem->force_drop) + return 1; + else + return 0; +} + void page_assign_page_cgroup(struct page *page, struct page_cgroup *pc) { int locked; @@ -437,6 +463,52 @@ void mem_cgroup_uncharge(struct page_cgr } } +/* + * Drop all pages + * # of tasks in this cgroup must be 0 before call this. + */ +int mem_cgroup_drop(struct mem_cgroup *mem) +{ + + unsigned long long before; + struct cgroup *cg = mem->css.cgroup; + int ret = -EBUSY; + unsigned long expire = jiffies + 30 * HZ; /* just pseudo value */ + + css_get(&mem->css); +retry: + /* disallow if there is the task. 
*/ + if (atomic_read(&cg->count)) + goto end; + /* + * We have to call try_to_free_mem_cgroup_pages() several times. + * Especially when there is write-back pages. + */ + if (time_after(jiffies, expire)) + goto end; + + before = mem_cgroup_usage(mem); + + if (before == 0) { + ret = 0; + goto end; + } + mem->force_drop = 1; + if (try_to_free_mem_cgroup_pages(mem, GFP_HIGHUSER_MOVABLE) == 0) + congestion_wait(WRITE, HZ/10); + mem->force_drop = 0; + + /* made some progress */ + if (mem_cgroup_usage(mem) <= before) + goto retry; + +end: + css_put(&mem->css); + return ret; +} + + + int mem_cgroup_write_strategy(char *buf, unsigned long long *tmp) { *tmp = memparse(buf, &buf); @@ -522,6 +594,41 @@ static ssize_t mem_control_type_read(str ppos, buf, s - buf); } +static ssize_t mem_drop_type_write(struct cgroup *cont, + struct cftype *cft, struct file *file, + const char __user *userbuf, + size_t nbytes, loff_t *pos) +{ + struct mem_cgroup *mem; + int ret; + char *buf, *end; + unsigned long tmp; + + mem = mem_cgroup_from_cont(cont); + buf = kmalloc(nbytes + 1, GFP_KERNEL); + ret = -ENOMEM; + if (buf == NULL) + goto out; + buf[nbytes] = 0; + ret = -EFAULT; + if (copy_from_user(buf, userbuf, nbytes)) + goto out_free; + ret = -EINVAL; + tmp = simple_strtoul(buf, &end, 10); + + if (*end != '\0') + goto out_free; + if (tmp) { + ret = mem_cgroup_drop(mem); + if (!ret) + ret = nbytes; + } +out_free: + kfree(buf); +out: + return ret; +} + static struct cftype mem_cgroup_files[] = { { .name = "usage_in_bytes", @@ -544,6 +651,11 @@ static struct cftype mem_cgroup_files[] .write = mem_control_type_write, .read = mem_control_type_read, }, + { + .name = "drop_in_force", + .write = mem_drop_type_write, + .read = mem_cgroup_read, + }, }; static struct mem_cgroup init_mem_cgroup; @@ -567,6 +679,7 @@ mem_cgroup_create(struct cgroup_subsys * INIT_LIST_HEAD(&mem->inactive_list); spin_lock_init(&mem->lru_lock); mem->control_type = MEM_CGROUP_TYPE_ALL; + mem->force_drop = 0; return &mem->css; 
} Index: linux-2.6.23-rc8-mm1/include/linux/memcontrol.h =================================================================== --- linux-2.6.23-rc8-mm1.orig/include/linux/memcontrol.h +++ linux-2.6.23-rc8-mm1/include/linux/memcontrol.h @@ -47,6 +47,10 @@ extern int mem_cgroup_cache_charge(struc gfp_t gfp_mask); extern struct mem_cgroup *mm_cgroup(struct mm_struct *mm); +/* called when page reclaim has no progress in mem cgroup */ +extern int mem_cgroup_reclaim_end(struct mem_cgroup *mem); +extern int mem_cgroup_force_reclaim(struct mem_cgroup *mem); + static inline void mem_cgroup_uncharge_page(struct page *page) { mem_cgroup_uncharge(page_get_page_cgroup(page)); @@ -102,7 +106,14 @@ static inline struct mem_cgroup *mm_cgro { return NULL; } - +static inline int mem_cgroup_force_reclaim(struct mem_cgroup *mem) +{ + return 0; +} +static inline int mem_cgroup_reclaim_end(struct mem_cgroup *mem) +{ + return 0; +} #endif /* CONFIG_CGROUP_MEM_CONT */ #endif /* _LINUX_MEMCONTROL_H */ Index: linux-2.6.23-rc8-mm1/mm/vmscan.c =================================================================== --- linux-2.6.23-rc8-mm1.orig/mm/vmscan.c +++ linux-2.6.23-rc8-mm1/mm/vmscan.c @@ -1168,6 +1168,13 @@ static unsigned long shrink_zone(int pri zone->nr_scan_inactive = 0; else nr_inactive = 0; + /* TODO: we need to know # of pages to be reclaimed per group */ + if (mem_cgroup_force_reclaim(sc->mem_cgroup)) { + if (!nr_active) + nr_active = sc->swap_cluster_max; + if (!nr_inactive) + nr_inactive = sc->swap_cluster_max; + } while (nr_active || nr_inactive) { if (nr_active) { @@ -1256,6 +1263,7 @@ static unsigned long do_try_to_free_page int ret = 0; unsigned long total_scanned = 0; unsigned long nr_reclaimed = 0; + unsigned long progress; struct reclaim_state *reclaim_state = current->reclaim_state; unsigned long lru_pages = 0; int i; @@ -1276,7 +1284,8 @@ static unsigned long do_try_to_free_page sc->nr_scanned = 0; if (!priority) disable_swap_token(); - nr_reclaimed += 
shrink_zones(priority, zones, sc); + progress = shrink_zones(priority, zones, sc); + nr_reclaimed += progress; /* * Don't shrink slabs when reclaiming memory from * over limit cgroups @@ -1292,6 +1301,11 @@ static unsigned long do_try_to_free_page ret = 1; goto out; } + if (progress == 0 && + mem_cgroup_reclaim_end(sc->mem_cgroup)) { + ret = 1; + goto out; + } /* * Try to write back as many pages as we just scanned. This _______________________________________________ Containers mailing list Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linux-foundation.org/mailman/listinfo/containers