On Thu, Apr 12, 2012 at 4:30 AM, KAMEZAWA Hiroyuki
<kamezawa.hiroyu@xxxxxxxxxxxxxx> wrote:
> Tejun Heo, cgroup maintainer, is trying to remove ->pre_destroy() to
> prevent rmdir() from failing with EBUSY and the like.
>
> This patch removes pre_destroy() in memcg. All remaining charges
> will be moved to another cgroup without any failure; ->destroy()
> just schedules a work item which will destroy the memcg.
> Then, rmdir() will never fail. The kernel will take care that the
> remaining resources in the cgroup are accounted correctly.
>
> After this patch, memcg is destroyed asynchronously by a workqueue.

Is it necessary to make the destroy asynchronous? Frankly, I don't like
that much. It leaves the system in a non-deterministic state from the
admin's perspective. The current synchronous destroy works fine, and the
admin can rely on no charge changes happening after the destroy returns.

--Ying

> Then, we can modify the 'moving' logic to work asynchronously, i.e.,
> we no longer force users to wait for the end of rmdir(). We don't
> need to use heavy synchronous calls. This patch modifies the logic as follows:
>
> - Use mem_cgroup_drain_stock_async rather than drain_stock_sync.
> - lru_add_drain_all() will be called only when necessary, in a lazy way.
>
> Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx>
> ---
>  mm/memcontrol.c |   52 ++++++++++++++++++++++------------------------------
>  1 files changed, 22 insertions(+), 30 deletions(-)
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 22c8faa..e466809 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -315,6 +315,8 @@ struct mem_cgroup {
>  #ifdef CONFIG_INET
>  	struct tcp_memcontrol tcp_mem;
>  #endif
> +
> +	struct work_struct work_destroy;
>  };
>
>  /* Stuffs for move charges at task migration. */
> @@ -2105,7 +2107,6 @@ static void drain_all_stock_async(struct mem_cgroup *root_memcg)
>  	mutex_unlock(&percpu_charge_mutex);
>  }
>
> -/* This is a synchronous drain interface. */
>  static void drain_all_stock_sync(struct mem_cgroup *root_memcg)
>  {
>  	/* called when force_empty is called */
> @@ -3661,10 +3662,9 @@ static int mem_cgroup_recharge_lru(struct mem_cgroup *memcg,
>  		pc = lookup_page_cgroup(page);
>
>  		ret = mem_cgroup_move_parent(page, pc, memcg, GFP_KERNEL);
> -		if (ret == -EINTR)
> -			break;
>
> -		if (ret == -EBUSY || ret == -EINVAL) {
> +		VM_BUG_ON(ret != 0 && ret != -EBUSY);
> +		if (ret) {
>  			/* found lock contention or "pc" is obsolete. */
>  			busy = page;
>  			cond_resched();
> @@ -3677,22 +3677,19 @@ static int mem_cgroup_recharge_lru(struct mem_cgroup *memcg,
>  	return ret;
>  }
>
> -
> -static int mem_cgroup_recharge(struct mem_cgroup *memcg)
> +/*
> + * This function is called after ->destroy(). So, we cannot access the cgroup
> + * of this memcg.
> + */
> +static void mem_cgroup_recharge(struct work_struct *work)
>  {
> +	struct mem_cgroup *memcg;
>  	int ret, node, zid;
> -	struct cgroup *cgrp = memcg->css.cgroup;
>
> +	memcg = container_of(work, struct mem_cgroup, work_destroy);
> +	/* No task points to this memcg; call this only once. */
> +	drain_all_stock_async(memcg);
>  	do {
> -		ret = -EBUSY;
> -		if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children))
> -			goto out;
> -		ret = -EINTR;
> -		if (signal_pending(current))
> -			goto out;
> -		/* This is for making all *used* pages to be on LRU. */
> -		lru_add_drain_all();
> -		drain_all_stock_sync(memcg);
>  		ret = 0;
>  		mem_cgroup_start_move(memcg);
>  		for_each_node_state(node, N_HIGH_MEMORY) {
> @@ -3710,13 +3707,14 @@ static int mem_cgroup_recharge(struct mem_cgroup *memcg)
>  		}
>  		mem_cgroup_end_move(memcg);
>  		cond_resched();
> -		/* "ret" should also be checked to ensure all lists are empty. */
> -	} while (memcg->res.usage > 0 || ret);
> -out:
> -	return ret;
> +		/* drain LRU only when we cannot find pages on the LRU */
> +		if (res_counter_read_u64(&memcg->res, RES_USAGE) &&
> +		    !mem_cgroup_nr_lru_pages(memcg, LRU_ALL))
> +			lru_add_drain_all();
> +	} while (res_counter_read_u64(&memcg->res, RES_USAGE) || ret);
> +	mem_cgroup_put(memcg);
>  }
>
> -
>  /*
>   * make mem_cgroup's charge to be 0 if there is no task. This is only called
>   * by memory.force_empty file, an user request.
> @@ -4803,6 +4801,7 @@ static void vfree_work(struct work_struct *work)
>  	memcg = container_of(work, struct mem_cgroup, work_freeing);
>  	vfree(memcg);
>  }
> +
>  static void vfree_rcu(struct rcu_head *rcu_head)
>  {
>  	struct mem_cgroup *memcg;
> @@ -4982,20 +4981,14 @@ free_out:
>  	return ERR_PTR(error);
>  }
>
> -static int mem_cgroup_pre_destroy(struct cgroup *cont)
> -{
> -	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
> -
> -	return mem_cgroup_recharge(memcg);
> -}
> -
>  static void mem_cgroup_destroy(struct cgroup *cont)
>  {
>  	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
>
>  	kmem_cgroup_destroy(cont);
>
> -	mem_cgroup_put(memcg);
> +	INIT_WORK(&memcg->work_destroy, mem_cgroup_recharge);
> +	schedule_work(&memcg->work_destroy);
>  }
>
>  static int mem_cgroup_populate(struct cgroup_subsys *ss,
> @@ -5589,7 +5582,6 @@ struct cgroup_subsys mem_cgroup_subsys = {
>  	.name = "memory",
>  	.subsys_id = mem_cgroup_subsys_id,
>  	.create = mem_cgroup_create,
> -	.pre_destroy = mem_cgroup_pre_destroy,
>  	.destroy = mem_cgroup_destroy,
>  	.populate = mem_cgroup_populate,
>  	.can_attach = mem_cgroup_can_attach,
> --
> 1.7.4.1
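
For readers less familiar with the workqueue pattern the patch leans on, below
is a minimal sketch of the deferred-destroy idea: ->destroy() only queues an
embedded work item, and the worker later recovers its object with
container_of() and finishes the cleanup. This is not the actual memcg code;
the names my_group, my_group_cleanup, my_group_destroy and the
remaining_charges counter are made up for illustration.

/*
 * Hypothetical sketch of the deferred-destroy pattern (INIT_WORK +
 * schedule_work + container_of); not the memcg implementation itself.
 */
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct my_group {
	unsigned long remaining_charges;	/* stand-in for the res_counter usage */
	struct work_struct work_destroy;	/* mirrors memcg->work_destroy in the patch */
};

/* Runs later in process context on the system workqueue, after ->destroy(). */
static void my_group_cleanup(struct work_struct *work)
{
	struct my_group *grp = container_of(work, struct my_group, work_destroy);

	/* Settle whatever is left; rmdir() has already returned to userspace. */
	while (grp->remaining_charges) {
		grp->remaining_charges--;
		cond_resched();
	}
	kfree(grp);
}

/* Analogue of the new mem_cgroup_destroy(): it cannot fail and does not block. */
static void my_group_destroy(struct my_group *grp)
{
	INIT_WORK(&grp->work_destroy, my_group_cleanup);
	schedule_work(&grp->work_destroy);
}

The trade-off being debated in this thread is visible in the sketch:
my_group_destroy() returns immediately, so rmdir() can never fail, but any
remaining charges are only settled some time later, once the workqueue runs
my_group_cleanup().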