Re: [RFC] [PATCH 3/7 v2] memcg: remove PCG_MOVE_LOCK flag from pc->flags

On Fri, Jan 13, 2012 at 12:40 AM, KAMEZAWA Hiroyuki
<kamezawa.hiroyu@xxxxxxxxxxxxxx> wrote:
>
> From 1008e84d94245b1e7c4d237802ff68ff00757736 Mon Sep 17 00:00:00 2001
> From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx>
> Date: Thu, 12 Jan 2012 15:53:24 +0900
> Subject: [PATCH 3/7] memcg: remove PCG_MOVE_LOCK flag from pc->flags.
>
> The PCG_MOVE_LOCK bit is used as a bit spinlock to avoid races between
> memcg account moving and page state statistics updates.
>
> Page statistics updates are a very hot path, but this lock is taken
> only when someone is moving accounts (or the page is PageTransHuge()).
> And now, all account moving between memcgs (by task-move) is
> serialized.

This might be a side question, but can you clarify the serialization
here? Does it mean that we only allow one task-move at a time
system-wide?

Thanks

--Ying
>
> So, it seems too costly to have 1 bit per page for this purpose.
>
> This patch removes PCG_MOVE_LOCK and adds a hashed rwlock array in its
> place. This works well enough. Even when we need to take the lock, we
> don't need to disable IRQs in the hot path, because we use a rwlock.
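
For anyone skimming the thread: this is a classic lock-striping scheme,
a fixed pool of rwlocks indexed by a hash of the page. A self-contained
sketch of the idea (illustrative names, not the actual patch code):

	#define NR_STRIPES	64	/* the patch sizes this as NR_CPUS */
	static rwlock_t stripe_lock[NR_STRIPES];

	/* Every page hashes to one lock in the fixed pool; a collision
	 * just means two unrelated pages occasionally share a lock. */
	static rwlock_t *stripe_for(unsigned long pfn)
	{
		return &stripe_lock[pfn % NR_STRIPES];
	}

	/* hot path (stat update):  read_lock(stripe_for(pfn));
	 * rare path (move account): write_lock_irqsave(stripe_for(pfn), flags); */

Stat updaters take the read side and the (rare) mover takes the write
side, so readers never contend with each other.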
>
> Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx>
> ---
>  include/linux/page_cgroup.h |   19 -----------
>  mm/memcontrol.c             |   72 ++++++++++++++++++++++++++++++++++++++----
>  2 files changed, 65 insertions(+), 26 deletions(-)
>
> diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
> index a2d1177..5dba799 100644
> --- a/include/linux/page_cgroup.h
> +++ b/include/linux/page_cgroup.h
> @@ -8,7 +8,6 @@ enum {
>        PCG_USED, /* this object is in use. */
>        PCG_MIGRATION, /* under page migration */
>        /* flags for mem_cgroup and file and I/O status */
> -       PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */
>        PCG_FILE_MAPPED, /* page is accounted as "mapped" */
>        __NR_PCG_FLAGS,
>  };
> @@ -95,24 +94,6 @@ static inline void unlock_page_cgroup(struct page_cgroup *pc)
>        bit_spin_unlock(PCG_LOCK, &pc->flags);
>  }
>
> -static inline void move_lock_page_cgroup(struct page_cgroup *pc,
> -       unsigned long *flags)
> -{
> -       /*
> -        * We know updates to pc->flags of page cache's stats are from both of
> -        * usual context or IRQ context. Disable IRQ to avoid deadlock.
> -        */
> -       local_irq_save(*flags);
> -       bit_spin_lock(PCG_MOVE_LOCK, &pc->flags);
> -}
> -
> -static inline void move_unlock_page_cgroup(struct page_cgroup *pc,
> -       unsigned long *flags)
> -{
> -       bit_spin_unlock(PCG_MOVE_LOCK, &pc->flags);
> -       local_irq_restore(*flags);
> -}
> -
>  #else /* CONFIG_CGROUP_MEM_RES_CTLR */
>  struct page_cgroup;
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 9019069..61e276f 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -1338,6 +1338,65 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg)
>        return false;
>  }
>
> +/*
> + * At moving accounting information between cgroups, we'll have races with
> + * page status accounting. To avoid them, we need some locks. In general,
> + * adding atomic ops to the hot path is very bad, so we use 2-level logic.
> + *
> + * When a thread starts moving account information, the per-cpu
> + * MEM_CGROUP_ON_MOVE value is set. If MEM_CGROUP_ON_MOVE==0, there is no
> + * race and the page status update can be done without any locks. If
> + * MEM_CGROUP_ON_MOVE>0, we use the following hashed rwlocks.
> + * - At updating information, we hold rlock.
> + * - When a page is picked up and being moved, wlock is held.
> + *
> + * This logic works well enough because moving accounts is not a usual event.
> + */
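
So the hot path stays completely lock-free while no move is in flight;
my paraphrase of the mem_cgroup_update_page_stat() change further down:

	if (!mem_cgroup_stealed(memcg)) {
		/* MEM_CGROUP_ON_MOVE == 0: no mover, update with no lock */
	} else {
		/* mover active: take the hashed read lock, then recheck
		 * pc->mem_cgroup before updating */
	}
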
> +
> +/*
> + * This rwlock is accessed only when MEM_CGROUP_ON_MOVE > 0.
> + */
> +#define NR_MOVE_ACCOUNT_LOCKS  (NR_CPUS)
> +#define move_account_hash(page) ((page_to_pfn(page) % NR_MOVE_ACCOUNT_LOCKS))
> +static rwlock_t move_account_locks[NR_MOVE_ACCOUNT_LOCKS];
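
Small nit: NR_CPUS isn't necessarily a power of two, so the modulo in
move_account_hash() can compile to a division. If that ever matters,
something like this (untested) would turn it into a mask:

	#define NR_MOVE_ACCOUNT_LOCKS	(roundup_pow_of_two(NR_CPUS))
	#define move_account_hash(page) \
		(page_to_pfn(page) & (NR_MOVE_ACCOUNT_LOCKS - 1))
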
> +
> +static rwlock_t *__mem_cgroup_account_move_lock(struct page *page)
> +{
> +       int hnum = move_account_hash(page);
> +
> +       return &move_account_locks[hnum];
> +}
> +
> +static void mem_cgroup_account_move_rlock(struct page *page)
> +{
> +       read_lock(__mem_cgroup_account_move_lock(page));
> +}
> +
> +static void mem_cgroup_account_move_runlock(struct page *page)
> +{
> +       read_unlock(__mem_cgroup_account_move_lock(page));
> +}
> +
> +static void mem_cgroup_account_move_wlock(struct page *page,
> +               unsigned long *flags)
> +{
> +       write_lock_irqsave(__mem_cgroup_account_move_lock(page), *flags);
> +}
> +
> +static void mem_cgroup_account_move_wunlock(struct page *page,
> +               unsigned long flags)
> +{
> +       write_unlock_irqrestore(__mem_cgroup_account_move_lock(page), flags);
> +}
> +
> +static void mem_cgroup_account_move_lock_init(void)
> +{
> +       int num;
> +
> +       for (num = 0; num < NR_MOVE_ACCOUNT_LOCKS; num++)
> +               rwlock_init(&move_account_locks[num]);
> +}
> +
>  /**
>  * mem_cgroup_print_oom_info: Called from OOM with tasklist_lock held in read mode.
>  * @memcg: The memory cgroup that went over limit
> @@ -1859,7 +1918,6 @@ void mem_cgroup_update_page_stat(struct page *page,
>        struct mem_cgroup *memcg;
>        struct page_cgroup *pc = lookup_page_cgroup(page);
>        bool need_unlock = false;
> -       unsigned long uninitialized_var(flags);
>
>        if (mem_cgroup_disabled())
>                return;
> @@ -1871,7 +1929,7 @@ void mem_cgroup_update_page_stat(struct page *page,
>        /* pc->mem_cgroup is unstable ? */
>        if (unlikely(mem_cgroup_stealed(memcg))) {
>                /* take a lock against to access pc->mem_cgroup */
> -               move_lock_page_cgroup(pc, &flags);
> +               mem_cgroup_account_move_rlock(page);
>                need_unlock = true;
>                memcg = pc->mem_cgroup;
>                if (!memcg || !PageCgroupUsed(pc))
> @@ -1894,7 +1952,7 @@ void mem_cgroup_update_page_stat(struct page *page,
>
>  out:
>        if (unlikely(need_unlock))
> -               move_unlock_page_cgroup(pc, &flags);
> +               mem_cgroup_account_move_runlock(page);
>        rcu_read_unlock();
>        return;
>  }
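
If I'm reading the IRQ story right, this is why the read side can stay
IRQ-enabled: readers share the lock, so a stat update from IRQ context
that interrupts a same-CPU reader just takes the read lock again, while
the write side below uses write_lock_irqsave() and therefore can never
be interrupted by a same-CPU reader and deadlock. That's what lets this
path drop the local_irq_save() the old move_lock_page_cgroup() needed.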
> @@ -2457,8 +2515,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
>
>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>
> -#define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MOVE_LOCK) |\
> -                       (1 << PCG_MIGRATION))
> +#define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MIGRATION))
>  /*
>  * Because tail pages are not marked as "used", set it. We're under
>  * zone->lru_lock, 'splitting on pmd' and compound_lock.
> @@ -2537,7 +2594,7 @@ static int mem_cgroup_move_account(struct page *page,
>        if (!PageCgroupUsed(pc) || pc->mem_cgroup != from)
>                goto unlock;
>
> -       move_lock_page_cgroup(pc, &flags);
> +       mem_cgroup_account_move_wlock(page, &flags);
>
>        if (PageCgroupFileMapped(pc)) {
>                /* Update mapped_file data for mem_cgroup */
> @@ -2561,7 +2618,7 @@ static int mem_cgroup_move_account(struct page *page,
>         * guaranteed that "to" is never removed. So, we don't check rmdir
>         * status here.
>         */
> -       move_unlock_page_cgroup(pc, &flags);
> +       mem_cgroup_account_move_wunlock(page, flags);
>        ret = 0;
>  unlock:
>        unlock_page_cgroup(pc);
> @@ -4938,6 +4995,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
>                        INIT_WORK(&stock->work, drain_local_stock);
>                }
>                hotcpu_notifier(memcg_cpu_hotplug_callback, 0);
> +               mem_cgroup_account_move_lock_init();
>        } else {
>                parent = mem_cgroup_from_cont(cont->parent);
>                memcg->use_hierarchy = parent->use_hierarchy;
> --
> 1.7.4.1
>
>
