One of the contended locks is counter->lock. Currently, counter->usage is
changed in PAGE_SIZE units. This patch changes the charge size to
PAGE_SIZE * borrow_factor and caches the "borrow" in a per-cpu area. This
reduces the number of acquisitions of counter->lock.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx>

Index: linux-2.6.24-mm1/mm/memcontrol.c
===================================================================
--- linux-2.6.24-mm1.orig/mm/memcontrol.c
+++ linux-2.6.24-mm1/mm/memcontrol.c
@@ -47,7 +47,8 @@ enum mem_cgroup_stat_index {
 	 */
 	MEM_CGROUP_STAT_CACHE, 	   /* # of pages charged as cache */
 	MEM_CGROUP_STAT_RSS,	   /* # of pages charged as rss */
-
+	MEM_CGROUP_STAT_BORROW,	   /* # of per-cpu borrow resource from
+					global resource */
 	MEM_CGROUP_STAT_NSTATS,
 };
@@ -134,6 +135,9 @@ struct mem_cgroup {
 	struct mem_cgroup_lru_info info;

 	int	prev_priority;	/* for recording reclaim priority */
+
+	int	borrow_unit;	/* size of unit for borrowing resource */
+
 	/*
 	 * statistics.
 	 */
@@ -611,6 +615,92 @@ unsigned long mem_cgroup_isolate_pages(u
 	return nr_taken;
 }

+/* FIXME? we assume that size is always PAGE_SIZE.
+ */
+/* charge in borrow_unit batches via a per-cpu cache to cut res.lock traffic */
+static int mem_cgroup_borrow_and_charge(struct mem_cgroup *mem, int size)
+{
+	unsigned long flags;
+	int ret;
+
+	ret = 0;
+
+	local_irq_save(flags);
+	if (mem->borrow_unit) {
+		int cpu;
+		s64 *bwp;
+		cpu = smp_processor_id();
+		bwp = &mem->stat.cpustat[cpu].count[MEM_CGROUP_STAT_BORROW];
+		if (*bwp >= size) {	/* '>=': a cache holding exactly 'size' can serve it */
+			*bwp -= size;
+			goto out;
+		}
+		/* per-cpu cache is short: borrow a whole unit from the global counter */
+		ret = res_counter_charge(&mem->res, mem->borrow_unit);
+		if (!ret) { /* success */
+			*bwp += (mem->borrow_unit - size);
+			goto out;
+		}
+	}
+	spin_lock(&mem->res.lock);
+	ret = res_counter_charge_locked(&mem->res, size);
+	spin_unlock(&mem->res.lock);
+out:
+	local_irq_restore(flags);
+	return ret;
+}
+
+/* uncharge 'size'; per-cpu surplus above 2*borrow_unit goes back to the global counter */
+static void mem_cgroup_return_and_uncharge(struct mem_cgroup *mem, int size)
+{
+	unsigned long flags;
+	int uncharge_size = 0;
+
+	local_irq_save(flags);
+	if (mem->borrow_unit) {
+		int limit = mem->borrow_unit * 2;
+		int cpu;
+		s64 *bwp;
+		cpu = smp_processor_id();
+		bwp = &mem->stat.cpustat[cpu].count[MEM_CGROUP_STAT_BORROW];
+		*bwp += size;
+		if (*bwp > limit) {
+			uncharge_size = *bwp - mem->borrow_unit;
+			*bwp = mem->borrow_unit;
+		}
+	} else
+		uncharge_size = size;
+	/* return the computed surplus (was 'size' -- wrong amount when the cache overflowed) */
+	if (uncharge_size) {
+		spin_lock(&mem->res.lock);
+		res_counter_uncharge_locked(&mem->res, uncharge_size);
+		spin_unlock(&mem->res.lock);
+	}
+	local_irq_restore(flags);
+
+	return;
+}
+
+/* per-cpu callback: flush this cpu's whole cached borrow back to the global counter */
+static void drain_local_borrow(void *data)
+{
+	int cpu;
+	int borrow;	/* NOTE(review): s64 cache read into int; OK while <= 2*borrow_unit -- confirm */
+	unsigned long flags;
+	struct mem_cgroup *mem = data;
+
+	local_irq_save(flags);
+	cpu = smp_processor_id();
+	borrow = mem->stat.cpustat[cpu].count[MEM_CGROUP_STAT_BORROW];
+	mem->stat.cpustat[cpu].count[MEM_CGROUP_STAT_BORROW] = 0;
+	spin_lock(&mem->res.lock);
+	res_counter_uncharge_locked(&mem->res, borrow);
+	spin_unlock(&mem->res.lock);
+	local_irq_restore(flags);
+}
+
+static void drain_all_borrow(struct mem_cgroup *mem)
+{
+	on_each_cpu(drain_local_borrow, mem, 0, 1);
+}
+
 /*
  * Charge the memory controller for page usage.
* Return @@ -681,7 +771,7 @@ retry: * If we created the page_cgroup, we should free it on exceeding * the cgroup limit. */ - while (res_counter_charge(&mem->res, PAGE_SIZE)) { + while (mem_cgroup_borrow_and_charge(mem, PAGE_SIZE)) { int ret; if (!(gfp_mask & __GFP_WAIT)) goto out; @@ -709,6 +799,8 @@ retry: if (res_counter_check_under_limit(&mem->res)) continue; + if (nr_retries < MEM_CGROUP_RECLAIM_RETRIES) + drain_all_borrow(mem); if (!nr_retries--) { mem_cgroup_out_of_memory(mem, gfp_mask); goto out; @@ -805,7 +897,7 @@ void mem_cgroup_uncharge(struct page_cgr if (clear_page_cgroup(page, pc) == pc) { mem = pc->mem_cgroup; css_put(&mem->css); - res_counter_uncharge(&mem->res, PAGE_SIZE); + mem_cgroup_return_and_uncharge(mem, PAGE_SIZE); spin_lock_irqsave(&mz->lru_lock, flags); __mem_cgroup_remove_list(pc); spin_unlock_irqrestore(&mz->lru_lock, flags); @@ -1005,6 +1097,7 @@ int mem_cgroup_force_empty(struct mem_cg /* drop all page_cgroup in inactive_list */ mem_cgroup_force_empty_list(mem, mz, 0); } + drain_all_borrow(mem); } ret = 0; out: @@ -1109,12 +1202,29 @@ static u64 mem_throttle_read(struct cgro return (u64)mem->throttle.limit; } +static int mem_bulkratio_write(struct cgroup *cont, struct cftype *cft, u64 val) +{ + struct mem_cgroup *mem = mem_cgroup_from_cont(cont); + int unit = val * PAGE_SIZE; + if (unit > (PAGE_SIZE << (MAX_ORDER/2))) + return -EINVAL; + mem->borrow_unit = unit; + return 0; +} + +static u64 mem_bulkratio_read(struct cgroup *cont, struct cftype *cft) +{ + struct mem_cgroup *mem = mem_cgroup_from_cont(cont); + return (u64)(mem->borrow_unit/PAGE_SIZE); +} + static const struct mem_cgroup_stat_desc { const char *msg; u64 unit; } mem_cgroup_stat_desc[] = { [MEM_CGROUP_STAT_CACHE] = { "cache", PAGE_SIZE, }, [MEM_CGROUP_STAT_RSS] = { "rss", PAGE_SIZE, }, + [MEM_CGROUP_STAT_BORROW] = { "borrow", 1, }, }; static int mem_control_stat_show(struct seq_file *m, void *arg) @@ -1205,6 +1315,11 @@ static struct cftype mem_cgroup_files[] .read_uint = 
mem_throttle_read, }, { + .name = "bulkratio", + .write_uint = mem_bulkratio_write, + .read_uint = mem_bulkratio_read, + }, + { .name = "stat", .open = mem_control_stat_open, }, @@ -1279,6 +1394,8 @@ mem_cgroup_create(struct cgroup_subsys * mem->throttle.limit = 10240; /* maybe enough big for no throttle */ atomic_set(&mem->throttle.reclaimers, 0); + mem->borrow_unit = 0; /* Work at strict/precise mode as default */ + return &mem->css; free_out: for_each_node_state(node, N_POSSIBLE) _______________________________________________ Containers mailing list Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linux-foundation.org/mailman/listinfo/containers