On Mon, 20 Feb 2012 15:34:28 -0800 (PST)
Hugh Dickins <hughd@xxxxxxxxxx> wrote:

>  		return NULL;
>
> +	lruvec = page_lock_lruvec(page);
>  	lock_page_cgroup(pc);

Do we need to take the lru lock (with irqs disabled) per page in this
very, very hot path?  Hmm....

How about adding an NR_ISOLATED counter to the lruvec?  Then we can
delay freeing the lruvec until all counters go down to zero, as in:

	bool we_can_free_lruvec = true;

	lock_lruvec(lruvec);
	for_each_lru_lruvec(lru)
		if (!list_empty(&lruvec->lru[lru]))
			we_can_free_lruvec = false;
	if (lruvec->nr_isolated)
		we_can_free_lruvec = false;
	unlock_lruvec(lruvec);

	if (we_can_free_lruvec)
		kfree(lruvec);

If compaction or lumpy reclaim frees a page it has taken off the LRU,
it knows what it is doing and can decrement lruvec->nr_isolated
properly (it seems the zone's NR_ISOLATED is decremented at putback).
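Spelled out a little more, the accounting might look like this (a
sketch only: the nr_isolated field and these helper names are
illustrative, following the pseudocode above; they are not in the
patch):

	/*
	 * Both updates run under the lruvec lock, which isolation
	 * and putback already hold at these points.
	 */
	static void lruvec_inc_isolated(struct lruvec *lruvec, int nr_pages)
	{
		lruvec->nr_isolated += nr_pages;	/* at isolate */
	}

	static void lruvec_dec_isolated(struct lruvec *lruvec, int nr_pages)
	{
		/* at putback, or when the isolator frees the page */
		lruvec->nr_isolated -= nr_pages;
		VM_BUG_ON(lruvec->nr_isolated < 0);
	}

	/*
	 * Freeable only when every lru list is empty and nothing
	 * taken from the lists is still in flight.
	 */
	static bool lruvec_can_free(struct lruvec *lruvec)
	{
		enum lru_list lru;

		for_each_lru(lru)
			if (!list_empty(&lruvec->lru[lru]))
				return false;
		return lruvec->nr_isolated == 0;
	}

The free path would then test lruvec_can_free() under the lruvec lock,
and kfree(lruvec) only after unlocking, as in the pseudocode above.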
Thanks,
-Kame

>  	memcg = pc->mem_cgroup;
> @@ -2915,14 +2944,17 @@ __mem_cgroup_uncharge_common(struct page
>  	mem_cgroup_charge_statistics(memcg, anon, -nr_pages);
>
>  	ClearPageCgroupUsed(pc);
> +
>  	/*
> -	 * pc->mem_cgroup is not cleared here. It will be accessed when it's
> -	 * freed from LRU. This is safe because uncharged page is expected not
> -	 * to be reused (freed soon). Exception is SwapCache, it's handled by
> -	 * special functions.
> +	 * Once an uncharged page is isolated from the mem_cgroup's lru,
> +	 * it no longer protects that mem_cgroup from rmdir: reset to root.
>  	 */
> +	if (!PageLRU(page) && pc->mem_cgroup != root_mem_cgroup)
> +		pc->mem_cgroup = root_mem_cgroup;
>
>  	unlock_page_cgroup(pc);
> +	unlock_lruvec(lruvec);
> +
>  	/*
>  	 * even after unlock, we have memcg->res.usage here and this memcg
>  	 * will never be freed.
> @@ -2939,6 +2971,7 @@ __mem_cgroup_uncharge_common(struct page
>
>  unlock_out:
>  	unlock_page_cgroup(pc);
> +	unlock_lruvec(lruvec);
>  	return NULL;
>  }
>
> @@ -3327,7 +3360,9 @@ static struct page_cgroup *lookup_page_c
>  	 * the first time, i.e. during boot or memory hotplug;
>  	 * or when mem_cgroup_disabled().
>  	 */
> -	if (likely(pc) && PageCgroupUsed(pc))
> +	if (!pc || PageCgroupUsed(pc))
> +		return pc;
> +	if (pc->mem_cgroup && pc->mem_cgroup != root_mem_cgroup)
>  		return pc;
>  	return NULL;
>  }
> --- mmotm.orig/mm/swap.c	2012-02-18 11:57:42.679524592 -0800
> +++ mmotm/mm/swap.c	2012-02-18 11:57:49.107524745 -0800
> @@ -52,6 +52,7 @@ static void __page_cache_release(struct
>  		lruvec = page_lock_lruvec(page);
>  		VM_BUG_ON(!PageLRU(page));
>  		__ClearPageLRU(page);
> +		mem_cgroup_reset_uncharged_to_root(page);
>  		del_page_from_lru_list(page, lruvec, page_off_lru(page));
>  		unlock_lruvec(lruvec);
>  	}
> @@ -583,6 +584,7 @@ void release_pages(struct page **pages,
>  		page_relock_lruvec(page, &lruvec);
>  		VM_BUG_ON(!PageLRU(page));
>  		__ClearPageLRU(page);
> +		mem_cgroup_reset_uncharged_to_root(page);
>  		del_page_from_lru_list(page, lruvec, page_off_lru(page));
>  	}
>
> --- mmotm.orig/mm/vmscan.c	2012-02-18 11:57:42.679524592 -0800
> +++ mmotm/mm/vmscan.c	2012-02-18 11:57:49.107524745 -0800
> @@ -1087,11 +1087,11 @@ int __isolate_lru_page(struct page *page
>
>  	if (likely(get_page_unless_zero(page))) {
>  		/*
> -		 * Be careful not to clear PageLRU until after we're
> -		 * sure the page is not being freed elsewhere -- the
> -		 * page release code relies on it.
> +		 * Beware of interface change: now leave ClearPageLRU(page)
> +		 * to the caller, because memcg's lumpy and compaction
> +		 * cases (approaching the page by its physical location)
> +		 * may not have the right lru_lock yet.
>  		 */
> -		ClearPageLRU(page);
>  		ret = 0;
>  	}
>
> @@ -1154,7 +1154,16 @@ static unsigned long isolate_lru_pages(u
>
>  		switch (__isolate_lru_page(page, mode, file)) {
>  		case 0:
> +#ifdef CONFIG_DEBUG_VM
> +			/* check lock on page is lock we already got */
> +			page_relock_lruvec(page, &lruvec);
> +			BUG_ON(lruvec != home_lruvec);
> +			BUG_ON(page != lru_to_page(src));
> +			BUG_ON(page_lru(page) != lru);
> +#endif
> +			ClearPageLRU(page);
>  			isolated_pages = hpage_nr_pages(page);
> +			mem_cgroup_reset_uncharged_to_root(page);
>  			mem_cgroup_update_lru_size(lruvec, lru, -isolated_pages);
>  			list_move(&page->lru, dst);
>  			nr_taken += isolated_pages;
> @@ -1211,21 +1220,7 @@ static unsigned long isolate_lru_pages(u
>  			    !PageSwapCache(cursor_page))
>  				break;
>
> -			if (__isolate_lru_page(cursor_page, mode, file) == 0) {
> -				mem_cgroup_page_relock_lruvec(cursor_page,
> -							      &lruvec);
> -				isolated_pages = hpage_nr_pages(cursor_page);
> -				mem_cgroup_update_lru_size(lruvec,
> -					page_lru(cursor_page), -isolated_pages);
> -				list_move(&cursor_page->lru, dst);
> -
> -				nr_taken += isolated_pages;
> -				nr_lumpy_taken += isolated_pages;
> -				if (PageDirty(cursor_page))
> -					nr_lumpy_dirty += isolated_pages;
> -				scan++;
> -				pfn += isolated_pages - 1;
> -			} else {
> +			if (__isolate_lru_page(cursor_page, mode, file) != 0) {
>  				/*
>  				 * Check if the page is freed already.
>  				 *
> @@ -1243,13 +1238,50 @@ static unsigned long isolate_lru_pages(u
>  					continue;
>  				break;
>  			}
> +
> +			/*
> +			 * This locking call is a no-op in the non-memcg
> +			 * case, since we already hold the right lru_lock;
> +			 * but it may change the lock in the memcg case.
> +			 * It is then vital to recheck PageLRU (but not
> +			 * necessary to recheck isolation mode).
> +			 */
> +			mem_cgroup_page_relock_lruvec(cursor_page, &lruvec);
> +
> +			if (PageLRU(cursor_page) &&
> +			    !PageUnevictable(cursor_page)) {
> +				ClearPageLRU(cursor_page);
> +				isolated_pages = hpage_nr_pages(cursor_page);
> +				mem_cgroup_reset_uncharged_to_root(cursor_page);
> +				mem_cgroup_update_lru_size(lruvec,
> +					page_lru(cursor_page), -isolated_pages);
> +				list_move(&cursor_page->lru, dst);
> +
> +				nr_taken += isolated_pages;
> +				nr_lumpy_taken += isolated_pages;
> +				if (PageDirty(cursor_page))
> +					nr_lumpy_dirty += isolated_pages;
> +				scan++;
> +				pfn += isolated_pages - 1;
> +			} else {
> +				/* Cannot hold lru_lock while freeing page */
> +				unlock_lruvec(lruvec);
> +				lruvec = NULL;
> +				put_page(cursor_page);
> +				break;
> +			}
>  		}
>
>  		/* If we break out of the loop above, lumpy reclaim failed */
>  		if (pfn < end_pfn)
>  			nr_lumpy_failed++;
>
> -		lruvec = home_lruvec;
> +		if (lruvec != home_lruvec) {
> +			if (lruvec)
> +				unlock_lruvec(lruvec);
> +			lruvec = home_lruvec;
> +			lock_lruvec(lruvec);
> +		}
>  	}
>
>  	*nr_scanned = scan;
> @@ -1301,6 +1333,7 @@ int isolate_lru_page(struct page *page)
>  		int lru = page_lru(page);
>  		get_page(page);
>  		ClearPageLRU(page);
> +		mem_cgroup_reset_uncharged_to_root(page);
>  		del_page_from_lru_list(page, lruvec, lru);
>  		ret = 0;
>  	}
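BTW, for anyone reading along: mem_cgroup_reset_uncharged_to_root(),
called at each of these ClearPageLRU sites, is not shown in this
excerpt.  From the uncharge hunk above I take it to be roughly the
following -- a sketch of my reading, not the actual implementation:

	static void mem_cgroup_reset_uncharged_to_root(struct page *page)
	{
		struct page_cgroup *pc;

		if (mem_cgroup_disabled())
			return;

		/* caller holds the lruvec lock and has just cleared PageLRU */
		pc = lookup_page_cgroup(page);
		if (pc && !PageCgroupUsed(pc) &&
		    pc->mem_cgroup != root_mem_cgroup)
			pc->mem_cgroup = root_mem_cgroup;
	}

i.e. an uncharged page leaving the LRU must stop pinning its old
mem_cgroup, so that rmdir of that cgroup need not wait for the page.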