On Wed, Jan 23, 2013 at 03:16:45PM +0900, Minchan Kim wrote: > Looks good to me. Below just nitpicks. > I saw Andrew already took this into mmotm so I'm not sure he or you will do > next spin but anyway, review goes. Just nitpicks and a question. > > On Tue, Jan 22, 2013 at 10:29:51AM +0800, Shaohua Li wrote: > > > > When I use several fast SSD to do swap, swapper_space.tree_lock is heavily > > contended. This makes each swap partition have one address_space to reduce the > > lock contention. There is an array of address_space for swap. The swap entry > > type is the index to the array. > > > > In my test with 3 SSD, this increases the swapout throughput 20%. > > > > V1->V2: simplify code > > > > Signed-off-by: Shaohua Li <shli@xxxxxxxxxxxx> > > Acked-by: Minchan Kim <minchan@xxxxxxxxxx> > > > --- > > fs/proc/meminfo.c | 4 +-- > > include/linux/swap.h | 9 ++++---- > > mm/memcontrol.c | 4 +-- > > mm/mincore.c | 5 ++-- > > mm/swap.c | 9 ++++++-- > > mm/swap_state.c | 57 ++++++++++++++++++++++++++++++++++----------------- > > mm/swapfile.c | 5 ++-- > > mm/util.c | 10 ++++++-- > > 8 files changed, 68 insertions(+), 35 deletions(-) > > > > Index: linux/include/linux/swap.h > > =================================================================== > > --- linux.orig/include/linux/swap.h 2013-01-22 09:13:14.000000000 +0800 > > +++ linux/include/linux/swap.h 2013-01-22 09:34:44.923011706 +0800 > > @@ -8,7 +8,7 @@ > > #include <linux/memcontrol.h> > > #include <linux/sched.h> > > #include <linux/node.h> > > - > > +#include <linux/fs.h> > > #include <linux/atomic.h> > > #include <asm/page.h> > > > > @@ -330,8 +330,9 @@ int generic_swapfile_activate(struct swa > > sector_t *); > > > > /* linux/mm/swap_state.c */ > > -extern struct address_space swapper_space; > > -#define total_swapcache_pages swapper_space.nrpages > > +extern struct address_space swapper_spaces[]; > > +#define swap_address_space(entry) (&swapper_spaces[swp_type(entry)]) > > How about this naming? > > #define swapper_space(entry) (&swapper_spaces[swp_type(entry)]) > > > +extern unsigned long total_swapcache_pages(void); > > extern void show_swap_cache_info(void); > > extern int add_to_swap(struct page *); > > extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t); > > @@ -382,7 +383,7 @@ mem_cgroup_uncharge_swapcache(struct pag > > > > #define nr_swap_pages 0L > > #define total_swap_pages 0L > > -#define total_swapcache_pages 0UL > > +#define total_swapcache_pages() 0UL > > > > #define si_swapinfo(val) \ > > do { (val)->freeswap = (val)->totalswap = 0; } while (0) > > Index: linux/mm/memcontrol.c > > =================================================================== > > --- linux.orig/mm/memcontrol.c 2013-01-22 09:13:14.000000000 +0800 > Acked-by: Minchan Kim <minchan@xxxxxxxxxx> > > > +++ linux/mm/memcontrol.c 2013-01-22 09:29:29.374977700 +0800 > > @@ -6279,7 +6279,7 @@ static struct page *mc_handle_swap_pte(s > > * Because lookup_swap_cache() updates some statistics counter, > > * we call find_get_page() with swapper_space directly. > > */ > > - page = find_get_page(&swapper_space, ent.val); > > + page = find_get_page(swap_address_space(ent), ent.val); > > if (do_swap_account) > > entry->val = ent.val; > > > > @@ -6320,7 +6320,7 @@ static struct page *mc_handle_file_pte(s > > swp_entry_t swap = radix_to_swp_entry(page); > > if (do_swap_account) > > *entry = swap; > > - page = find_get_page(&swapper_space, swap.val); > > + page = find_get_page(swap_address_space(swap), swap.val); > > } > > #endif > > return page; > > Index: linux/mm/mincore.c > > =================================================================== > > --- linux.orig/mm/mincore.c 2013-01-22 09:13:14.000000000 +0800 > > +++ linux/mm/mincore.c 2013-01-22 09:29:29.378977649 +0800 > > @@ -75,7 +75,7 @@ static unsigned char mincore_page(struct > > /* shmem/tmpfs may return swap: account for swapcache page too. */ > > if (radix_tree_exceptional_entry(page)) { > > swp_entry_t swap = radix_to_swp_entry(page); > > - page = find_get_page(&swapper_space, swap.val); > > + page = find_get_page(swap_address_space(swap), swap.val); > > } > > #endif > > if (page) { > > @@ -135,7 +135,8 @@ static void mincore_pte_range(struct vm_ > > } else { > > #ifdef CONFIG_SWAP > > pgoff = entry.val; > > - *vec = mincore_page(&swapper_space, pgoff); > > + *vec = mincore_page(swap_address_space(entry), > > + pgoff); > > #else > > WARN_ON(1); > > *vec = 1; > > Index: linux/mm/swap.c > > =================================================================== > > --- linux.orig/mm/swap.c 2013-01-22 09:13:14.000000000 +0800 > > +++ linux/mm/swap.c 2013-01-22 09:29:29.378977649 +0800 > > @@ -855,9 +855,14 @@ EXPORT_SYMBOL(pagevec_lookup_tag); > > void __init swap_setup(void) > > { > > unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT); > > - > > #ifdef CONFIG_SWAP > > - bdi_init(swapper_space.backing_dev_info); > > + int i; > > + > > + for (i = 0; i < MAX_SWAPFILES; i++) { > > + bdi_init(swapper_spaces[i].backing_dev_info); > > + spin_lock_init(&swapper_spaces[i].tree_lock); > > + INIT_LIST_HEAD(&swapper_spaces[i].i_mmap_nonlinear); > > + } > > #endif > > > > /* Use a smaller cluster for small-memory machines */ > > Index: linux/mm/swap_state.c > > =================================================================== > > --- linux.orig/mm/swap_state.c 2013-01-22 09:13:14.000000000 +0800 > > +++ linux/mm/swap_state.c 2013-01-22 09:29:29.378977649 +0800 > > @@ -36,12 +36,12 @@ static struct backing_dev_info swap_back > > .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED, > > }; > > > > -struct address_space swapper_space = { > > - .page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN), > > - .tree_lock = __SPIN_LOCK_UNLOCKED(swapper_space.tree_lock), > > - .a_ops = &swap_aops, > > - .i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear), > > - .backing_dev_info = &swap_backing_dev_info, > > +struct address_space swapper_spaces[MAX_SWAPFILES] = { > > + [0 ... MAX_SWAPFILES - 1] = { > > + .page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN), > > + .a_ops = &swap_aops, > > + .backing_dev_info = &swap_backing_dev_info, > > + } > > }; > > > > #define INC_CACHE_INFO(x) do { swap_cache_info.x++; } while (0) > > @@ -53,9 +53,19 @@ static struct { > > unsigned long find_total; > > } swap_cache_info; > > > > +unsigned long total_swapcache_pages(void) > > +{ > > + int i; > > + unsigned long ret = 0; > > + > > + for (i = 0; i < MAX_SWAPFILES; i++) > > + ret += swapper_spaces[i].nrpages; > > + return ret; > > +} > > + > > void show_swap_cache_info(void) > > { > > - printk("%lu pages in swap cache\n", total_swapcache_pages); > > + printk("%lu pages in swap cache\n", total_swapcache_pages()); > > printk("Swap cache stats: add %lu, delete %lu, find %lu/%lu\n", > > swap_cache_info.add_total, swap_cache_info.del_total, > > swap_cache_info.find_success, swap_cache_info.find_total); > > @@ -70,23 +80,26 @@ void show_swap_cache_info(void) > > static int __add_to_swap_cache(struct page *page, swp_entry_t entry) > > { > > int error; > > + struct address_space *address_space; > > > > VM_BUG_ON(!PageLocked(page)); > > VM_BUG_ON(PageSwapCache(page)); > > VM_BUG_ON(!PageSwapBacked(page)); > > > > page_cache_get(page); > > - SetPageSwapCache(page); > > set_page_private(page, entry.val); > > + SetPageSwapCache(page); > > Why did you move this line? Is there any special reason? Originally I'm afraid page_mapping() gets invalid page_private(), but I then realized we hold page lock. There are some places we don't hold page lock. either such page isn't swap page or the caller can tolerate race. I forgot removing this change in the patch. But I certainly can be wrong. We can add memory barrier if required. Thanks, Shaohua -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>