On Wed, 2013-01-23 at 17:04 +0900, Minchan Kim wrote: > On Wed, Jan 23, 2013 at 03:36:55PM +0800, Shaohua Li wrote: > > On Wed, Jan 23, 2013 at 03:16:45PM +0900, Minchan Kim wrote: > > > Looks good to me. Below just nitpicks. > > > I saw Andrew already took this into mmotm so I'm not sure he or you will do > > > next spin but anyway, review goes. Just nitpicks and a question. > > > > > > On Tue, Jan 22, 2013 at 10:29:51AM +0800, Shaohua Li wrote: > > > > > > > > When I use several fast SSD to do swap, swapper_space.tree_lock is heavily > > > > contended. This makes each swap partition have one address_space to reduce the > > > > lock contention. There is an array of address_space for swap. The swap entry > > > > type is the index to the array. > > > > > > > > In my test with 3 SSD, this increases the swapout throughput 20%. > > > > > > > > V1->V2: simplify code > > > > > > > > Signed-off-by: Shaohua Li <shli@xxxxxxxxxxxx> > > > > > > Acked-by: Minchan Kim <minchan@xxxxxxxxxx> > > > > > > > --- > > > > fs/proc/meminfo.c | 4 +-- > > > > include/linux/swap.h | 9 ++++---- > > > > mm/memcontrol.c | 4 +-- > > > > mm/mincore.c | 5 ++-- > > > > mm/swap.c | 9 ++++++-- > > > > mm/swap_state.c | 57 ++++++++++++++++++++++++++++++++++----------------- > > > > mm/swapfile.c | 5 ++-- > > > > mm/util.c | 10 ++++++-- > > > > 8 files changed, 68 insertions(+), 35 deletions(-) > > > > > > > > Index: linux/include/linux/swap.h > > > > =================================================================== > > > > --- linux.orig/include/linux/swap.h 2013-01-22 09:13:14.000000000 +0800 > > > > +++ linux/include/linux/swap.h 2013-01-22 09:34:44.923011706 +0800 > > > > @@ -8,7 +8,7 @@ > > > > #include <linux/memcontrol.h> > > > > #include <linux/sched.h> > > > > #include <linux/node.h> > > > > - > > > > +#include <linux/fs.h> > > > > #include <linux/atomic.h> > > > > #include <asm/page.h> > > > > > > > > @@ -330,8 +330,9 @@ int generic_swapfile_activate(struct swa > > > > sector_t *); > > > > > > > > /* linux/mm/swap_state.c */ > > > > -extern struct address_space swapper_space; > > > > -#define total_swapcache_pages swapper_space.nrpages > > > > +extern struct address_space swapper_spaces[]; > > > > +#define swap_address_space(entry) (&swapper_spaces[swp_type(entry)]) > > > > > > How about this naming? > > > > > > #define swapper_space(entry) (&swapper_spaces[swp_type(entry)]) > > > > > > > +extern unsigned long total_swapcache_pages(void); > > > > extern void show_swap_cache_info(void); > > > > extern int add_to_swap(struct page *); > > > > extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t); > > > > @@ -382,7 +383,7 @@ mem_cgroup_uncharge_swapcache(struct pag > > > > > > > > #define nr_swap_pages 0L > > > > #define total_swap_pages 0L > > > > -#define total_swapcache_pages 0UL > > > > +#define total_swapcache_pages() 0UL > > > > > > > > #define si_swapinfo(val) \ > > > > do { (val)->freeswap = (val)->totalswap = 0; } while (0) > > > > Index: linux/mm/memcontrol.c > > > > =================================================================== > > > > --- linux.orig/mm/memcontrol.c 2013-01-22 09:13:14.000000000 +0800 > > > Acked-by: Minchan Kim <minchan@xxxxxxxxxx> > > > > > > > +++ linux/mm/memcontrol.c 2013-01-22 09:29:29.374977700 +0800 > > > > @@ -6279,7 +6279,7 @@ static struct page *mc_handle_swap_pte(s > > > > * Because lookup_swap_cache() updates some statistics counter, > > > > * we call find_get_page() with swapper_space directly. > > > > */ > > > > - page = find_get_page(&swapper_space, ent.val); > > > > + page = find_get_page(swap_address_space(ent), ent.val); > > > > if (do_swap_account) > > > > entry->val = ent.val; > > > > > > > > @@ -6320,7 +6320,7 @@ static struct page *mc_handle_file_pte(s > > > > swp_entry_t swap = radix_to_swp_entry(page); > > > > if (do_swap_account) > > > > *entry = swap; > > > > - page = find_get_page(&swapper_space, swap.val); > > > > + page = find_get_page(swap_address_space(swap), swap.val); > > > > } > > > > #endif > > > > return page; > > > > Index: linux/mm/mincore.c > > > > =================================================================== > > > > --- linux.orig/mm/mincore.c 2013-01-22 09:13:14.000000000 +0800 > > > > +++ linux/mm/mincore.c 2013-01-22 09:29:29.378977649 +0800 > > > > @@ -75,7 +75,7 @@ static unsigned char mincore_page(struct > > > > /* shmem/tmpfs may return swap: account for swapcache page too. */ > > > > if (radix_tree_exceptional_entry(page)) { > > > > swp_entry_t swap = radix_to_swp_entry(page); > > > > - page = find_get_page(&swapper_space, swap.val); > > > > + page = find_get_page(swap_address_space(swap), swap.val); > > > > } > > > > #endif > > > > if (page) { > > > > @@ -135,7 +135,8 @@ static void mincore_pte_range(struct vm_ > > > > } else { > > > > #ifdef CONFIG_SWAP > > > > pgoff = entry.val; > > > > - *vec = mincore_page(&swapper_space, pgoff); > > > > + *vec = mincore_page(swap_address_space(entry), > > > > + pgoff); > > > > #else > > > > WARN_ON(1); > > > > *vec = 1; > > > > Index: linux/mm/swap.c > > > > =================================================================== > > > > --- linux.orig/mm/swap.c 2013-01-22 09:13:14.000000000 +0800 > > > > +++ linux/mm/swap.c 2013-01-22 09:29:29.378977649 +0800 > > > > @@ -855,9 +855,14 @@ EXPORT_SYMBOL(pagevec_lookup_tag); > > > > void __init swap_setup(void) > > > > { > > > > unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT); > > > > - > > > > #ifdef CONFIG_SWAP > > > > - bdi_init(swapper_space.backing_dev_info); > > > > + int i; > > > > + > > > > + for (i = 0; i < MAX_SWAPFILES; i++) { > > > > + bdi_init(swapper_spaces[i].backing_dev_info); > > > > + spin_lock_init(&swapper_spaces[i].tree_lock); > > > > + INIT_LIST_HEAD(&swapper_spaces[i].i_mmap_nonlinear); > > > > + } > > > > #endif > > > > > > > > /* Use a smaller cluster for small-memory machines */ > > > > Index: linux/mm/swap_state.c > > > > =================================================================== > > > > --- linux.orig/mm/swap_state.c 2013-01-22 09:13:14.000000000 +0800 > > > > +++ linux/mm/swap_state.c 2013-01-22 09:29:29.378977649 +0800 > > > > @@ -36,12 +36,12 @@ static struct backing_dev_info swap_back > > > > .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED, > > > > }; > > > > > > > > -struct address_space swapper_space = { > > > > - .page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN), > > > > - .tree_lock = __SPIN_LOCK_UNLOCKED(swapper_space.tree_lock), > > > > - .a_ops = &swap_aops, > > > > - .i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear), > > > > - .backing_dev_info = &swap_backing_dev_info, > > > > +struct address_space swapper_spaces[MAX_SWAPFILES] = { > > > > + [0 ... MAX_SWAPFILES - 1] = { > > > > + .page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN), > > > > + .a_ops = &swap_aops, > > > > + .backing_dev_info = &swap_backing_dev_info, > > > > + } > > > > }; > > > > > > > > #define INC_CACHE_INFO(x) do { swap_cache_info.x++; } while (0) > > > > @@ -53,9 +53,19 @@ static struct { > > > > unsigned long find_total; > > > > } swap_cache_info; > > > > > > > > +unsigned long total_swapcache_pages(void) > > > > +{ > > > > + int i; > > > > + unsigned long ret = 0; > > > > + > > > > + for (i = 0; i < MAX_SWAPFILES; i++) > > > > + ret += swapper_spaces[i].nrpages; > > > > + return ret; > > > > +} > > > > + > > > > void show_swap_cache_info(void) > > > > { > > > > - printk("%lu pages in swap cache\n", total_swapcache_pages); > > > > + printk("%lu pages in swap cache\n", total_swapcache_pages()); > > > > printk("Swap cache stats: add %lu, delete %lu, find %lu/%lu\n", > > > > swap_cache_info.add_total, swap_cache_info.del_total, > > > > swap_cache_info.find_success, swap_cache_info.find_total); > > > > @@ -70,23 +80,26 @@ void show_swap_cache_info(void) > > > > static int __add_to_swap_cache(struct page *page, swp_entry_t entry) > > > > { > > > > int error; > > > > + struct address_space *address_space; > > > > > > > > VM_BUG_ON(!PageLocked(page)); > > > > VM_BUG_ON(PageSwapCache(page)); > > > > VM_BUG_ON(!PageSwapBacked(page)); > > > > > > > > page_cache_get(page); > > > > - SetPageSwapCache(page); > > > > set_page_private(page, entry.val); > > > > + SetPageSwapCache(page); > > > > > > Why did you move this line? Is there any special reason? > > > > Originally I'm afraid page_mapping() gets invalid page_private(), but I then > > realized we hold page lock. There are some places we don't hold page lock. > > either such page isn't swap page or the caller can tolerate race. I forgot > > removing this change in the patch. But I certainly can be wrong. We can add > > memory barrier if required. > > Yeb. While I reviewed the patch, I was concern about that but I fail to find > a problem, too. But maybe it's valuable to add comment about that race > (!PageSwapCache(page) but page_mapping could return swapper_space) in page_mapping. Hi Minchan, If the race Shaohua mentioned should be PageSwapCache(page) but page_mapping couldn't return swapper_space since page_mapping() gets invalid page_private(). > So if some problem happens in the future, people could help to find culprit. > > Could you respin this with adding the comment and removing above unnecessary moving? > > > > > Thanks, > > Shaohua > > > > -- > > To unsubscribe, send a message with 'unsubscribe linux-mm' in > > the body to majordomo@xxxxxxxxx. For more info on Linux MM, > > see: http://www.linux-mm.org/ . > > Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a> > -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>