> On Fri, 9 Jul 2010 10:16:33 +0900 (JST)
> KOSAKI Motohiro <kosaki.motohiro@xxxxxxxxxxxxxx> wrote:
>
> > --- a/mm/vmscan.c
> > +++ b/mm/vmscan.c
> > @@ -2588,7 +2588,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
> >  		.swappiness = vm_swappiness,
> >  		.order = order,
> >  	};
> > -	unsigned long slab_reclaimable;
> > +	unsigned long nr_slab_pages0, nr_slab_pages1;
> >
> >  	disable_swap_token();
> >  	cond_resched();
> > @@ -2615,8 +2615,8 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
> >  		} while (priority >= 0 && sc.nr_reclaimed < nr_pages);
> >  	}
> >
> > -	slab_reclaimable = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
> > -	if (slab_reclaimable > zone->min_slab_pages) {
> > +	nr_slab_pages0 = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
> > +	if (nr_slab_pages0 > zone->min_slab_pages) {
> >  		/*
> >  		 * shrink_slab() does not currently allow us to determine how
> >  		 * many pages were freed in this zone.
>
> Well no, but it could do so, with some minor changes to struct
> reclaim_state and its handling.  Put a zone* and a counter in
> reclaim_state, handle them in sl?b.c.
>
> > So we take the current
> > @@ -2628,16 +2628,17 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
> >  		 * take a long time.
> >  		 */
> >  		while (shrink_slab(sc.nr_scanned, gfp_mask, order) &&
> > -			zone_page_state(zone, NR_SLAB_RECLAIMABLE) >
> > -				slab_reclaimable - nr_pages)
> > +		       (zone_page_state(zone, NR_SLAB_RECLAIMABLE) + nr_pages >
> > +				nr_slab_pages0))
> >  			;
> >
> >  		/*
> >  		 * Update nr_reclaimed by the number of slab pages we
> >  		 * reclaimed from this zone.
> >  		 */
> > -		sc.nr_reclaimed += slab_reclaimable -
> > -			zone_page_state(zone, NR_SLAB_RECLAIMABLE);
> > +		nr_slab_pages1 = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
> > +		if (nr_slab_pages1 < nr_slab_pages0)
> > +			sc.nr_reclaimed += nr_slab_pages0 - nr_slab_pages1;
>
> My, that's horrible.  The whole expression says "this number is
> basically a pile of random junk.  Let's add it in anyway".
>
> >  	}
> >
> >  	p->reclaim_state = NULL;

How's this?

Christoph, can we hear your opinion on adding a new branch to the
slab-free path?  I think it is acceptable: reclaim already causes a lot
of cache misses, so a branch mispredict is a relatively minor penalty by
comparison.  Thoughts?
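To make the cost concrete: the test each allocator gains boils down to
the helper below.  This is an illustrative sketch only; the helper name
and the likely() hint are mine and not in the patch, which open-codes
the check in slab.c, slob.c and slub.c.

	/*
	 * Illustrative sketch -- not part of the patch below.
	 * current->reclaim_state is NULL for every task except an
	 * active reclaimer, so the common free path only pays for one
	 * predicted-not-taken branch.
	 */
	static inline void slab_reclaim_account(struct page *page,
						unsigned long nr_freed)
	{
		struct reclaim_state *rs = current->reclaim_state;

		if (likely(!rs))
			return;
		/* rs->zone == NULL keeps the old "count all zones" behaviour */
		if (!rs->zone || rs->zone == page_zone(page))
			rs->reclaimed_slab += nr_freed;
	}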
From 9f7d7a9bd836b7373ade3056e6a3d2a3d82ac7ce Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@xxxxxxxxxxxxxx>
Date: Tue, 13 Jul 2010 14:43:21 +0900
Subject: [PATCH] vmscan: count reclaimed slab pages properly

Andrew Morton pointed out that __zone_reclaim() shouldn't compare old
and new zone_page_state(NR_SLAB_RECLAIMABLE) results.  Instead, it has
to account the number of freed slab pages by enhancing reclaim_state.
This patch does that.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@xxxxxxxxxxxxxx>
---
 include/linux/swap.h |    3 ++-
 mm/slab.c            |    4 +++-
 mm/slob.c            |    4 +++-
 mm/slub.c            |    7 +++++--
 mm/vmscan.c          |   44 ++++++++++++++++----------------------------
 5 files changed, 29 insertions(+), 33 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index ff4acea..b8d3f33 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -107,7 +107,8 @@ typedef struct {
  * memory reclaim
  */
 struct reclaim_state {
-	unsigned long reclaimed_slab;
+	unsigned long reclaimed_slab;
+	struct zone *zone;
 };

 #ifdef __KERNEL__
diff --git a/mm/slab.c b/mm/slab.c
index 4e9c46f..aac9306 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1741,7 +1741,9 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
 		page++;
 	}
 	if (current->reclaim_state)
-		current->reclaim_state->reclaimed_slab += nr_freed;
+		if (!current->reclaim_state->zone ||
+		    current->reclaim_state->zone == page_zone(virt_to_page(addr)))
+			current->reclaim_state->reclaimed_slab += nr_freed;
 	free_pages((unsigned long)addr, cachep->gfporder);
 }
diff --git a/mm/slob.c b/mm/slob.c
index 3f19a34..192d05c 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -260,7 +260,9 @@ static void *slob_new_pages(gfp_t gfp, int order, int node)
 static void slob_free_pages(void *b, int order)
 {
 	if (current->reclaim_state)
-		current->reclaim_state->reclaimed_slab += 1 << order;
+		if (!current->reclaim_state->zone ||
+		    current->reclaim_state->zone == page_zone(virt_to_page(b)))
+			current->reclaim_state->reclaimed_slab += 1 << order;
 	free_pages((unsigned long)b, order);
 }
diff --git a/mm/slub.c b/mm/slub.c
index 7bb7940..f510b14 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1204,8 +1204,11 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
 	__ClearPageSlab(page);
 	reset_page_mapcount(page);
-	if (current->reclaim_state)
-		current->reclaim_state->reclaimed_slab += pages;
+	if (current->reclaim_state) {
+		if (!current->reclaim_state->zone ||
+		    current->reclaim_state->zone == page_zone(page))
+			current->reclaim_state->reclaimed_slab += pages;
+	}
 	__free_pages(page, order);
 }
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1bf9f72..8faef0c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2571,7 +2571,6 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	/* Minimum pages needed in order to stay on node */
 	const unsigned long nr_pages = 1 << order;
 	struct task_struct *p = current;
-	struct reclaim_state reclaim_state;
 	int priority;
 	struct scan_control sc = {
 		.may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
@@ -2583,8 +2582,10 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 		.swappiness = vm_swappiness,
 		.order = order,
 	};
-	unsigned long nr_slab_pages0, nr_slab_pages1;
-
+	struct reclaim_state reclaim_state = {
+		.reclaimed_slab = 0,
+		.zone = zone,
+	};
 	cond_resched();

 	/*
@@ -2594,7 +2595,6 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	 */
 	p->flags |= PF_MEMALLOC | PF_SWAPWRITE;
 	lockdep_set_current_reclaim_state(gfp_mask);
-	reclaim_state.reclaimed_slab = 0;
 	p->reclaim_state = &reclaim_state;

 	if (zone_pagecache_reclaimable(zone) > zone->min_unmapped_pages) {
@@ -2610,34 +2610,22 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 		} while (priority >= 0 && sc.nr_reclaimed < nr_pages);
 	}

-	nr_slab_pages0 = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
-	if (nr_slab_pages0 > zone->min_slab_pages) {
+	if (zone_page_state(zone, NR_SLAB_RECLAIMABLE) > zone->min_slab_pages) {
 		unsigned long lru_pages = zone_reclaimable_pages(zone);
-		/*
-		 * shrink_slab() does not currently allow us to determine how
-		 * many pages were freed in this zone. So we take the current
-		 * number of slab pages and shake the slab until it is reduced
-		 * by the same nr_pages that we used for reclaiming unmapped
-		 * pages.
-		 *
-		 * Note that shrink_slab will free memory on all zones and may
-		 * take a long time.
-		 */
-		while (shrink_slab(sc.nr_scanned, gfp_mask, lru_pages) &&
-		       (zone_page_state(zone, NR_SLAB_RECLAIMABLE) + nr_pages >
-				nr_slab_pages0))
-			;
-
-		/*
-		 * Update nr_reclaimed by the number of slab pages we
-		 * reclaimed from this zone.
-		 */
-		nr_slab_pages1 = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
-		if (nr_slab_pages1 < nr_slab_pages0)
-			sc.nr_reclaimed += nr_slab_pages0 - nr_slab_pages1;
+		for (;;) {
+			/*
+			 * Note that shrink_slab will free memory on all zones
+			 * and may take a long time.
+			 */
+			if (!shrink_slab(sc.nr_scanned, gfp_mask, lru_pages))
+				break;
+			if (reclaim_state.reclaimed_slab >= nr_pages)
+				break;
+		}
 	}

+	sc.nr_reclaimed += reclaim_state.reclaimed_slab;
 	p->reclaim_state = NULL;
 	current->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE);
 	lockdep_clear_current_reclaim_state();
-- 
1.6.5.2
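One caveat for other reclaim_state users: now that the struct has a
second field, a stack-allocated reclaim_state must initialise .zone as
well, or the checks added in sl?b.c will compare against stack garbage.
Leaving .zone NULL keeps the old count-every-zone behaviour.  A minimal
sketch of such a caller (not part of the patch):

	/*
	 * Sketch of a caller that wants the old behaviour: the
	 * designated initializer zeroes .zone, so the checks added in
	 * sl?b.c always take the counting path.  A bare
	 * "reclaim_state.reclaimed_slab = 0;" on an uninitialised stack
	 * variable would leave .zone as garbage.
	 */
	struct reclaim_state reclaim_state = {
		.reclaimed_slab = 0,
		/* .zone implicitly NULL: credit slab frees from every zone */
	};

	current->reclaim_state = &reclaim_state;
	/* ... drive shrink_slab() / do_try_to_free_pages() as before ... */
	current->reclaim_state = NULL;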