On Thu, Sep 2, 2010 at 9:57 AM, KOSAKI Motohiro <kosaki.motohiro@xxxxxxxxxxxxxx> wrote: >> On Wed, Sep 01, 2010 at 11:01:43AM +0900, Minchan Kim wrote: >> > On Wed, Sep 1, 2010 at 10:55 AM, KOSAKI Motohiro >> > <kosaki.motohiro@xxxxxxxxxxxxxx> wrote: >> > > Hi >> > > >> > > Thank you for good commenting! >> > > >> > > >> > >> I don't like use oom_killer_disabled directly. >> > >> That's because we have wrapper inline functions to handle the >> > >> variable(ex, oom_killer_[disable/enable]). >> > >> It means we are reluctant to use the global variable directly. >> > >> So should we make new function as is_oom_killer_disable? >> > >> >> > >> I think NO. >> > >> >> > >> As I read your description, this problem is related to only hibernation. >> > >> Since hibernation freezes all processes(include kswapd), this problem >> > >> happens. Of course, now oom_killer_disabled is used by only >> > >> hibernation. But it can be used others in future(Off-topic : I don't >> > >> want it). Others can use it without freezing processes. Then kswapd >> > >> can set zone->all_unreclaimable and the problem can't happen. >> > >> >> > >> So I want to use sc->hibernation_mode which is already used >> > >> do_try_to_free_pages instead of oom_killer_disabled. >> > > >> > > Unfortunately, It's impossible. shrink_all_memory() turn on >> > > sc->hibernation_mode. but other hibernation caller merely call >> > > alloc_pages(). so we don't have any hint. >> > > >> > Ahh.. True. Sorry for that. >> > I will think some better method. >> > if I can't find it, I don't mind this patch. :) >> >> It seems that the problem happens as follows. >> (I might miss something since I just read your description) >> >> hibernation >> oom_disable >> alloc_pages >> do_try_to_free_pages >> if (scanning_global_lru(sc) && !all_unreclaimable) >> return 1; >> If kswapd is not frozen, it would set zone->all_unreclaimable to 1 and then >> shrink_zones maybe return true. so alloc_pages could go to _nopage_. 
>> If it is, it's no problem. >> Right? >> >> I think the problem would come from shrink_zones. >> It set false to all_unreclaimable blindly even though shrink_zone can't reclaim >> any page. It doesn't make sense. >> How about this? >> I think we need this regardless of the problem. >> What do you think about? >> >> diff --git a/mm/vmscan.c b/mm/vmscan.c >> index d8fd87d..22017b3 100644 >> --- a/mm/vmscan.c >> +++ b/mm/vmscan.c >> @@ -1901,7 +1901,8 @@ static bool shrink_zones(int priority, struct zonelist *zonelist, >> } >> >> shrink_zone(priority, zone, sc); >> - all_unreclaimable = false; >> + if (sc->nr_reclaimed) >> + all_unreclaimable = false; >> } >> return all_unreclaimable; >> } > > here is brief of shrink_zones(). > > for_each_zone_zonelist_nodemask(zone, z, zonelist, > gfp_zone(sc->gfp_mask), sc->nodemask) { > if (!populated_zone(zone)) > continue; > if (zone->all_unreclaimable && priority != DEF_PRIORITY) > continue; /* Let kswapd poll it */ > shrink_zone(priority, zone, sc); > all_unreclaimable = false; > } > > That said, > all zone's zone->all_unreclaimable are true > -> all_unreclaimable local variable become true. > otherwise > -> all_unreclaimable local variable become false. > > The intention is, we don't want to invoke oom-killer if there are > !all_unreclaimable zones. So your patch makes big design change and > seems to increase OOM risk. Right. Thanks for pointing me out. > I don't want to send risky patch to -stable. Still I don't want to use oom_killer_disabled magic. But I don't want to prevent urgent stable patch due to my just nitpick. This is my last try(just a quick patch; I didn't even try a compile test). If this isn't good, first of all, let's try merge yours. And then we can fix it later. Thanks for comment. -- CUT HERE -- Why do we check zone->all_unreclaimable only in kswapd? If kswapd is frozen in hibernation, OOM can happen. Let's do the check in the direct reclaim path, too. 
diff --git a/mm/vmscan.c b/mm/vmscan.c index 3109ff7..41493ba 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1878,12 +1878,11 @@ static void shrink_zone(int priority, struct zone *zone, * If a zone is deemed to be full of pinned pages then just give it a light * scan then give up on it. */ -static bool shrink_zones(int priority, struct zonelist *zonelist, +static void shrink_zones(int priority, struct zonelist *zonelist, struct scan_control *sc) { struct zoneref *z; struct zone *zone; - bool all_unreclaimable = true; for_each_zone_zonelist_nodemask(zone, z, zonelist, gfp_zone(sc->gfp_mask), sc->nodemask) { @@ -1901,8 +1900,25 @@ static bool shrink_zones(int priority, struct zonelist *zonelist, } shrink_zone(priority, zone, sc); - all_unreclaimable = false; } +} + +static inline int all_unreclaimable(struct zonelist *zonelist, struct scan_control *sc) +{ + struct zoneref *z; + struct zone *zone; + bool all_unreclaimable = true; + + for_each_zone_zonelist_nodemask(zone, z, zonelist, + gfp_zone(sc->gfp_mask), sc->nodemask) { + if (!populated_zone(zone)) + continue; + if (zone->pages_scanned < (zone_reclaimable_pages(zone) * 6)) { + all_unreclaimable = false; + break; + } + } + return all_unreclaimable; } @@ -1926,7 +1942,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, struct scan_control *sc) { int priority; - bool all_unreclaimable; unsigned long total_scanned = 0; struct reclaim_state *reclaim_state = current->reclaim_state; struct zoneref *z; @@ -1943,7 +1958,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, sc->nr_scanned = 0; if (!priority) disable_swap_token(); - all_unreclaimable = shrink_zones(priority, zonelist, sc); + shrink_zones(priority, zonelist, sc); /* * Don't shrink slabs when reclaiming memory from * over limit cgroups @@ -2005,7 +2020,7 @@ out: return sc->nr_reclaimed; /* top priority shrink_zones still had more to do? 
don't OOM, then */ - if (scanning_global_lru(sc) && !all_unreclaimable) + if (scanning_global_lru(sc) && !all_unreclaimable(zonelist, sc)) return 1; return 0; -- Kind regards, Minchan Kim
diff --git a/mm/vmscan.c b/mm/vmscan.c index 3109ff7..41493ba 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1878,12 +1878,11 @@ static void shrink_zone(int priority, struct zone *zone, * If a zone is deemed to be full of pinned pages then just give it a light * scan then give up on it. */ -static bool shrink_zones(int priority, struct zonelist *zonelist, +static void shrink_zones(int priority, struct zonelist *zonelist, struct scan_control *sc) { struct zoneref *z; struct zone *zone; - bool all_unreclaimable = true; for_each_zone_zonelist_nodemask(zone, z, zonelist, gfp_zone(sc->gfp_mask), sc->nodemask) { @@ -1901,8 +1900,25 @@ static bool shrink_zones(int priority, struct zonelist *zonelist, } shrink_zone(priority, zone, sc); - all_unreclaimable = false; } +} + +static inline int all_unreclaimable(struct zonelist *zonelist, struct scan_control *sc) +{ + struct zoneref *z; + struct zone *zone; + bool all_unreclaimable = true; + + for_each_zone_zonelist_nodemask(zone, z, zonelist, + gfp_zone(sc->gfp_mask), sc->nodemask) { + if (!populated_zone(zone)) + continue; + if (zone->pages_scanned < (zone_reclaimable_pages(zone) * 6)) { + all_unreclaimable = false; + break; + } + } + return all_unreclaimable; } @@ -1926,7 +1942,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, struct scan_control *sc) { int priority; - bool all_unreclaimable; unsigned long total_scanned = 0; struct reclaim_state *reclaim_state = current->reclaim_state; struct zoneref *z; @@ -1943,7 +1958,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, sc->nr_scanned = 0; if (!priority) disable_swap_token(); - all_unreclaimable = shrink_zones(priority, zonelist, sc); + shrink_zones(priority, zonelist, sc); /* * Don't shrink slabs when reclaiming memory from * over limit cgroups @@ -2005,7 +2020,7 @@ out: return sc->nr_reclaimed; /* top priority shrink_zones still had more to do? 
don't OOM, then */ - if (scanning_global_lru(sc) && !all_unreclaimable) + if (scanning_global_lru(sc) && !all_unreclaimable(zonelist, sc)) return 1; return 0;