Andrew, I've noticed you have already taken the patch into the mm tree. I have realized that I didn't mark it for stable, which IMO would be worthwhile: debugging nasty reclaim recursion bugs is definitely a pain, and this patch might fix one. Even if it doesn't, the change is rather straightforward and shouldn't break anything. So if nobody objects, I would mark this for stable (3.16+ AFAICS). On Mon 29-12-14 20:33:12, Michal Hocko wrote: > From 3242f56ae8886a3c605d93960e77176dfe1dff43 Mon Sep 17 00:00:00 2001 > From: Michal Hocko <mhocko@xxxxxxx> > Date: Mon, 29 Dec 2014 20:30:35 +0100 > Subject: [PATCH] mm: get rid of radix tree gfp mask for pagecache_get_page > > 2457aec63745 (mm: non-atomically mark page accessed during page cache > allocation where possible) has added a separate parameter for specifying > gfp mask for radix tree allocations. > > Not only this is less than optimal from the API point of view > because it is error prone, it is also buggy currently because > grab_cache_page_write_begin is using GFP_KERNEL for radix tree and > if fgp_flags doesn't contain FGP_NOFS (mostly controlled by fs by > AOP_FLAG_NOFS flag) but the mapping_gfp_mask has __GFP_FS cleared then > the radix tree allocation wouldn't obey the restriction and might > recurse into filesystem and cause deadlocks. This is the case for > most filesystems unfortunately because only ext4 and gfs2 are using > AOP_FLAG_NOFS. > > Let's simply remove radix_gfp_mask parameter because the allocation > context is same for both page cache and for the radix tree. Just make > sure that the radix tree gets only the sane subset of the mask (e.g. do > not pass __GFP_WRITE). > > Long term it is more preferable to convert remaining users of > AOP_FLAG_NOFS to use mapping_gfp_mask instead and simplify this > interface even further. 
> > Reported-by: Dave Chinner <david@xxxxxxxxxxxxx> > Signed-off-by: Michal Hocko <mhocko@xxxxxxx> > --- > include/linux/pagemap.h | 13 ++++++------- > mm/filemap.c | 29 ++++++++++++----------------- > 2 files changed, 18 insertions(+), 24 deletions(-) > > diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h > index 7ea069cd3257..4b3736f7065c 100644 > --- a/include/linux/pagemap.h > +++ b/include/linux/pagemap.h > @@ -251,7 +251,7 @@ pgoff_t page_cache_prev_hole(struct address_space *mapping, > #define FGP_NOWAIT 0x00000020 > > struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset, > - int fgp_flags, gfp_t cache_gfp_mask, gfp_t radix_gfp_mask); > + int fgp_flags, gfp_t cache_gfp_mask); > > /** > * find_get_page - find and get a page reference > @@ -266,13 +266,13 @@ struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset, > static inline struct page *find_get_page(struct address_space *mapping, > pgoff_t offset) > { > - return pagecache_get_page(mapping, offset, 0, 0, 0); > + return pagecache_get_page(mapping, offset, 0, 0); > } > > static inline struct page *find_get_page_flags(struct address_space *mapping, > pgoff_t offset, int fgp_flags) > { > - return pagecache_get_page(mapping, offset, fgp_flags, 0, 0); > + return pagecache_get_page(mapping, offset, fgp_flags, 0); > } > > /** > @@ -292,7 +292,7 @@ static inline struct page *find_get_page_flags(struct address_space *mapping, > static inline struct page *find_lock_page(struct address_space *mapping, > pgoff_t offset) > { > - return pagecache_get_page(mapping, offset, FGP_LOCK, 0, 0); > + return pagecache_get_page(mapping, offset, FGP_LOCK, 0); > } > > /** > @@ -319,7 +319,7 @@ static inline struct page *find_or_create_page(struct address_space *mapping, > { > return pagecache_get_page(mapping, offset, > FGP_LOCK|FGP_ACCESSED|FGP_CREAT, > - gfp_mask, gfp_mask & GFP_RECLAIM_MASK); > + gfp_mask); > } > > /** > @@ -340,8 +340,7 @@ static inline struct page 
*grab_cache_page_nowait(struct address_space *mapping, > { > return pagecache_get_page(mapping, index, > FGP_LOCK|FGP_CREAT|FGP_NOFS|FGP_NOWAIT, > - mapping_gfp_mask(mapping), > - GFP_NOFS); > + mapping_gfp_mask(mapping)); > } > > struct page *find_get_entry(struct address_space *mapping, pgoff_t offset); > diff --git a/mm/filemap.c b/mm/filemap.c > index e8905bc3cbd7..11477d3b7838 100644 > --- a/mm/filemap.c > +++ b/mm/filemap.c > @@ -1046,8 +1046,7 @@ EXPORT_SYMBOL(find_lock_entry); > * @mapping: the address_space to search > * @offset: the page index > * @fgp_flags: PCG flags > - * @cache_gfp_mask: gfp mask to use for the page cache data page allocation > - * @radix_gfp_mask: gfp mask to use for radix tree node allocation > + * @gfp_mask: gfp mask to use for the page cache data page allocation > * > * Looks up the page cache slot at @mapping & @offset. > * > @@ -1056,11 +1055,9 @@ EXPORT_SYMBOL(find_lock_entry); > * FGP_ACCESSED: the page will be marked accessed > * FGP_LOCK: Page is return locked > * FGP_CREAT: If page is not present then a new page is allocated using > - * @cache_gfp_mask and added to the page cache and the VM's LRU > - * list. If radix tree nodes are allocated during page cache > - * insertion then @radix_gfp_mask is used. The page is returned > - * locked and with an increased refcount. Otherwise, %NULL is > - * returned. > + * @gfp_mask and added to the page cache and the VM's LRU > + * list. The page is returned locked and with an increased > + * refcount. Otherwise, %NULL is returned. > * > * If FGP_LOCK or FGP_CREAT are specified then the function may sleep even > * if the GFP flags specified for FGP_CREAT are atomic. > @@ -1068,7 +1065,7 @@ EXPORT_SYMBOL(find_lock_entry); > * If there is a page cache page, it is returned with an increased refcount. 
> */ > struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset, > - int fgp_flags, gfp_t cache_gfp_mask, gfp_t radix_gfp_mask) > + int fgp_flags, gfp_t gfp_mask) > { > struct page *page; > > @@ -1105,13 +1102,11 @@ no_page: > if (!page && (fgp_flags & FGP_CREAT)) { > int err; > if ((fgp_flags & FGP_WRITE) && mapping_cap_account_dirty(mapping)) > - cache_gfp_mask |= __GFP_WRITE; > - if (fgp_flags & FGP_NOFS) { > - cache_gfp_mask &= ~__GFP_FS; > - radix_gfp_mask &= ~__GFP_FS; > - } > + gfp_mask |= __GFP_WRITE; > + if (fgp_flags & FGP_NOFS) > + gfp_mask &= ~__GFP_FS; > > - page = __page_cache_alloc(cache_gfp_mask); > + page = __page_cache_alloc(gfp_mask); > if (!page) > return NULL; > > @@ -1122,7 +1117,8 @@ no_page: > if (fgp_flags & FGP_ACCESSED) > __SetPageReferenced(page); > > - err = add_to_page_cache_lru(page, mapping, offset, radix_gfp_mask); > + err = add_to_page_cache_lru(page, mapping, offset, > + gfp_mask & GFP_RECLAIM_MASK); > if (unlikely(err)) { > page_cache_release(page); > page = NULL; > @@ -2443,8 +2439,7 @@ struct page *grab_cache_page_write_begin(struct address_space *mapping, > fgp_flags |= FGP_NOFS; > > page = pagecache_get_page(mapping, index, fgp_flags, > - mapping_gfp_mask(mapping), > - GFP_KERNEL); > + mapping_gfp_mask(mapping)); > if (page) > wait_for_stable_page(page); > > -- > 2.1.4 > > -- > Michal Hocko > SUSE Labs > > -- > To unsubscribe, send a message with 'unsubscribe linux-mm' in > the body to majordomo@xxxxxxxxx. For more info on Linux MM, > see: http://www.linux-mm.org/ . > Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a> -- Michal Hocko SUSE Labs -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>