Re: [PATCH] block,blkcg: use __GFP_NOWARN for best-effort allocations in blkcg

Michal Hocko <mhocko@xxxxxxxxxx> · Tue, 29 Nov 2016 18:13:33 +0100

On Tue 29-11-16 17:57:08, Vlastimil Babka wrote:
> On 11/29/2016 05:38 PM, Tejun Heo wrote:
> > On Tue, Nov 29, 2016 at 08:25:07AM +0100, Michal Hocko wrote:
> > > --- a/include/linux/gfp.h
> > > +++ b/include/linux/gfp.h
> > > @@ -246,7 +246,7 @@ struct vm_area_struct;
> > >  #define GFP_ATOMIC	(__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM)
> > >  #define GFP_KERNEL	(__GFP_RECLAIM | __GFP_IO | __GFP_FS)
> > >  #define GFP_KERNEL_ACCOUNT (GFP_KERNEL | __GFP_ACCOUNT)
> > > -#define GFP_NOWAIT	(__GFP_KSWAPD_RECLAIM)
> > > +#define GFP_NOWAIT	(__GFP_KSWAPD_RECLAIM|__GFP_NOWARN)
> > >  #define GFP_NOIO	(__GFP_RECLAIM)
> > >  #define GFP_NOFS	(__GFP_RECLAIM | __GFP_IO)
> > >  #define GFP_TEMPORARY	(__GFP_RECLAIM | __GFP_IO | __GFP_FS | \
> > > 
> > > this will not catch users who are doing gfp & ~__GFP_DIRECT_RECLAIM but
> > > I would rather not make warn_alloc() even more cluttered with checks.
> > 
> > Yeah, FWIW, looks good to me.
> 
> Me too. Just don't forget to update the comment describing GFP_NOWAIT and
> check the existing users if duplicite __GFP_NOWARN can be removed, and if
> they really want to be doing GFP_NOWAIT and not GFP_ATOMIC.

How does this look like?
---
>From c6635f7fedec1fc475da0a4be32ea360560cde18 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@xxxxxxxx>
Date: Tue, 29 Nov 2016 18:04:00 +0100
Subject: [PATCH] mm: do not warn about allocation failures for GFP_NOWAIT

Historically we didn't have a distinction between atomic allocation
requests and those which just do not want to sleep for other (e.g.
performance optimization reasons). After d0164adc89f6 ("mm, page_alloc:
distinguish between being unable to sleep, unwilling to sleep and
avoiding waking kswapd") this distinction is clear.

Nevertheless we still have quite some GFP_NOWAIT requests without
__GFP_NOWARN so the system log will contain scary and rarely useful
allocation failure splats. Those allocations are to be expected to fail
under a memory pressure (especially when the kswapd doesn't catch up
with the load). GFP_ATOMIC is different in this regards because it
allows an access to part of the memory reserves which sould make it much
less likely to fail and actual reports could help to tune the system
better - e.g. by increasing the amount of memory reserves for a better
performance. This is not true for GFP_NOWAIT though.

This patch simply makes __GFP_NOWARN implicit to all GFP_NOWAIT requests
to silent them all. Those which used the explicit NOWARN were removed.

Suggested-by: Vlastimil Babka <vbabka@xxxxxxx>
Signed-off-by: Michal Hocko <mhocko@xxxxxxxx>
---
 arch/arm/include/asm/tlb.h      | 2 +-
 arch/ia64/include/asm/tlb.h     | 2 +-
 arch/s390/mm/pgalloc.c          | 2 +-
 drivers/dma-buf/reservation.c   | 3 +--
 drivers/md/bcache/bset.c        | 3 +--
 drivers/md/bcache/btree.c       | 2 +-
 fs/fs-writeback.c               | 2 +-
 include/linux/gfp.h             | 5 +++--
 kernel/events/uprobes.c         | 2 +-
 mm/memory.c                     | 4 ++--
 mm/rmap.c                       | 2 +-
 net/ipv4/tcp_cdg.c              | 2 +-
 net/sunrpc/sched.c              | 2 +-
 net/sunrpc/xprt.c               | 2 +-
 net/sunrpc/xprtrdma/transport.c | 2 +-
 virt/kvm/async_pf.c             | 2 +-
 16 files changed, 19 insertions(+), 20 deletions(-)

diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index 1e25cd80589e..5b173836df23 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -117,7 +117,7 @@ static inline void tlb_add_flush(struct mmu_gather *tlb, unsigned long addr)
 
 static inline void __tlb_alloc_page(struct mmu_gather *tlb)
 {
-	unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
+	unsigned long addr = __get_free_pages(GFP_NOWAIT, 0);
 
 	if (addr) {
 		tlb->pages = (void *)addr;
diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h
index 77e541cf0e5d..7dfabd37c993 100644
--- a/arch/ia64/include/asm/tlb.h
+++ b/arch/ia64/include/asm/tlb.h
@@ -158,7 +158,7 @@ ia64_tlb_flush_mmu (struct mmu_gather *tlb, unsigned long start, unsigned long e
 
 static inline void __tlb_alloc_page(struct mmu_gather *tlb)
 {
-	unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
+	unsigned long addr = __get_free_pages(GFP_NOWAIT, 0);
 
 	if (addr) {
 		tlb->pages = (void *)addr;
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 995f78532cc2..1e5adc76f7dd 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -340,7 +340,7 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
 	tlb->mm->context.flush_mm = 1;
 	if (*batch == NULL) {
 		*batch = (struct mmu_table_batch *)
-			__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
+			__get_free_page(GFP_NOWAIT);
 		if (*batch == NULL) {
 			__tlb_flush_mm_lazy(tlb->mm);
 			tlb_remove_table_one(table);
diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c
index 9566a62ad8e3..2715258ef1db 100644
--- a/drivers/dma-buf/reservation.c
+++ b/drivers/dma-buf/reservation.c
@@ -298,8 +298,7 @@ int reservation_object_get_fences_rcu(struct reservation_object *obj,
 			struct fence **nshared;
 			size_t sz = sizeof(*shared) * fobj->shared_max;
 
-			nshared = krealloc(shared, sz,
-					   GFP_NOWAIT | __GFP_NOWARN);
+			nshared = krealloc(shared, sz, GFP_NOWAIT);
 			if (!nshared) {
 				rcu_read_unlock();
 				nshared = krealloc(shared, sz, GFP_KERNEL);
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 646fe85261c1..0a9b6a91f5b9 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -1182,8 +1182,7 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
 {
 	uint64_t start_time;
 	bool used_mempool = false;
-	struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOWAIT,
-						     order);
+	struct bset *out = (void *) __get_free_pages(GFP_NOWAIT, order);
 	if (!out) {
 		struct page *outp;
 
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 76f7534d1dd1..54355099dd14 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -419,7 +419,7 @@ static void do_btree_node_write(struct btree *b)
 	SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) +
 		       bset_sector_offset(&b->keys, i));
 
-	if (!bio_alloc_pages(b->bio, __GFP_NOWARN|GFP_NOWAIT)) {
+	if (!bio_alloc_pages(b->bio, GFP_NOWAIT)) {
 		int j;
 		struct bio_vec *bv;
 		void *base = (void *) ((unsigned long) i & ~(PAGE_SIZE - 1));
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 05713a5da083..ae46eebd56be 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -932,7 +932,7 @@ void wb_start_writeback(struct bdi_writeback *wb, long nr_pages,
 	 * wakeup the thread for old dirty data writeback
 	 */
 	work = kzalloc(sizeof(*work),
-		       GFP_NOWAIT | __GFP_NOMEMALLOC | __GFP_NOWARN);
+		       GFP_NOWAIT | __GFP_NOMEMALLOC);
 	if (!work) {
 		trace_writeback_nowork(wb);
 		wb_wakeup(wb);
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index f8041f9de31e..2579a9abbc38 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -205,7 +205,8 @@ struct vm_area_struct;
  *   accounted to kmemcg.
  *
  * GFP_NOWAIT is for kernel allocations that should not stall for direct
- *   reclaim, start physical IO or use any filesystem callback.
+ *   reclaim, start physical IO or use any filesystem callback. These are
+ *   quite likely to fail under memory pressure.
  *
  * GFP_NOIO will use direct reclaim to discard clean pages or slab pages
  *   that do not require the starting of any physical IO.
@@ -246,7 +247,7 @@ struct vm_area_struct;
 #define GFP_ATOMIC	(__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM)
 #define GFP_KERNEL	(__GFP_RECLAIM | __GFP_IO | __GFP_FS)
 #define GFP_KERNEL_ACCOUNT (GFP_KERNEL | __GFP_ACCOUNT)
-#define GFP_NOWAIT	(__GFP_KSWAPD_RECLAIM)
+#define GFP_NOWAIT	(__GFP_KSWAPD_RECLAIM | __GFP_NOWARN)
 #define GFP_NOIO	(__GFP_RECLAIM)
 #define GFP_NOFS	(__GFP_RECLAIM | __GFP_IO)
 #define GFP_TEMPORARY	(__GFP_RECLAIM | __GFP_IO | __GFP_FS | \
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 19417719fde7..3013df303461 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -732,7 +732,7 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
 			 * reclaim. This is optimistic, no harm done if it fails.
 			 */
 			prev = kmalloc(sizeof(struct map_info),
-					GFP_NOWAIT | __GFP_NOMEMALLOC | __GFP_NOWARN);
+					GFP_NOWAIT | __GFP_NOMEMALLOC);
 			if (prev)
 				prev->next = NULL;
 		}
diff --git a/mm/memory.c b/mm/memory.c
index 840adc6e05d6..13fa563e90e1 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -197,7 +197,7 @@ static bool tlb_next_batch(struct mmu_gather *tlb)
 	if (tlb->batch_count == MAX_GATHER_BATCH_COUNT)
 		return false;
 
-	batch = (void *)__get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
+	batch = (void *)__get_free_pages(GFP_NOWAIT, 0);
 	if (!batch)
 		return false;
 
@@ -383,7 +383,7 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
 	}
 
 	if (*batch == NULL) {
-		*batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
+		*batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT);
 		if (*batch == NULL) {
 			tlb_remove_table_one(table);
 			return;
diff --git a/mm/rmap.c b/mm/rmap.c
index 1ef36404e7b2..004abc7c2cff 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -263,7 +263,7 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
 	list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
 		struct anon_vma *anon_vma;
 
-		avc = anon_vma_chain_alloc(GFP_NOWAIT | __GFP_NOWARN);
+		avc = anon_vma_chain_alloc(GFP_NOWAIT);
 		if (unlikely(!avc)) {
 			unlock_anon_vma_root(root);
 			root = NULL;
diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c
index 03725b294286..e2621acc54e8 100644
--- a/net/ipv4/tcp_cdg.c
+++ b/net/ipv4/tcp_cdg.c
@@ -385,7 +385,7 @@ static void tcp_cdg_init(struct sock *sk)
 	/* We silently fall back to window = 1 if allocation fails. */
 	if (window > 1)
 		ca->gradients = kcalloc(window, sizeof(ca->gradients[0]),
-					GFP_NOWAIT | __GFP_NOWARN);
+					GFP_NOWAIT);
 	ca->rtt_seq = tp->snd_nxt;
 	ca->shadow_wnd = tp->snd_cwnd;
 }
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 9ae588511aaf..791fd3fe4995 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -871,7 +871,7 @@ void *rpc_malloc(struct rpc_task *task, size_t size)
 	gfp_t gfp = GFP_NOIO | __GFP_NOWARN;
 
 	if (RPC_IS_SWAPPER(task))
-		gfp = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
+		gfp = __GFP_MEMALLOC | GFP_NOWAIT;
 
 	size += sizeof(struct rpc_buffer);
 	if (size <= RPC_BUFFER_MAXSIZE)
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index ea244b29138b..3a4e13282f4e 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1081,7 +1081,7 @@ void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
 		list_del(&req->rq_list);
 		goto out_init_req;
 	}
-	req = xprt_dynamic_alloc_slot(xprt, GFP_NOWAIT|__GFP_NOWARN);
+	req = xprt_dynamic_alloc_slot(xprt, GFP_NOWAIT);
 	if (!IS_ERR(req))
 		goto out_init_req;
 	switch (PTR_ERR(req)) {
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 81f0e879f019..c22ed3b7bf61 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -502,7 +502,7 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size)
 
 	flags = RPCRDMA_DEF_GFP;
 	if (RPC_IS_SWAPPER(task))
-		flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
+		flags = __GFP_MEMALLOC | GFP_NOWAIT;
 
 	if (req->rl_rdmabuf == NULL)
 		goto out_rdmabuf;
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 8035cc1eb955..99209775818f 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -179,7 +179,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
 	 * do alloc nowait since if we are going to sleep anyway we
 	 * may as well sleep faulting in page
 	 */
-	work = kmem_cache_zalloc(async_pf_cache, GFP_NOWAIT | __GFP_NOWARN);
+	work = kmem_cache_zalloc(async_pf_cache, GFP_NOWAIT);
 	if (!work)
 		return 0;
 
-- 
2.10.2
-- 
Michal Hocko
SUSE Labs

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxx.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@xxxxxxxxx";> email@xxxxxxxxx </a>