Re: [PATCH 1/4] nfsd: remove artificial limits on the session-based DRC

"NeilBrown" <neilb@xxxxxxx> · Fri, 15 Nov 2024 15:01:47 +1100

On Thu, 14 Nov 2024, Jeff Layton wrote:
> On Wed, 2024-11-13 at 16:38 +1100, NeilBrown wrote:
> > Rather than guessing how much space it might be safe to use for the DRC,
> > simply try allocating slots and be prepared to accept failure.
> > 
> > The first slot for each session is allocated with GFP_KERNEL which is
> > unlikely to fail.  Subsequent slots are allocated with the addition of
> > __GFP_NORETRY which is expected to fail if there isn't much free memory.
> > 
> > This is probably too aggressive but clears the way for adding a
> > shrinker interface to free extra slots when memory is tight.
> > 
> > Also *always* allow NFSD_MAX_SLOTS_PER_SESSION slot pointers per
> > session.  This allows the session to be allocated before we know how
> > many slots we will successfully allocate, and will be useful when we
> > starting dynamically increasing the number of allocated slots.
> > 
> > Signed-off-by: NeilBrown <neilb@xxxxxxx>
> > ---
> >  fs/nfsd/nfs4state.c | 91 +++++++--------------------------------------
> >  fs/nfsd/nfsd.h      |  3 --
> >  fs/nfsd/nfssvc.c    | 32 ----------------
> >  fs/nfsd/state.h     |  2 +-
> >  4 files changed, 14 insertions(+), 114 deletions(-)
> > 
> > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > index 551d2958ec29..2dcba0c83c10 100644
> > --- a/fs/nfsd/nfs4state.c
> > +++ b/fs/nfsd/nfs4state.c
> > @@ -1935,59 +1935,6 @@ static inline u32 slot_bytes(struct nfsd4_channel_attrs *ca)
> >  	return size + sizeof(struct nfsd4_slot);
> >  }
> >  
> > -/*
> > - * XXX: If we run out of reserved DRC memory we could (up to a point)
> > - * re-negotiate active sessions and reduce their slot usage to make
> > - * room for new connections. For now we just fail the create session.
> > - */
> > -static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca, struct nfsd_net *nn)
> > -{
> > -	u32 slotsize = slot_bytes(ca);
> > -	u32 num = ca->maxreqs;
> > -	unsigned long avail, total_avail;
> > -	unsigned int scale_factor;
> > -
> > -	spin_lock(&nfsd_drc_lock);
> > -	if (nfsd_drc_max_mem > nfsd_drc_mem_used)
> > -		total_avail = nfsd_drc_max_mem - nfsd_drc_mem_used;
> > -	else
> > -		/* We have handed out more space than we chose in
> > -		 * set_max_drc() to allow.  That isn't really a
> > -		 * problem as long as that doesn't make us think we
> > -		 * have lots more due to integer overflow.
> > -		 */
> > -		total_avail = 0;
> > -	avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION, total_avail);
> > -	/*
> > -	 * Never use more than a fraction of the remaining memory,
> > -	 * unless it's the only way to give this client a slot.
> > -	 * The chosen fraction is either 1/8 or 1/number of threads,
> > -	 * whichever is smaller.  This ensures there are adequate
> > -	 * slots to support multiple clients per thread.
> > -	 * Give the client one slot even if that would require
> > -	 * over-allocation--it is better than failure.
> > -	 */
> > -	scale_factor = max_t(unsigned int, 8, nn->nfsd_serv->sv_nrthreads);
> > -
> > -	avail = clamp_t(unsigned long, avail, slotsize,
> > -			total_avail/scale_factor);
> > -	num = min_t(int, num, avail / slotsize);
> > -	num = max_t(int, num, 1);
> > -	nfsd_drc_mem_used += num * slotsize;
> > -	spin_unlock(&nfsd_drc_lock);
> > -
> > -	return num;
> > -}
> > -
> > -static void nfsd4_put_drc_mem(struct nfsd4_channel_attrs *ca)
> > -{
> > -	int slotsize = slot_bytes(ca);
> > -
> > -	spin_lock(&nfsd_drc_lock);
> > -	nfsd_drc_mem_used -= slotsize * ca->maxreqs;
> > -	spin_unlock(&nfsd_drc_lock);
> > -}
> > -
> >  static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
> >  					   struct nfsd4_channel_attrs *battrs)
> >  {
> > @@ -1996,26 +1943,27 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
> >  	struct nfsd4_session *new;
> >  	int i;
> >  
> > -	BUILD_BUG_ON(struct_size(new, se_slots, NFSD_MAX_SLOTS_PER_SESSION)
> > -		     > PAGE_SIZE);
> > +	BUILD_BUG_ON(sizeof(new) > PAGE_SIZE);
> >  
> > -	new = kzalloc(struct_size(new, se_slots, numslots), GFP_KERNEL);
> > +	new = kzalloc(sizeof(*new), GFP_KERNEL);
> >  	if (!new)
> >  		return NULL;
> >  	/* allocate each struct nfsd4_slot and data cache in one piece */
> > -	for (i = 0; i < numslots; i++) {
> > -		new->se_slots[i] = kzalloc(slotsize, GFP_KERNEL);
> > +	new->se_slots[0] = kzalloc(slotsize, GFP_KERNEL);
> > +	if (!new->se_slots[0])
> > +		goto out_free;
> > +
> > +	for (i = 1; i < numslots; i++) {
> > +		new->se_slots[i] = kzalloc(slotsize, GFP_KERNEL | __GFP_NORETRY);
> >  		if (!new->se_slots[i])
> > -			goto out_free;
> > +			break;
> >  	}
> > -
> > +	fattrs->maxreqs = i;
> >  	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
> >  	memcpy(&new->se_bchannel, battrs, sizeof(struct nfsd4_channel_attrs));
> >  
> >  	return new;
> >  out_free:
> > -	while (i--)
> > -		kfree(new->se_slots[i]);
> >  	kfree(new);
> >  	return NULL;
> >  }
> > @@ -2128,7 +2076,6 @@ static void __free_session(struct nfsd4_session *ses)
> >  static void free_session(struct nfsd4_session *ses)
> >  {
> >  	nfsd4_del_conns(ses);
> > -	nfsd4_put_drc_mem(&ses->se_fchannel);
> >  	__free_session(ses);
> >  }
> >  
> > @@ -3748,17 +3695,6 @@ static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfs
> >  	ca->maxresp_cached = min_t(u32, ca->maxresp_cached,
> >  			NFSD_SLOT_CACHE_SIZE + NFSD_MIN_HDR_SEQ_SZ);
> >  	ca->maxreqs = min_t(u32, ca->maxreqs, NFSD_MAX_SLOTS_PER_SESSION);
> > -	/*
> > -	 * Note decreasing slot size below client's request may make it
> > -	 * difficult for client to function correctly, whereas
> > -	 * decreasing the number of slots will (just?) affect
> > -	 * performance.  When short on memory we therefore prefer to
> > -	 * decrease number of slots instead of their size.  Clients that
> > -	 * request larger slots than they need will get poor results:
> > -	 * Note that we always allow at least one slot, because our
> > -	 * accounting is soft and provides no guarantees either way.
> > -	 */
> > -	ca->maxreqs = nfsd4_get_drc_mem(ca, nn);
> >  
> >  	return nfs_ok;
> >  }
> > @@ -3836,11 +3772,11 @@ nfsd4_create_session(struct svc_rqst *rqstp,
> >  		return status;
> >  	status = check_backchannel_attrs(&cr_ses->back_channel);
> >  	if (status)
> > -		goto out_release_drc_mem;
> > +		goto out_err;
> >  	status = nfserr_jukebox;
> >  	new = alloc_session(&cr_ses->fore_channel, &cr_ses->back_channel);
> >  	if (!new)
> > -		goto out_release_drc_mem;
> > +		goto out_err;
> >  	conn = alloc_conn_from_crses(rqstp, cr_ses);
> >  	if (!conn)
> >  		goto out_free_session;
> > @@ -3950,8 +3886,7 @@ nfsd4_create_session(struct svc_rqst *rqstp,
> >  		expire_client(old);
> >  out_free_session:
> >  	__free_session(new);
> > -out_release_drc_mem:
> > -	nfsd4_put_drc_mem(&cr_ses->fore_channel);
> > +out_err:
> >  	return status;
> >  }
> >  
> > diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
> > index 4b56ba1e8e48..3eb21e63b921 100644
> > --- a/fs/nfsd/nfsd.h
> > +++ b/fs/nfsd/nfsd.h
> > @@ -88,9 +88,6 @@ struct nfsd_genl_rqstp {
> >  extern struct svc_program	nfsd_programs[];
> >  extern const struct svc_version	nfsd_version2, nfsd_version3, nfsd_version4;
> >  extern struct mutex		nfsd_mutex;
> > -extern spinlock_t		nfsd_drc_lock;
> > -extern unsigned long		nfsd_drc_max_mem;
> > -extern unsigned long		nfsd_drc_mem_used;
> >  extern atomic_t			nfsd_th_cnt;		/* number of available threads */
> >  
> >  extern const struct seq_operations nfs_exports_op;
> > diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
> > index 49e2f32102ab..3dbaefc96608 100644
> > --- a/fs/nfsd/nfssvc.c
> > +++ b/fs/nfsd/nfssvc.c
> > @@ -70,16 +70,6 @@ static __be32			nfsd_init_request(struct svc_rqst *,
> >   */
> >  DEFINE_MUTEX(nfsd_mutex);
> >  
> > -/*
> > - * nfsd_drc_lock protects nfsd_drc_max_pages and nfsd_drc_pages_used.
> > - * nfsd_drc_max_pages limits the total amount of memory available for
> > - * version 4.1 DRC caches.
> > - * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage.
> > - */
> > -DEFINE_SPINLOCK(nfsd_drc_lock);
> > -unsigned long	nfsd_drc_max_mem;
> > -unsigned long	nfsd_drc_mem_used;
> > -
> >  #if IS_ENABLED(CONFIG_NFS_LOCALIO)
> >  static const struct svc_version *localio_versions[] = {
> >  	[1] = &localio_version1,
> > @@ -575,27 +565,6 @@ void nfsd_reset_versions(struct nfsd_net *nn)
> >  		}
> >  }
> >  
> > -/*
> > - * Each session guarantees a negotiated per slot memory cache for replies
> > - * which in turn consumes memory beyond the v2/v3/v4.0 server. A dedicated
> > - * NFSv4.1 server might want to use more memory for a DRC than a machine
> > - * with mutiple services.
> > - *
> > - * Impose a hard limit on the number of pages for the DRC which varies
> > - * according to the machines free pages. This is of course only a default.
> > - *
> > - * For now this is a #defined shift which could be under admin control
> > - * in the future.
> > - */
> > -static void set_max_drc(void)
> > -{
> > -	#define NFSD_DRC_SIZE_SHIFT	7
> > -	nfsd_drc_max_mem = (nr_free_buffer_pages()
> > -					>> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE;
> > -	nfsd_drc_mem_used = 0;
> > -	dprintk("%s nfsd_drc_max_mem %lu \n", __func__, nfsd_drc_max_mem);
> > -}
> > -
> >  static int nfsd_get_default_max_blksize(void)
> >  {
> >  	struct sysinfo i;
> > @@ -678,7 +647,6 @@ int nfsd_create_serv(struct net *net)
> >  	nn->nfsd_serv = serv;
> >  	spin_unlock(&nfsd_notifier_lock);
> >  
> > -	set_max_drc();
> >  	/* check if the notifier is already set */
> >  	if (atomic_inc_return(&nfsd_notifier_refcount) == 1) {
> >  		register_inetaddr_notifier(&nfsd_inetaddr_notifier);
> > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> > index 35b3564c065f..c052e9eea81b 100644
> > --- a/fs/nfsd/state.h
> > +++ b/fs/nfsd/state.h
> > @@ -310,7 +310,7 @@ struct nfsd4_session {
> >  	struct list_head	se_conns;
> >  	u32			se_cb_prog;
> >  	u32			se_cb_seq_nr;
> > -	struct nfsd4_slot	*se_slots[];	/* forward channel slots */
> > +	struct nfsd4_slot	*se_slots[NFSD_MAX_SLOTS_PER_SESSION];	/* forward channel slots */
> >  };
> >  
> >  /* formatted contents of nfs4_sessionid */
> 
> I like this patch overall. One thing we could consider is allocating
> slots aggressively with GFP_KERNEL up to a particular threshold, and
> then switch to GFP_NOWAIT, but this seems like a reasonable place to
> start.

At this point (when creating the session) it is __GFP_NORETRY , not
GET_NOWAIT.  So it makes a decent effort but does give up earlier than
GFP_KERNEL. 

What threshold should we use?  "1" seemed good to me as 1 is essential
but more is just for improved performance: wait for the client to
actually use them.

I'd rather allocate JIT than ASAP.

Thanks,
NeilBrown