Re: [PATCH 6/6] nfsd: add shrinker to reduce number of slots allocated per session

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 12/8/24 5:43 PM, NeilBrown wrote:
Add a shrinker which frees unused slots and may ask the clients to use
fewer slots on each session.

We keep a global count of the number of freeable slots: the sum, over
all sessions in all clients in all net-namespaces, of one less than the
session's current "target" slot count. This number is reported by the
shrinker.

When the shrinker is asked to free some, we call reduce_session_slots()
on each session in a round-robin, asking each to reduce its slot count
by 1.  This will reduce the "target" so the number reported by the
shrinker will reduce immediately.  The memory will only be freed later,
once the client has confirmed that it is no longer needed.

We use a global list of sessions and move the "head" to after the last
session that we asked to reduce, so the next callback from the shrinker
will move on to the next session.  This pressure should be applied
"evenly" across all sessions over time.

Signed-off-by: NeilBrown <neilb@xxxxxxx>
---
  fs/nfsd/nfs4state.c | 71 ++++++++++++++++++++++++++++++++++++++++++---
  fs/nfsd/state.h     |  1 +
  2 files changed, 68 insertions(+), 4 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index a2d1f97b8a0e..311f67418759 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1909,6 +1909,16 @@ gen_sessionid(struct nfsd4_session *ses)
   */
  #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
+static struct shrinker *nfsd_slot_shrinker;
+static DEFINE_SPINLOCK(nfsd_session_list_lock);
+static LIST_HEAD(nfsd_session_list);
+/* The sum of "target_slots-1" on every session.  The shrinker can push this
+ * down, though it can take a little while for the memory to actually
+ * be freed.  The "-1" is because we can never free slot 0 while the
+ * session is active.
+ */
+static atomic_t nfsd_total_target_slots = ATOMIC_INIT(0);
+
  static void
  free_session_slots(struct nfsd4_session *ses, int from)
  {
@@ -1930,8 +1940,11 @@ free_session_slots(struct nfsd4_session *ses, int from)
  		kfree(slot);
  	}
  	ses->se_fchannel.maxreqs = from;
-	if (ses->se_target_maxslots > from)
-		ses->se_target_maxslots = from;
+	if (ses->se_target_maxslots > from) {
+		int new_target = from ?: 1;
+		atomic_sub(ses->se_target_maxslots - new_target, &nfsd_total_target_slots);
+		ses->se_target_maxslots = new_target;
+	}
  }
/**
@@ -1949,7 +1962,7 @@ free_session_slots(struct nfsd4_session *ses, int from)
   * Return value:
   *   The number of slots that the target was reduced by.
   */
-static int __maybe_unused
+static int
  reduce_session_slots(struct nfsd4_session *ses, int dec)
  {
  	struct nfsd_net *nn = net_generic(ses->se_client->net,
@@ -1962,6 +1975,7 @@ reduce_session_slots(struct nfsd4_session *ses, int dec)
  		return ret;
  	ret = min(dec, ses->se_target_maxslots-1);
  	ses->se_target_maxslots -= ret;
+	atomic_sub(ret, &nfsd_total_target_slots);
  	ses->se_slot_gen += 1;
  	if (ses->se_slot_gen == 0) {
  		int i;
@@ -2021,6 +2035,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
  	fattrs->maxreqs = i;
  	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
  	new->se_target_maxslots = i;
+	atomic_add(i - 1, &nfsd_total_target_slots);
  	new->se_cb_slot_avail = ~0U;
  	new->se_cb_highest_slot = min(battrs->maxreqs - 1,
  				      NFSD_BC_SLOT_TABLE_SIZE - 1);
@@ -2145,6 +2160,36 @@ static void free_session(struct nfsd4_session *ses)
  	__free_session(ses);
  }
+static unsigned long
+nfsd_slot_count(struct shrinker *s, struct shrink_control *sc)
+{
+	unsigned long cnt = atomic_read(&nfsd_total_target_slots);
+
+	return cnt ? cnt : SHRINK_EMPTY;
+}
+
+static unsigned long
+nfsd_slot_scan(struct shrinker *s, struct shrink_control *sc)
+{
+	struct nfsd4_session *ses;
+	unsigned long scanned = 0;
+	unsigned long freed = 0;
+
+	spin_lock(&nfsd_session_list_lock);
+	list_for_each_entry(ses, &nfsd_session_list, se_all_sessions) {
+		freed += reduce_session_slots(ses, 1);
+		scanned += 1;
+		if (scanned >= sc->nr_to_scan) {
+			/* Move starting point for next scan */
+			list_move(&nfsd_session_list, &ses->se_all_sessions);
+			break;
+		}
+	}
+	spin_unlock(&nfsd_session_list_lock);
+	sc->nr_scanned = scanned;
+	return freed;
+}
+
  static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, struct nfs4_client *clp, struct nfsd4_create_session *cses)
  {
  	int idx;
@@ -2169,6 +2214,10 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru
  	list_add(&new->se_perclnt, &clp->cl_sessions);
  	spin_unlock(&clp->cl_lock);
+ spin_lock(&nfsd_session_list_lock);
+	list_add_tail(&new->se_all_sessions, &nfsd_session_list);
+	spin_unlock(&nfsd_session_list_lock);
+
  	{
  		struct sockaddr *sa = svc_addr(rqstp);
  		/*
@@ -2238,6 +2287,9 @@ unhash_session(struct nfsd4_session *ses)
  	spin_lock(&ses->se_client->cl_lock);
  	list_del(&ses->se_perclnt);
  	spin_unlock(&ses->se_client->cl_lock);
+	spin_lock(&nfsd_session_list_lock);
+	list_del(&ses->se_all_sessions);
+	spin_unlock(&nfsd_session_list_lock);
  }
/* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */
@@ -4380,6 +4432,8 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
  						GFP_NOWAIT))) {
  				s += 1;
  				session->se_fchannel.maxreqs = s;
+				atomic_add(s - session->se_target_maxslots,
+					   &nfsd_total_target_slots);
  				session->se_target_maxslots = s;
  			} else {
  				kfree(slot);
@@ -8776,7 +8830,6 @@ nfs4_state_start_net(struct net *net)
  }
/* initialization to perform when the nfsd service is started: */
-
  int
  nfs4_state_start(void)
  {
@@ -8786,6 +8839,15 @@ nfs4_state_start(void)
  	if (ret)
  		return ret;
+ nfsd_slot_shrinker = shrinker_alloc(0, "nfsd-DRC-slot");
+	if (!nfsd_slot_shrinker) {
+		rhltable_destroy(&nfs4_file_rhltable);
+		return -ENOMEM;
+	}
+	nfsd_slot_shrinker->count_objects = nfsd_slot_count;
+	nfsd_slot_shrinker->scan_objects = nfsd_slot_scan;
+	shrinker_register(nfsd_slot_shrinker);
+
  	set_max_delegations();
  	return 0;
  }
@@ -8827,6 +8889,7 @@ void
  nfs4_state_shutdown(void)
  {
  	rhltable_destroy(&nfs4_file_rhltable);
+	shrinker_free(nfsd_slot_shrinker);
  }
static void
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 4251ff3c5ad1..f45aee751a10 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -325,6 +325,7 @@ struct nfsd4_session {
  	u32			se_cb_prog;
  	struct list_head	se_hash;	/* hash by sessionid */
  	struct list_head	se_perclnt;
+	struct list_head	se_all_sessions;/* global list of sessions */
  	struct nfs4_client	*se_client;
  	struct nfs4_sessionid	se_sessionid;
  	struct nfsd4_channel_attrs se_fchannel;

Bisected to this patch. Sometime during the pynfs NFSv4.1 server tests,
this list_del corruption splat is triggered:

[ 87.768277] list_del corruption. prev->next should be ff388b4606369638, but was 0000000000000000. (prev=ff388b4606368038)
[   87.771492] ------------[ cut here ]------------
[   87.772862] kernel BUG at lib/list_debug.c:62!
[   87.775029] Oops: invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
[   87.777179] CPU: 2 UID: 0 PID: 940 Comm: nfsd Not tainted 6.13.0-rc2-g6139eb164177 #1
[   87.780065] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-2.fc40 04/01/2014
[   87.783143] RIP: 0010:__list_del_entry_valid_or_report.cold+0x4f/0x9f
[ 87.785336] Code: c2 48 83 05 43 a7 13 04 01 e8 5e ba f9 ff 0f 0b 48 89 f2 48 89 fe 48 c7 c7 00 07 84 ae 48 83 05 0f a7 13 04 01 e8 42 ba f9 ff <0f> 0b 48 89 fe 48 89 ca 48 c7 c7 c8 06 84 ae 48 83 05 db a6 13 04
[   87.791467] RSP: 0018:ff4e1b1302de3d08 EFLAGS: 00010246
[   87.793251] RAX: 000000000000006d RBX: ff388b4606369600 RCX: 0000000000000000
[   87.795660] RDX: 0000000000000000 RSI: ff388b496fd21900 RDI: ff388b496fd21900
[   87.798066] RBP: ff4e1b1302de3d08 R08: 0000000000000000 R09: 656e3e2d76657270
[   87.800485] R10: 0000000000000029 R11: ff4e1b1302de3aa0 R12: ffffffffb0495580
[   87.802884] R13: ff388b460dcee128 R14: 0000000000000001 R15: ffffffffb0495580
[   87.805301] FS:  0000000000000000(0000) GS:ff388b496fd00000(0000) knlGS:0000000000000000
[   87.807992] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 87.809952] CR2: 00007f7424c42008 CR3: 0000000100f30001 CR4: 0000000000771ef0
[   87.811961] PKRU: 55555554
[   87.812699] Call Trace:
[   87.813380]  <TASK>
[   87.813966]  ? show_regs.cold+0x21/0x36
[   87.814990]  ? __die_body+0x2b/0xa0
[   87.815934]  ? __die+0x3c/0x4e
[   87.816669]  ? die+0x43/0x80
[   87.817297]  ? do_trap+0x11c/0x150
[   87.818008]  ? do_error_trap+0xbc/0x110
[   87.818797]  ? __list_del_entry_valid_or_report.cold+0x4f/0x9f
[   87.819955]  ? exc_invalid_op+0x6e/0x90
[   87.820747]  ? __list_del_entry_valid_or_report.cold+0x4f/0x9f
[   87.821904]  ? asm_exc_invalid_op+0x1f/0x30
[   87.822761]  ? __list_del_entry_valid_or_report.cold+0x4f/0x9f
[   87.823915]  ? __list_del_entry_valid_or_report.cold+0x4f/0x9f
[   87.825069]  nfsd4_destroy_session+0x280/0x430 [nfsd]
[   87.826230]  nfsd4_proc_compound+0x64d/0xcf0 [nfsd]
[   87.827141]  ? nfs4svc_decode_compoundargs+0x367/0x6c0 [nfsd]
[   87.827989]  nfsd_dispatch+0x16b/0x3d0 [nfsd]
[   87.828671]  svc_process_common+0x903/0xc80 [sunrpc]
[   87.829440]  ? __pfx_nfsd_dispatch+0x10/0x10 [nfsd]
[   87.830178]  svc_process+0x166/0x2e0 [sunrpc]
[   87.830868]  svc_recv+0xd65/0x12c0 [sunrpc]
[   87.831529]  ? __pfx_nfsd+0x10/0x10 [nfsd]
[   87.832160]  nfsd+0x10a/0x1b0 [nfsd]
[   87.832734]  kthread+0x149/0x1c0
[   87.833201]  ? __pfx_kthread+0x10/0x10
[   87.833737]  ret_from_fork+0x5e/0x80
[   87.834248]  ? __pfx_kthread+0x10/0x10
[   87.834786]  ret_from_fork_asm+0x1a/0x30
[   87.835349]  </TASK>


--
Chuck Lever




[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux