Re: [Cluster-devel] [PATCH 2/2] GFS2: Add a gfs2-specific prune_icache_sb

Hi,

I think the idea looks good. A couple of comments below though...

On 24/06/16 20:50, Bob Peterson wrote:
This patch adds a new prune_icache_sb function for the VFS slab
shrinker to call. Trying to free the inodes from memory directly
might deadlock, because evicting an inode calls into the DLM
to acquire the glock. The DLM, in turn, may block on a pending
fence operation, which may itself be blocked on the memory
allocation that caused the slab shrinker to be called in the
first place.

Signed-off-by: Bob Peterson <rpeterso@xxxxxxxxxx>
---
  fs/gfs2/incore.h     |  2 ++
  fs/gfs2/ops_fstype.c |  1 +
  fs/gfs2/quota.c      | 25 +++++++++++++++++++++++++
  fs/gfs2/super.c      | 13 +++++++++++++
  4 files changed, 41 insertions(+)

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index a6a3389..a367459 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -757,6 +757,8 @@ struct gfs2_sbd {
  	struct task_struct *sd_logd_process;
  	struct task_struct *sd_quotad_process;
+	int sd_iprune; /* inodes to prune */
+	spinlock_t sd_shrinkspin;
  	/* Quota stuff */

diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 4546360..65a69be 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -95,6 +95,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
  	spin_lock_init(&sdp->sd_jindex_spin);
  	mutex_init(&sdp->sd_jindex_mutex);
  	init_completion(&sdp->sd_journal_ready);
+	spin_lock_init(&sdp->sd_shrinkspin);
  	INIT_LIST_HEAD(&sdp->sd_quota_list);
  	mutex_init(&sdp->sd_quota_mutex);
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index ce7d69a..5810a2c 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -1528,14 +1528,39 @@ void gfs2_wake_up_statfs(struct gfs2_sbd *sdp) {
  int gfs2_quotad(void *data)
  {
  	struct gfs2_sbd *sdp = data;
+	struct super_block *sb = sdp->sd_vfs;
  	struct gfs2_tune *tune = &sdp->sd_tune;
  	unsigned long statfs_timeo = 0;
  	unsigned long quotad_timeo = 0;
  	unsigned long t = 0;
  	DEFINE_WAIT(wait);
  	int empty;
+	int rc;
+	struct shrink_control sc = {.gfp_mask = GFP_KERNEL, };
  	while (!kthread_should_stop()) {
+		/* TODO: Deal with shrinking of dcache */
+		/* Prune any inode cache intended by the shrinker. */
+		spin_lock(&sdp->sd_shrinkspin);
+		if (sdp->sd_iprune > 0) {
+			sc.nr_to_scan = sdp->sd_iprune;
+			if (sc.nr_to_scan > 1024)
+				sc.nr_to_scan = 1024;
+			sdp->sd_iprune -= sc.nr_to_scan;
+			spin_unlock(&sdp->sd_shrinkspin);
+			rc = prune_icache_sb(sb, &sc);
+			if (rc < 0) {
+				spin_lock(&sdp->sd_shrinkspin);
+				sdp->sd_iprune = 0;
+				spin_unlock(&sdp->sd_shrinkspin);
+			}
+			if (sdp->sd_iprune) {
+				cond_resched();
+				continue;
+			}
+		} else {
+			spin_unlock(&sdp->sd_shrinkspin);
+		}
  		/* Update the master statfs file */
  		if (sdp->sd_statfs_force_sync) {
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 9b2ff353..75e8a85 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1667,6 +1667,18 @@ static void gfs2_destroy_inode(struct inode *inode)
  	call_rcu(&inode->i_rcu, gfs2_i_callback);
  }
+static long gfs2_prune_icache_sb(struct super_block *sb,
+				 struct shrink_control *sc)
+{
+	struct gfs2_sbd *sdp;
+
+	sdp = sb->s_fs_info;
+	spin_lock(&sdp->sd_shrinkspin);
+	sdp->sd_iprune = sc->nr_to_scan + 1;
+	spin_unlock(&sdp->sd_shrinkspin);
+	return 0;
+}
This doesn't wake up the thread that will do the reclaim, so there may be a significant delay between the request to shrink and the actual shrink. Also, using quotad is not a good plan, since it might itself block waiting for memory. This should be done by a thread of its own to avoid any deadlock possibility here.
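
Whichever thread ends up doing the reclaim, the callback needs to kick it at the point where it records the request. If the work were to stay in quotad for the moment, that might look something like this (untested sketch, reusing the sd_quota_wait waitqueue that quotad already sleeps on; a dedicated thread would have its own waitqueue instead):

	static long gfs2_prune_icache_sb(struct super_block *sb,
					 struct shrink_control *sc)
	{
		struct gfs2_sbd *sdp = sb->s_fs_info;

		spin_lock(&sdp->sd_shrinkspin);
		sdp->sd_iprune = sc->nr_to_scan + 1;
		spin_unlock(&sdp->sd_shrinkspin);
		/* Wake the worker so the prune starts now rather than on
		   the next quotad/statfs timeout. */
		wake_up(&sdp->sd_quota_wait);
		return 0;
	}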

There also appears to be a limit of 1024 inodes to scan per run of quotad, which means it could take a very long time to push out any significant number, and the number does seem a bit arbitrary - was there a reason for selecting it? It would probably be better to simply yield every now and then if there are a lot of items to process.
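
For instance, something along these lines in the worker (again, only a sketch) would drain the whole request in one pass, yielding between batches rather than capping the work done per quotad iteration:

	spin_lock(&sdp->sd_shrinkspin);
	while (sdp->sd_iprune > 0) {
		/* batch size only bounds how long we go between yields */
		sc.nr_to_scan = min(sdp->sd_iprune, 1024);
		sdp->sd_iprune -= sc.nr_to_scan;
		spin_unlock(&sdp->sd_shrinkspin);
		prune_icache_sb(sb, &sc);
		cond_resched();
		spin_lock(&sdp->sd_shrinkspin);
	}
	spin_unlock(&sdp->sd_shrinkspin);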

Steve.

+
  const struct super_operations gfs2_super_ops = {
  	.alloc_inode		= gfs2_alloc_inode,
  	.destroy_inode		= gfs2_destroy_inode,
@@ -1681,5 +1693,6 @@ const struct super_operations gfs2_super_ops = {
  	.remount_fs		= gfs2_remount_fs,
  	.drop_inode		= gfs2_drop_inode,
  	.show_options		= gfs2_show_options,
+	.prune_icache_sb	= gfs2_prune_icache_sb,
  };
