[RFC][PATCH] Re: shrink_dcache_sb scalability problem.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Apr 18, 2006 at 10:29:28AM +1000, David Chinner wrote:
> 
> The other thing that I thought of over the weekend is per-node LRU
> lists and a lock per node This will reduce the length of the lists,
> allow some parallelism even while we scan and purge each list
> using the existing algorithm, and not completely destroy the LRU-ness
> of the dcache.

Here's a patch that does that. It's rough, but it boots and i've run
some basic tests against it.  It doesn't survive dbench with 200
processes, though, as it crashes in prune_one_dentry() with a
corrupted d_child.

The logic in prune_dcache() is pretty grotesque atm, and I
doubt it's correct. Comments on how to fix it are welcome ;)

Cheers,

Dave.
-- 
Dave Chinner
R&D Software Enginner
SGI Australian Software Group

Make the dentry unused lists per-node.

Signed-off-by: Dave Chinner <dgc@xxxxxxx>

Index: 2.6.x-xfs-new/fs/dcache.c
===================================================================
--- 2.6.x-xfs-new.orig/fs/dcache.c	2006-04-18 10:35:10.000000000 +1000
+++ 2.6.x-xfs-new/fs/dcache.c	2006-04-18 22:23:04.603552947 +1000
@@ -62,7 +62,6 @@ static kmem_cache_t *dentry_cache; 
 static unsigned int d_hash_mask;
 static unsigned int d_hash_shift;
 static struct hlist_head *dentry_hashtable;
-static LIST_HEAD(dentry_unused);
 
 /* Statistics gathering. */
 struct dentry_stat_t dentry_stat = {
@@ -114,6 +113,72 @@ static void dentry_iput(struct dentry * 
 	}
 }
 
+static void
+dentry_unused_add(struct dentry *dentry)
+{
+	pg_data_t *pgdat = page_zone(virt_to_page(dentry))->zone_pgdat;
+
+	spin_lock(&pgdat->dentry_unused_lock);
+	list_add(&dentry->d_lru, &pgdat->dentry_unused);
+	spin_unlock(&pgdat->dentry_unused_lock);
+	dentry_stat.nr_unused++;
+}
+
+static void
+dentry_unused_add_tail(struct dentry *dentry)
+{
+	pg_data_t *pgdat = page_zone(virt_to_page(dentry))->zone_pgdat;
+
+	spin_lock(&pgdat->dentry_unused_lock);
+	list_add_tail(&dentry->d_lru, &pgdat->dentry_unused);
+	spin_unlock(&pgdat->dentry_unused_lock);
+	dentry_stat.nr_unused++;
+}
+
+/*
+ * Assumes external locks are already held
+ */
+static void
+dentry_unused_move(struct dentry *dentry, struct list_head *head)
+{
+	list_del(&dentry->d_lru);
+	list_add(&dentry->d_lru, head);
+}
+
+static void
+dentry_unused_del(struct dentry *dentry)
+{
+	if (!list_empty(&dentry->d_lru)) {
+		pg_data_t *pgdat = page_zone(virt_to_page(dentry))->zone_pgdat;
+
+		spin_lock(&pgdat->dentry_unused_lock);
+		list_del(&dentry->d_lru);
+		spin_unlock(&pgdat->dentry_unused_lock);
+		dentry_stat.nr_unused--;
+	}
+}
+
+static inline void
+__dentry_unused_del_init(struct dentry *dentry)
+{
+	if (likely(!list_empty(&dentry->d_lru)))
+		list_del_init(&dentry->d_lru);
+
+}
+
+static void
+dentry_unused_del_init(struct dentry *dentry)
+{
+	if (!list_empty(&dentry->d_lru)) {
+		pg_data_t *pgdat = page_zone(virt_to_page(dentry))->zone_pgdat;
+
+		spin_lock(&pgdat->dentry_unused_lock);
+		__dentry_unused_del_init(dentry);
+		spin_unlock(&pgdat->dentry_unused_lock);
+		dentry_stat.nr_unused--;
+	}
+}
+
 /* 
  * This is dput
  *
@@ -173,8 +238,7 @@ repeat:
 		goto kill_it;
   	if (list_empty(&dentry->d_lru)) {
   		dentry->d_flags |= DCACHE_REFERENCED;
-  		list_add(&dentry->d_lru, &dentry_unused);
-  		dentry_stat.nr_unused++;
+		dentry_unused_add(dentry);
   	}
  	spin_unlock(&dentry->d_lock);
 	spin_unlock(&dcache_lock);
@@ -186,13 +250,8 @@ unhash_it:
 kill_it: {
 		struct dentry *parent;
 
-		/* If dentry was on d_lru list
-		 * delete it from there
-		 */
-  		if (!list_empty(&dentry->d_lru)) {
-  			list_del(&dentry->d_lru);
-  			dentry_stat.nr_unused--;
-  		}
+		/* If dentry was on d_lru list delete it from there */
+		dentry_unused_del(dentry);
   		list_del(&dentry->d_u.d_child);
 		dentry_stat.nr_dentry--;	/* For d_free, below */
 		/*drops the locks, at that point nobody can reach this dentry */
@@ -268,10 +327,7 @@ int d_invalidate(struct dentry * dentry)
 static inline struct dentry * __dget_locked(struct dentry *dentry)
 {
 	atomic_inc(&dentry->d_count);
-	if (!list_empty(&dentry->d_lru)) {
-		dentry_stat.nr_unused--;
-		list_del_init(&dentry->d_lru);
-	}
+	dentry_unused_del_init(dentry);
 	return dentry;
 }
 
@@ -392,42 +448,73 @@ static inline void prune_one_dentry(stru
  
 static void prune_dcache(int count)
 {
-	spin_lock(&dcache_lock);
-	for (; count ; count--) {
-		struct dentry *dentry;
-		struct list_head *tmp;
-
-		cond_resched_lock(&dcache_lock);
-
-		tmp = dentry_unused.prev;
-		if (tmp == &dentry_unused)
-			break;
-		list_del_init(tmp);
-		prefetch(dentry_unused.prev);
- 		dentry_stat.nr_unused--;
-		dentry = list_entry(tmp, struct dentry, d_lru);
+	int node_id = numa_node_id();
+	int	scan_low = 0;
+	int	c = count;
+	pg_data_t *pgdat;
+
+rescan:
+	for_each_pgdat(pgdat) {
+		if (!scan_low) {
+			if (pgdat->node_id < node_id)
+				continue;
+		} else {
+			if (pgdat->node_id >= node_id)
+				break;
+		}
+		for (c = count; c ; c--) {
+			struct dentry *dentry;
+			struct list_head *tmp;
+			spin_lock(&pgdat->dentry_unused_lock);
+
+			tmp = pgdat->dentry_unused.prev;
+			if (tmp == &pgdat->dentry_unused) {
+				spin_unlock(&pgdat->dentry_unused_lock);
+				break;
+			}
+			dentry = list_entry(tmp, struct dentry, d_lru);
+			__dentry_unused_del_init(dentry);
+			prefetch(&pgdat->dentry_unused.prev);
+			spin_unlock(&pgdat->dentry_unused_lock);
 
- 		spin_lock(&dentry->d_lock);
+			spin_lock(&dcache_lock);
+			spin_lock(&dentry->d_lock);
+			/*
+			 * We found an inuse dentry which was not removed from
+			 * dentry_unused because of laziness during lookup or
+			 * a dentry that has just been put back on the unused
+			 * list.  Do not free it - just leave it where it is.
+			 */
+			if (atomic_read(&dentry->d_count) ||
+			    !list_empty(&dentry->d_lru)) {
+				spin_unlock(&dentry->d_lock);
+				spin_unlock(&dcache_lock);
+				continue;
+			}
+			/* If the dentry was recently referenced, don't free it. */
+			if (dentry->d_flags & DCACHE_REFERENCED) {
+				dentry->d_flags &= ~DCACHE_REFERENCED;
+				dentry_unused_add(dentry);
+				spin_unlock(&dentry->d_lock);
+				spin_unlock(&dcache_lock);
+				continue;
+			}
+			prune_one_dentry(dentry);
+			spin_unlock(&dcache_lock);
+		}
 		/*
-		 * We found an inuse dentry which was not removed from
-		 * dentry_unused because of laziness during lookup.  Do not free
-		 * it - just keep it off the dentry_unused list.
+		 * shrink_parent needs to scan each list, and if it only
+		 * calls in with one count then we may never find it. So
+		 * if count ==1, scan each list once.
 		 */
- 		if (atomic_read(&dentry->d_count)) {
- 			spin_unlock(&dentry->d_lock);
-			continue;
-		}
-		/* If the dentry was recently referenced, don't free it. */
-		if (dentry->d_flags & DCACHE_REFERENCED) {
-			dentry->d_flags &= ~DCACHE_REFERENCED;
- 			list_add(&dentry->d_lru, &dentry_unused);
- 			dentry_stat.nr_unused++;
- 			spin_unlock(&dentry->d_lock);
-			continue;
-		}
-		prune_one_dentry(dentry);
+		if (count == 1)
+			c = 1;
+		if (!c)
+			break;
 	}
-	spin_unlock(&dcache_lock);
+	if (c && scan_low++ == 0)
+		goto rescan;
+
 }
 
 /*
@@ -456,39 +543,66 @@ void shrink_dcache_sb(struct super_block
 {
 	struct list_head *tmp, *next;
 	struct dentry *dentry;
+	pg_data_t *pgdat;
+	int found;
 
-	/*
-	 * Pass one ... move the dentries for the specified
-	 * superblock to the most recent end of the unused list.
-	 */
-	spin_lock(&dcache_lock);
-	list_for_each_safe(tmp, next, &dentry_unused) {
-		dentry = list_entry(tmp, struct dentry, d_lru);
-		if (dentry->d_sb != sb)
-			continue;
-		list_del(tmp);
-		list_add(tmp, &dentry_unused);
-	}
+	for_each_pgdat(pgdat) {
+		found = 0;
+		spin_lock(&pgdat->dentry_unused_lock);
+		/*
+		 * Pass one ... move the dentries for the specified
+		 * superblock to the most recent end of the unused list.
+		 */
+		list_for_each_safe(tmp, next, &pgdat->dentry_unused) {
+			dentry = list_entry(tmp, struct dentry, d_lru);
+			if (dentry->d_sb != sb)
+				continue;
+			dentry_unused_move(dentry, &pgdat->dentry_unused);
+			found++;
+		}
+		spin_unlock(&pgdat->dentry_unused_lock);
 
-	/*
-	 * Pass two ... free the dentries for this superblock.
-	 */
-repeat:
-	list_for_each_safe(tmp, next, &dentry_unused) {
-		dentry = list_entry(tmp, struct dentry, d_lru);
-		if (dentry->d_sb != sb)
-			continue;
-		dentry_stat.nr_unused--;
-		list_del_init(tmp);
-		spin_lock(&dentry->d_lock);
-		if (atomic_read(&dentry->d_count)) {
-			spin_unlock(&dentry->d_lock);
+		/*
+		 * Pass two ... free the dentries for this superblock.
+		 * use the output of the first pass to determine if we need
+		 * to run this pass.
+		 */
+		if (!found)
 			continue;
+repeat:
+		spin_lock(&pgdat->dentry_unused_lock);
+		list_for_each_safe(tmp, next, &pgdat->dentry_unused) {
+			dentry = list_entry(tmp, struct dentry, d_lru);
+			if (dentry->d_sb != sb)
+				continue;
+			__dentry_unused_del_init(dentry);
+
+			/*
+			 * We snoop on the d_count here so we can skip
+			 * dentries we can obviously not free right now
+			 * without dropping the list lock. This prevents us
+			 * from getting stuck on an in-use dentry on the unused
+			 * list.
+			 */
+			if (atomic_read(&dentry->d_count))
+				continue;
+
+			spin_unlock(&pgdat->dentry_unused_lock);
+			spin_lock(&dcache_lock);
+			spin_lock(&dentry->d_lock);
+			if (atomic_read(&dentry->d_count) ||
+			    (dentry->d_sb != sb) ||
+			    !list_empty(&dentry->d_lru)) {
+				spin_unlock(&dentry->d_lock);
+				spin_unlock(&dcache_lock);
+				goto repeat;
+			}
+			prune_one_dentry(dentry);
+			spin_unlock(&dcache_lock);
+			goto repeat;
 		}
-		prune_one_dentry(dentry);
-		goto repeat;
+		spin_unlock(&pgdat->dentry_unused_lock);
 	}
-	spin_unlock(&dcache_lock);
 }
 
 /*
@@ -572,17 +686,13 @@ resume:
 		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
 		next = tmp->next;
 
-		if (!list_empty(&dentry->d_lru)) {
-			dentry_stat.nr_unused--;
-			list_del_init(&dentry->d_lru);
-		}
+		dentry_unused_del_init(dentry);
 		/* 
 		 * move only zero ref count dentries to the end 
 		 * of the unused list for prune_dcache
 		 */
 		if (!atomic_read(&dentry->d_count)) {
-			list_add(&dentry->d_lru, dentry_unused.prev);
-			dentry_stat.nr_unused++;
+			dentry_unused_add_tail(dentry);
 			found++;
 		}
 
@@ -657,18 +767,14 @@ void shrink_dcache_anon(struct hlist_hea
 		spin_lock(&dcache_lock);
 		hlist_for_each(lp, head) {
 			struct dentry *this = hlist_entry(lp, struct dentry, d_hash);
-			if (!list_empty(&this->d_lru)) {
-				dentry_stat.nr_unused--;
-				list_del_init(&this->d_lru);
-			}
 
+			dentry_unused_del_init(this);
 			/* 
 			 * move only zero ref count dentries to the end 
 			 * of the unused list for prune_dcache
 			 */
 			if (!atomic_read(&this->d_count)) {
-				list_add_tail(&this->d_lru, &dentry_unused);
-				dentry_stat.nr_unused++;
+				dentry_unused_add_tail(this);
 				found++;
 			}
 		}
@@ -1673,6 +1779,12 @@ static void __init dcache_init_early(voi
 static void __init dcache_init(unsigned long mempages)
 {
 	int loop;
+	pg_data_t *pgdat;
+
+	for_each_pgdat(pgdat) {
+		spin_lock_init(&pgdat->dentry_unused_lock);
+		INIT_LIST_HEAD(&pgdat->dentry_unused);
+	}
 
 	/* 
 	 * A constructor could be added for stable state like the lists,
Index: 2.6.x-xfs-new/include/linux/mmzone.h
===================================================================
--- 2.6.x-xfs-new.orig/include/linux/mmzone.h	2006-02-06 11:57:55.000000000 +1100
+++ 2.6.x-xfs-new/include/linux/mmzone.h	2006-04-18 18:04:22.378952121 +1000
@@ -311,6 +311,8 @@ typedef struct pglist_data {
 	wait_queue_head_t kswapd_wait;
 	struct task_struct *kswapd;
 	int kswapd_max_order;
+	spinlock_t dentry_unused_lock;
+	struct list_head dentry_unused;
 } pg_data_t;
 
 #define node_present_pages(nid)	(NODE_DATA(nid)->node_present_pages)
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux