Re: [patch] fs: fix superblock iteration race

On Fri, Jun 11, 2010 at 09:06:01AM -0700, Linus Torvalds wrote:
> On Fri, Jun 11, 2010 at 7:50 AM, Nick Piggin <npiggin@xxxxxxx> wrote:
> > Not sure if this is really the _cleanest_ way to fix it. But open coding
> > the list walking is a bit annoying too. And I couldn't see any real way to
> > make the list macro safe. Better ideas?
> 
> I really think we should open-code the list walking instead. You
> basically already are doing that, and in a very non-obvious way too
> (ie you are mixing the non-open-coded list walker with also explicitly
> playing with the internal variable for that magic walker).
> 
> So I would get rid of the 'list_for_each_entry_safe' entirely, and
> replace it with something like
> 
>    struct list_head *list;
> 
>    spin_lock(&sb_lock);
>    list = super_blocks.next;
>    while (list != &super_blocks) {
>       struct super_block *sb = list_entry(list, struct super_block, s_list);
>       list = list->next;
> 
>       if (list_empty(&sb->s_instances))
>          continue;
> 
>       if (!sb->s_nr_dentry_unused)
>          continue;
> 
>       sb->s_count++;
>       spin_unlock(&sb_lock);
> 
>       .... whatever ...
> 
>       spin_lock(&sb_lock);
>       /* We dropped the lock, need to re-load the next list entry */
>       list = sb->s_list.next;
>       __put_super(sb);
>    }
> 
> which isn't that much more complicated, now is it? Sure, it's
> open-coded, but at least it doesn't play games. And being open-coded,
> it's a lot more honest about the issue. Maybe even add a comment
> saying "we can't use the list_for_each[_safe]() macro, because we
> don't hold the lock and we're not the only ones that may delete
> things" explaining _why_ it's open-coded.
> 
> I dunno. Maybe Al disagrees. I just don't like using the "simple
> helpers" and then changing subtly how they work by knowing their
> internals.

Something like this:
--

list_for_each_entry_safe is not suitable for protecting against concurrent
modification of the list. Commit 6754af6 introduced a race in superblock
walking.

list_for_each_entry can use the trick of pinning the current entry while we
drop and retake the lock, because the iteration subsequently follows
cur->next. However, list_for_each_entry_safe saves n = cur->next before
entering the loop body, so once the lock is dropped the entry n points to
may be deleted and freed out from under us.
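
For reference, this is roughly what the two iterators expand to (a
simplified sketch of the include/linux/list.h macros; prefetch hints and
exact spelling vary between kernel versions):

	/*
	 * list_for_each_entry computes the next entry at the top of each
	 * pass from pos->member.next, so pinning pos across an unlock is
	 * enough to resume the walk safely.
	 */
	#define list_for_each_entry(pos, head, member)                      \
		for (pos = list_entry((head)->next, typeof(*pos), member);  \
		     &pos->member != (head);                                \
		     pos = list_entry(pos->member.next, typeof(*pos), member))

	/*
	 * list_for_each_entry_safe loads n before the loop body runs. If
	 * the body drops the lock, another CPU can delete and free the
	 * entry n points at, and the next pass walks freed memory.
	 */
	#define list_for_each_entry_safe(pos, n, head, member)              \
		for (pos = list_entry((head)->next, typeof(*pos), member),  \
		     n = list_entry(pos->member.next, typeof(*pos), member);\
		     &pos->member != (head);                                \
		     pos = n, n = list_entry(n->member.next, typeof(*n), member))

The open-coded walks below therefore reload the cursor from
sb->s_list.next after retaking sb_lock; the elevated s_count is what
keeps sb itself on the list across the unlocked region.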

Signed-off-by: Nick Piggin <npiggin@xxxxxxx>
---
 fs/dcache.c |   14 ++++++++++++--
 fs/super.c  |   51 +++++++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 57 insertions(+), 8 deletions(-)

Index: linux-2.6/fs/dcache.c
===================================================================
--- linux-2.6.orig/fs/dcache.c
+++ linux-2.6/fs/dcache.c
@@ -536,7 +536,7 @@ restart:
  */
 static void prune_dcache(int count)
 {
-	struct super_block *sb, *n;
+	struct list_head *list;
 	int w_count;
 	int unused = dentry_stat.nr_unused;
 	int prune_ratio;
@@ -549,8 +549,16 @@ static void prune_dcache(int count)
 		prune_ratio = 1;
 	else
 		prune_ratio = unused / count;
+
+	/* see iterate_supers for super_blocks iteration comments */
 	spin_lock(&sb_lock);
-	list_for_each_entry_safe(sb, n, &super_blocks, s_list) {
+	list = super_blocks.next;
+	while (list != &super_blocks) {
+		struct super_block *sb;
+
+		sb = list_entry(list, struct super_block, s_list);
+		list = list->next;
+
 		if (list_empty(&sb->s_instances))
 			continue;
 		if (sb->s_nr_dentry_unused == 0)
@@ -590,6 +598,8 @@ static void prune_dcache(int count)
 			up_read(&sb->s_umount);
 		}
 		spin_lock(&sb_lock);
+		/* sb_lock dropped, must reload next */
+		list = sb->s_list.next;
 		count -= pruned;
 		__put_super(sb);
 		/* more work left to do? */
Index: linux-2.6/fs/super.c
===================================================================
--- linux-2.6.orig/fs/super.c
+++ linux-2.6/fs/super.c
@@ -358,10 +358,17 @@ EXPORT_SYMBOL(drop_super);
  */
 void sync_supers(void)
 {
-	struct super_block *sb, *n;
+	struct list_head *list;
 
+	/* see iterate_supers for super_blocks iteration comments */
 	spin_lock(&sb_lock);
-	list_for_each_entry_safe(sb, n, &super_blocks, s_list) {
+	list = super_blocks.next;
+	while (list != &super_blocks) {
+		struct super_block *sb;
+
+		sb = list_entry(list, struct super_block, s_list);
+		list = list->next;
+
 		if (list_empty(&sb->s_instances))
 			continue;
 		if (sb->s_op->write_super && sb->s_dirt) {
@@ -374,6 +381,8 @@ void sync_supers(void)
 			up_read(&sb->s_umount);
 
 			spin_lock(&sb_lock);
+			/* sb_lock dropped, must reload next */
+			list = sb->s_list.next;
 			__put_super(sb);
 		}
 	}
@@ -390,10 +399,25 @@ void sync_supers(void)
  */
 void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
 {
-	struct super_block *sb, *n;
+	struct list_head *list;
 
+	/*
+	 * Walk the list of super_blocks:
+	 * Cannot use list_for_each_entry because __put_super may delete
+	 * sb from the list.
+	 * Cannot use list_for_each_entry_safe because it loads both the
+	 * current and next list entries before the loop body. When dropping
+	 * the lock we have only pinned the current entry in the list, next
+	 * may be deleted.
+	 */
 	spin_lock(&sb_lock);
-	list_for_each_entry_safe(sb, n, &super_blocks, s_list) {
+	list = super_blocks.next;
+	while (list != &super_blocks) {
+		struct super_block *sb;
+
+		sb = list_entry(list, struct super_block, s_list);
+		list = list->next;
+
 		if (list_empty(&sb->s_instances))
 			continue;
 		sb->s_count++;
@@ -405,6 +429,12 @@ void iterate_supers(void (*f)(struct sup
 		up_read(&sb->s_umount);
 
 		spin_lock(&sb_lock);
+		/*
+		 * sb_lock dropped, we must reload next entry. We can reload it
+		 * from sb because we have that element pinned in the list with
+		 * s_count.
+		 */
+		list = sb->s_list.next;
 		__put_super(sb);
 	}
 	spin_unlock(&sb_lock);
@@ -568,10 +598,17 @@ int do_remount_sb(struct super_block *sb
 
 static void do_emergency_remount(struct work_struct *work)
 {
-	struct super_block *sb, *n;
+	struct list_head *list;
 
+	/* see iterate_supers for super_blocks iteration comments */
 	spin_lock(&sb_lock);
-	list_for_each_entry_safe(sb, n, &super_blocks, s_list) {
+	list = super_blocks.next;
+	while (list != &super_blocks) {
+		struct super_block *sb;
+
+		sb = list_entry(list, struct super_block, s_list);
+		list = list->next;
+
 		if (list_empty(&sb->s_instances))
 			continue;
 		sb->s_count++;
@@ -585,6 +622,8 @@ static void do_emergency_remount(struct
 		}
 		up_write(&sb->s_umount);
 		spin_lock(&sb_lock);
+		/* sb_lock dropped, must reload next */
+		list = sb->s_list.next;
 		__put_super(sb);
 	}
 	spin_unlock(&sb_lock);
--