Re: [BUG] cgroup writeback list corruption

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hello, Tahsin.

FYI, this is the preliminary patch I'm trying to test.  I'm running
the repro case w/o the patch but the bug hasn't triggered yet.

Thanks.

Index: work/include/linux/backing-dev.h
===================================================================
--- work.orig/include/linux/backing-dev.h
+++ work/include/linux/backing-dev.h
@@ -362,6 +362,26 @@ static inline struct bdi_writeback *inod
 	return inode->i_wb;
 }
 
+static inline struct bdi_writeback *
+unlocked_inode_to_wb_lock_list(struct inode *inode)
+{
+	struct bdi_writeback *wb;
+
+	rcu_read_lock();
+
+	while (true) {
+		wb = inode->i_wb;
+		spin_lock(&wb->list_lock);
+		if (likely(wb == inode->i_wb))
+			break;
+		spin_unlock(&wb->list_lock);
+		cpu_relax();
+	}
+
+	rcu_read_unlock();
+	return wb;
+}
+
 /**
  * unlocked_inode_to_wb_begin - begin unlocked inode wb access transaction
  * @inode: target inode
@@ -374,8 +394,7 @@ static inline struct bdi_writeback *inod
  * unlocked_inode_to_wb_end().
  *
  * The caller must call unlocked_inode_to_wb_end() with *@lockdep
- * afterwards and can't sleep during transaction.  IRQ may or may not be
- * disabled on return.
+ * afterwards and can't sleep during transaction.
  */
 static inline struct bdi_writeback *
 unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp)
@@ -388,14 +407,16 @@ unlocked_inode_to_wb_begin(struct inode
 	 */
 	*lockedp = smp_load_acquire(&inode->i_state) & I_WB_SWITCH;
 
-	if (unlikely(*lockedp))
-		spin_lock_irq(&inode->i_mapping->tree_lock);
-
 	/*
-	 * Protected by either !I_WB_SWITCH + rcu_read_lock() or tree_lock.
+	 * Protected by either !I_WB_SWITCH + rcu_read_lock(),
 	 * inode_to_wb() will bark.  Deref directly.
 	 */
-	return inode->i_wb;
+	if (likely(!*lockedp))
+		return inode->i_wb;
+
+	rcu_read_unlock();
+
+	return unlocked_inode_to_wb_lock_list(inode);
 }
 
 /**
@@ -405,10 +426,10 @@ unlocked_inode_to_wb_begin(struct inode
  */
 static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked)
 {
-	if (unlikely(locked))
-		spin_unlock_irq(&inode->i_mapping->tree_lock);
-
-	rcu_read_unlock();
+	if (likely(!locked))
+		rcu_read_unlock();
+	else
+		spin_unlock(&inode->i_wb->list_lock);
 }
 
 #else	/* CONFIG_CGROUP_WRITEBACK */
@@ -453,6 +474,15 @@ static inline struct bdi_writeback *inod
 }
 
 static inline struct bdi_writeback *
+unlocked_inode_to_wb_lock_list(struct inode *inode)
+{
+	struct bdi_writeback *wb = inode_to_wb(inode);
+
+	spin_lock(&wb->list_lock);
+	return wb;
+}
+
+static inline struct bdi_writeback *
 unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp)
 {
 	return inode_to_wb(inode);
Index: work/fs/fs-writeback.c
===================================================================
--- work.orig/fs/fs-writeback.c
+++ work/fs/fs-writeback.c
@@ -1309,10 +1309,10 @@ __writeback_single_inode(struct inode *i
  * we go e.g. from filesystem. Flusher thread uses __writeback_single_inode()
  * and does more profound writeback list handling in writeback_sb_inodes().
  */
-static int
-writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
-		       struct writeback_control *wbc)
+static int writeback_single_inode(struct inode *inode,
+				  struct writeback_control *wbc)
 {
+	struct bdi_writeback *wb;
 	int ret = 0;
 
 	spin_lock(&inode->i_lock);
@@ -1350,7 +1350,8 @@ writeback_single_inode(struct inode *ino
 	ret = __writeback_single_inode(inode, wbc);
 
 	wbc_detach_inode(wbc);
-	spin_lock(&wb->list_lock);
+
+	wb = unlocked_inode_to_wb_lock_list(inode);
 	spin_lock(&inode->i_lock);
 	/*
 	 * If inode is clean, remove it from writeback lists. Otherwise don't
@@ -1515,8 +1516,7 @@ static long writeback_sb_inodes(struct s
 			cond_resched();
 		}
 
-
-		spin_lock(&wb->list_lock);
+		wb = unlocked_inode_to_wb_lock_list(inode);
 		spin_lock(&inode->i_lock);
 		if (!(inode->i_state & I_DIRTY_ALL))
 			wrote++;
@@ -2310,7 +2310,6 @@ EXPORT_SYMBOL(sync_inodes_sb);
  */
 int write_inode_now(struct inode *inode, int sync)
 {
-	struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
 	struct writeback_control wbc = {
 		.nr_to_write = LONG_MAX,
 		.sync_mode = sync ? WB_SYNC_ALL : WB_SYNC_NONE,
@@ -2322,7 +2321,7 @@ int write_inode_now(struct inode *inode,
 		wbc.nr_to_write = 0;
 
 	might_sleep();
-	return writeback_single_inode(inode, wb, &wbc);
+	return writeback_single_inode(inode, &wbc);
 }
 EXPORT_SYMBOL(write_inode_now);
 
@@ -2339,7 +2338,7 @@ EXPORT_SYMBOL(write_inode_now);
  */
 int sync_inode(struct inode *inode, struct writeback_control *wbc)
 {
-	return writeback_single_inode(inode, &inode_to_bdi(inode)->wb, wbc);
+	return writeback_single_inode(inode, wbc);
 }
 EXPORT_SYMBOL(sync_inode);
 
--
To unsubscribe from this list: send the line "unsubscribe cgroups" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Security]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]     [Monitors]

  Powered by Linux