[PATCH] ceph: fix writeback thread waits on itself

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



In the case of -ENOSPC, the writeback thread may wait on itself. The call
stack looks like:

  inode_wait_for_writeback+0x26/0x40
  evict+0xb5/0x1a0
  iput+0x1d2/0x220
  ceph_put_wrbuffer_cap_refs+0xe0/0x2c0 [ceph]
  writepages_finish+0x2d3/0x410 [ceph]
  __complete_request+0x26/0x60 [libceph]
  complete_request+0x2e/0x70 [libceph]
  __submit_request+0x256/0x330 [libceph]
  submit_request+0x2b/0x30 [libceph]
  ceph_osdc_start_request+0x25/0x40 [libceph]
  ceph_writepages_start+0xdfe/0x1320 [ceph]
  do_writepages+0x1f/0x70
  __writeback_single_inode+0x45/0x330
  writeback_sb_inodes+0x26a/0x600
  __writeback_inodes_wb+0x92/0xc0
  wb_writeback+0x274/0x330
  wb_workfn+0x2d5/0x3b0

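The fix is to make writepages_finish() not drop the inode's last reference.
If the reference it holds would be the last one, it is handed to the
inode's writeback work, which drops it from the workqueue instead.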

Link: http://tracker.ceph.com/issues/23978
Signed-off-by: "Yan, Zheng" <zyan@xxxxxxxxxx>
---
 fs/ceph/addr.c  | 21 +++++++++++++++++++++
 fs/ceph/inode.c | 12 ++++++++++--
 2 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 5f7ad3d0df2e..9db2f4108951 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -772,6 +772,17 @@ static void writepages_finish(struct ceph_osd_request *req)
 		ceph_release_pages(osd_data->pages, num_pages);
 	}
 
+	if (rc < 0 && total_pages) {
+		/*
+		 * In the case of error, this function may directly get
+		 * called by the thread that does writeback. The writeback
+		 * thread should not drop inode's last reference. Otherwise
+		 * iput_final() may call inode_wait_for_writeback(), which
+		 * waits on writeback.
+		 */
+		ihold(inode);
+	}
+
 	ceph_put_wrbuffer_cap_refs(ci, total_pages, snapc);
 
 	osd_data = osd_req_op_extent_osd_data(req, 0);
@@ -781,6 +792,16 @@ static void writepages_finish(struct ceph_osd_request *req)
 	else
 		kfree(osd_data->pages);
 	ceph_osdc_put_request(req);
+
+	if (rc < 0 && total_pages) {
+		for (;;) {
+			if (atomic_add_unless(&inode->i_count, -1, 1))
+				break;
+			/* let writeback work drop the last reference */
+			if (queue_work(fsc->wb_wq, &ci->i_wb_work))
+				break;
+		}
+	}
 }
 
 /*
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index df3875fdfa41..aa7c5a4ff137 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1752,9 +1752,17 @@ static void ceph_writeback_work(struct work_struct *work)
 	struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
 						  i_wb_work);
 	struct inode *inode = &ci->vfs_inode;
+	int wrbuffer_refs;
+
+	spin_lock(&ci->i_ceph_lock);
+	wrbuffer_refs = ci->i_wrbuffer_ref;
+	spin_unlock(&ci->i_ceph_lock);
+
+	if (wrbuffer_refs) {
+		dout("writeback %p\n", inode);
+		filemap_fdatawrite(&inode->i_data);
+	}
 
-	dout("writeback %p\n", inode);
-	filemap_fdatawrite(&inode->i_data);
 	iput(inode);
 }
 
-- 
2.13.6

--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [CEPH Users]     [Ceph Large]     [Information on CEPH]     [Linux BTRFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux