[PATCH 2/3] pnfs_submit: fix deadlock in pnfs_clear_lseg_list

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Andy Adamson <andros@xxxxxxxxxxxxxxxxxxxxx>

The file layout free_lseg i/o operation called by destroy_lseg under the
inode->i_lock can call nfs_put_client() when a data
server is no longer referenced. nfs_put_client can end up taking the
i_mutex called in rpc_unlink (called from nfs_idmap_delete from
nfs_free_client) which can result in a deadlock.

Use a temporary list to hold layout segments to be freed, and free them outside
the inode->i_lock.

Reported-by: Fred Isaman <iisaman@xxxxxxxxxx>
Signed-off-by: Andy Adamson <andros@xxxxxxxxxx>
---
 fs/nfs/pnfs.c |   53 ++++++++++++++++++++++++++---------------------------
 fs/nfs/pnfs.h |    1 -
 2 files changed, 26 insertions(+), 28 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 24620cf..06fcc92 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -275,6 +275,7 @@ init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
 	lseg->layout = lo;
 }
 
+/* Called without i_lock held */
 static void
 destroy_lseg(struct kref *kref)
 {
@@ -285,29 +286,10 @@ destroy_lseg(struct kref *kref)
 	dprintk("--> %s\n", __func__);
 	NFS_SERVER(local->inode)->pnfs_curr_ld->free_lseg(lseg);
 	/* Matched by get_layout_hdr_locked in pnfs_insert_layout */
-	put_layout_hdr_locked(local);
+	put_layout_hdr(local->inode);
 }
 
 void
-put_lseg_locked(struct pnfs_layout_segment *lseg)
-{
-	bool do_wake_up;
-	struct nfs_inode *nfsi;
-
-	if (!lseg)
-		return;
-
-	dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
-		atomic_read(&lseg->kref.refcount), lseg->valid);
-	do_wake_up = !lseg->valid;
-	nfsi = NFS_I(lseg->layout->inode);
-	kref_put(&lseg->kref, destroy_lseg);
-	if (do_wake_up)
-		wake_up(&nfsi->lo_waitq);
-}
-EXPORT_SYMBOL_GPL(put_lseg_locked);
-
-void
 put_lseg(struct pnfs_layout_segment *lseg)
 {
 	bool do_wake_up;
@@ -320,9 +302,7 @@ put_lseg(struct pnfs_layout_segment *lseg)
 		atomic_read(&lseg->kref.refcount), lseg->valid);
 	do_wake_up = !lseg->valid;
 	nfsi = NFS_I(lseg->layout->inode);
-	spin_lock(&nfsi->vfs_inode.i_lock);
 	kref_put(&lseg->kref, destroy_lseg);
-	spin_unlock(&nfsi->vfs_inode.i_lock);
 	if (do_wake_up)
 		wake_up(&nfsi->lo_waitq);
 }
@@ -354,10 +334,11 @@ _pnfs_can_return_lseg(struct pnfs_layout_segment *lseg)
 }
 
 static void
-pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo,
+pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list,
 		     struct pnfs_layout_range *range)
 {
 	struct pnfs_layout_segment *lseg, *next;
+
 	dprintk("%s:Begin lo %p offset %llu length %llu iomode %d\n",
 		__func__, lo, range->offset, range->length, range->iomode);
 
@@ -370,8 +351,7 @@ pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo,
 			"offset %llu length %llu\n", __func__,
 			lseg, lseg->range.iomode, lseg->range.offset,
 			lseg->range.length);
-		list_del(&lseg->fi_list);
-		put_lseg_locked(lseg);
+		list_move(&lseg->fi_list, tmp_list);
 	}
 	if (list_empty(&lo->segs)) {
 		struct nfs_client *clp;
@@ -387,6 +367,21 @@ pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo,
 	dprintk("%s:Return\n", __func__);
 }
 
+static void
+pnfs_free_lseg_list(struct list_head *tmp_list)
+{
+	struct pnfs_layout_segment *lseg;
+
+	while (!list_empty(tmp_list)) {
+		lseg = list_entry(tmp_list->next, struct pnfs_layout_segment,
+				fi_list);
+		dprintk("%s calling put_lseg on %p\n", __func__, lseg);
+		list_del(&lseg->fi_list);
+		put_lseg(lseg);
+	}
+}
+
+
 void
 pnfs_layoutget_release(struct pnfs_layout_hdr *lo)
 {
@@ -403,12 +398,14 @@ pnfs_layoutreturn_release(struct pnfs_layout_hdr *lo,
 			  struct pnfs_layout_range *range)
 {
 	struct nfs_inode *nfsi = NFS_I(lo->inode);
+	LIST_HEAD(tmp_list);
 
 	spin_lock(&nfsi->vfs_inode.i_lock);
 	if (range)
-		pnfs_clear_lseg_list(lo, range);
+		pnfs_clear_lseg_list(lo, &tmp_list, range);
 	put_layout_hdr_locked(lo); /* Matched in _pnfs_return_layout */
 	spin_unlock(&nfsi->vfs_inode.i_lock);
+	pnfs_free_lseg_list(&tmp_list);
 	wake_up_all(&nfsi->lo_waitq);
 }
 
@@ -421,11 +418,12 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
 		.offset = 0,
 		.length = NFS4_MAX_UINT64,
 	};
+	LIST_HEAD(tmp_list);
 
 	spin_lock(&nfsi->vfs_inode.i_lock);
 	lo = nfsi->layout;
 	if (lo) {
-		pnfs_clear_lseg_list(lo, &range);
+		pnfs_clear_lseg_list(lo, &tmp_list,  &range);
 		WARN_ON(!list_empty(&nfsi->layout->segs));
 		WARN_ON(!list_empty(&nfsi->layout->layouts));
 		WARN_ON(nfsi->layout->refcount != 1);
@@ -434,6 +432,7 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
 		put_layout_hdr_locked(lo);
 	}
 	spin_unlock(&nfsi->vfs_inode.i_lock);
+	pnfs_free_lseg_list(&tmp_list);
 }
 
 /*
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 1b1efcd..51f717d 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -177,7 +177,6 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool wait);
 
 /* pnfs.c */
 void put_lseg(struct pnfs_layout_segment *lseg);
-void put_lseg_locked(struct pnfs_layout_segment *lseg);
 struct pnfs_layout_segment *
 pnfs_has_layout(struct pnfs_layout_hdr *lo, struct pnfs_layout_range *range);
 struct pnfs_layout_segment *
-- 
1.6.6

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux