On Fri, Feb 6, 2015 at 12:45 PM, Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx> wrote: > pnfs_layoutreturn_free_lseg_async() can also race with inode put in > the general case. We can now fix this, and also simplify the code. > > Cc: Peng Tao <tao.peng@xxxxxxxxxxxxxxx> > Signed-off-by: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx> > --- > fs/nfs/pnfs.c | 53 +++++++++++++++++++---------------------------------- > 1 file changed, 19 insertions(+), 34 deletions(-) > > diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c > index a1d8620e8cb7..107b321be7d4 100644 > --- a/fs/nfs/pnfs.c > +++ b/fs/nfs/pnfs.c > @@ -361,14 +361,9 @@ pnfs_layout_need_return(struct pnfs_layout_hdr *lo, > return true; > } > > -static void pnfs_layoutreturn_free_lseg(struct work_struct *work) > +static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg, > + struct pnfs_layout_hdr *lo, struct inode *inode) > { > - struct pnfs_layout_segment *lseg; > - struct pnfs_layout_hdr *lo; > - struct inode *inode; > - > - lseg = container_of(work, struct pnfs_layout_segment, pls_work); > - WARN_ON(atomic_read(&lseg->pls_refcount)); > lo = lseg->pls_layout; > inode = lo->plh_inode; > > @@ -383,24 +378,12 @@ static void pnfs_layoutreturn_free_lseg(struct work_struct *work) > lo->plh_block_lgets++; > lo->plh_return_iomode = 0; > spin_unlock(&inode->i_lock); > + pnfs_get_layout_hdr(lo); > > - pnfs_send_layoutreturn(lo, stateid, iomode, true); > - spin_lock(&inode->i_lock); > + /* Send an async layoutreturn so we dont deadlock */ > + pnfs_send_layoutreturn(lo, stateid, iomode, false); > } else > - /* match pnfs_get_layout_hdr #2 in pnfs_put_lseg */ > - pnfs_put_layout_hdr(lo); > - pnfs_layout_remove_lseg(lo, lseg); > - spin_unlock(&inode->i_lock); > - pnfs_free_lseg(lseg); > - /* match pnfs_get_layout_hdr #1 in pnfs_put_lseg */ > - pnfs_put_layout_hdr(lo); > -} > - > -static void > -pnfs_layoutreturn_free_lseg_async(struct pnfs_layout_segment *lseg) > -{ > - INIT_WORK(&lseg->pls_work, pnfs_layoutreturn_free_lseg); > - queue_work(nfsiod_workqueue, &lseg->pls_work); > + spin_unlock(&inode->i_lock); > } > > void > @@ -415,21 +398,23 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg) > dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg, > atomic_read(&lseg->pls_refcount), > test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); > + > + /* Handle the case where refcount != 1 */ > + if (atomic_add_unless(&lseg->pls_refcount, -1, 1)) > + return; > + > lo = lseg->pls_layout; > inode = lo->plh_inode; > + /* Do we need a layoutreturn? */ > + if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)) pnfs_layout_need_return() iterates through all layout segments to make sure that if we have other segments that need to be returned, we do not send layoutreturn for current lseg so that if they happen to overlap, we don't return a layout while still holding one captive. I guess the right thing to do is to move pnfs_layout_need_return() and pnfs_layoutreturn_before_put_lseg() under atomic_dec_and_lock. Cheers, Tao > + pnfs_layoutreturn_before_put_lseg(lseg, lo, inode); > + > if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { > pnfs_get_layout_hdr(lo); > - if (pnfs_layout_need_return(lo, lseg)) { > - spin_unlock(&inode->i_lock); > - /* hdr reference dropped in nfs4_layoutreturn_release */ > - pnfs_get_layout_hdr(lo); > - pnfs_layoutreturn_free_lseg_async(lseg); > - } else { > - pnfs_layout_remove_lseg(lo, lseg); > - spin_unlock(&inode->i_lock); > - pnfs_free_lseg(lseg); > - pnfs_put_layout_hdr(lo); > - } > + pnfs_layout_remove_lseg(lo, lseg); > + spin_unlock(&inode->i_lock); > + pnfs_free_lseg(lseg); > + pnfs_put_layout_hdr(lo); > } > } > EXPORT_SYMBOL_GPL(pnfs_put_lseg); > -- > 2.1.0 > -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html