On 2010-11-04 17:22, Fred Isaman wrote: > Instead, have mark_invalid function that marks lseg invalid and > removes the reference that holds it in the list. Now when io is finished, > the lseg will automatically be removed from the list. This is > at the heart of many of the upcoming cb_layoutrecall changes. > > Signed-off-by: Fred Isaman <iisaman@xxxxxxxxxx> > --- > fs/nfs/nfs4xdr.c | 3 +- > fs/nfs/pnfs.c | 145 ++++++++++++++++++++++++++++++++++------------------- > fs/nfs/pnfs.h | 1 + > 3 files changed, 95 insertions(+), 54 deletions(-) > > diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c > index 238eeb2..6d9ef2b 100644 > --- a/fs/nfs/nfs4xdr.c > +++ b/fs/nfs/nfs4xdr.c > @@ -1915,8 +1915,7 @@ encode_layoutreturn(struct xdr_stream *xdr, > p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE); > p = xdr_encode_hyper(p, args->range.offset); > p = xdr_encode_hyper(p, args->range.length); > - pnfs_get_layout_stateid(&stateid, NFS_I(args->inode)->layout, > - NULL); > + pnfs_copy_layout_stateid(&stateid, NFS_I(args->inode)->layout); > p = xdr_encode_opaque_fixed(p, &stateid.data, > NFS4_STATEID_SIZE); > p = reserve_space(xdr, 4); > diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c > index 3bbe3be..4e5c68b 100644 > --- a/fs/nfs/pnfs.c > +++ b/fs/nfs/pnfs.c > @@ -272,10 +272,42 @@ init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg) > lseg->layout = lo; > } > > +static void > +_put_lseg_common(struct pnfs_layout_segment *lseg) > +{ > + BUG_ON(lseg->valid == true); > + list_del(&lseg->fi_list); > + if (list_empty(&lseg->layout->segs)) { > + struct nfs_client *clp; > + > + clp = NFS_SERVER(lseg->layout->inode)->nfs_client; > + spin_lock(&clp->cl_lock); > + /* List does not take a reference, so no need for put here */ > + list_del_init(&lseg->layout->layouts); > + spin_unlock(&clp->cl_lock); > + pnfs_invalidate_layout_stateid(lseg->layout); > + } > + rpc_wake_up(&NFS_I(lseg->layout->inode)->lo_rpcwaitq); > +} > + > +/* The use of tmp_list is necessary because pnfs_curr_ld->free_lseg > + * could sleep, so must be called outside of the lock. > + */ > +static void > +put_lseg_locked(struct pnfs_layout_segment *lseg, > + struct list_head *tmp_list) > +{ > + dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg, > + atomic_read(&lseg->pls_refcount), lseg->valid); > + if (atomic_dec_and_test(&lseg->pls_refcount)) { > + _put_lseg_common(lseg); > + list_add(&lseg->fi_list, tmp_list); > + } > +} > + > void > put_lseg(struct pnfs_layout_segment *lseg) > { > - bool do_wake_up; > struct inode *ino; > > if (!lseg) > @@ -283,15 +315,14 @@ put_lseg(struct pnfs_layout_segment *lseg) > > dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg, > atomic_read(&lseg->pls_refcount), lseg->valid); > - do_wake_up = !lseg->valid; > ino = lseg->layout->inode; > - if (atomic_dec_and_test(&lseg->pls_refcount)) { > + if (atomic_dec_and_lock(&lseg->pls_refcount, &ino->i_lock)) { > + _put_lseg_common(lseg); > + spin_unlock(&ino->i_lock); > NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); > /* Matched by get_layout_hdr_locked in pnfs_insert_layout */ > put_layout_hdr(ino); > } > - if (do_wake_up) > - rpc_wake_up(&NFS_I(ino)->lo_rpcwaitq); > } > EXPORT_SYMBOL_GPL(put_lseg); > > @@ -314,10 +345,18 @@ should_free_lseg(struct pnfs_layout_segment *lseg, > lseg->range.iomode == range->iomode); > } > > -static bool > -_pnfs_can_return_lseg(struct pnfs_layout_segment *lseg) > +static void mark_lseg_invalid(struct pnfs_layout_segment *lseg, > + struct list_head *tmp_list) > { > - return atomic_read(&lseg->pls_refcount) == 1; > + assert_spin_locked(&lseg->layout->inode->i_lock); > + if (lseg->valid) { > + lseg->valid = false; > + /* Remove the reference keeping the lseg in the > + * list. It will now be removed when all > + * outstanding io is finished. > + */ > + put_lseg_locked(lseg, tmp_list); > + } > } > > static void > @@ -330,42 +369,31 @@ pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list, > __func__, lo, range->offset, range->length, range->iomode); > > assert_spin_locked(&lo->inode->i_lock); > - list_for_each_entry_safe(lseg, next, &lo->segs, fi_list) { > - if (!should_free_lseg(lseg, range) || > - !_pnfs_can_return_lseg(lseg)) > - continue; > - dprintk("%s: freeing lseg %p iomode %d " > - "offset %llu length %llu\n", __func__, > - lseg, lseg->range.iomode, lseg->range.offset, > - lseg->range.length); > - list_move(&lseg->fi_list, tmp_list); > - } > - if (list_empty(&lo->segs)) { > - struct nfs_client *clp; > - > - clp = NFS_SERVER(lo->inode)->nfs_client; > - spin_lock(&clp->cl_lock); > - /* List does not take a reference, so no need for put here */ > - list_del_init(&lo->layouts); > - spin_unlock(&clp->cl_lock); > - pnfs_invalidate_layout_stateid(lo); > - } > - > + list_for_each_entry_safe(lseg, next, &lo->segs, fi_list) > + if (should_free_lseg(lseg, range)) { > + dprintk("%s: freeing lseg %p iomode %d " > + "offset %llu length %llu\n", __func__, > + lseg, lseg->range.iomode, lseg->range.offset, > + lseg->range.length); > + mark_lseg_invalid(lseg, tmp_list); > + } > dprintk("%s:Return\n", __func__); > } > > static void > -pnfs_free_lseg_list(struct list_head *tmp_list) > +pnfs_free_lseg_list(struct list_head *free_me) > { > - struct pnfs_layout_segment *lseg; > + struct pnfs_layout_segment *lseg, *tmp; > + struct inode *ino; > > - while (!list_empty(tmp_list)) { > - lseg = list_entry(tmp_list->next, struct pnfs_layout_segment, > - fi_list); > - dprintk("%s calling put_lseg on %p\n", __func__, lseg); > - list_del(&lseg->fi_list); > - put_lseg(lseg); > + list_for_each_entry_safe(lseg, tmp, free_me, fi_list) { > + BUG_ON(atomic_read(&lseg->pls_refcount) != 0); > + ino = lseg->layout->inode; > + NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); > + /* Matched by get_layout_hdr_locked in pnfs_insert_layout */ > + put_layout_hdr(ino); > } > + INIT_LIST_HEAD(free_me); > } > > void > @@ -463,6 +491,17 @@ pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo, > dprintk("<-- %s\n", __func__); > } > > +/* Layoutreturn may use an invalid stateid, just copy what is there */ > +void pnfs_copy_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo) > +{ > + int seq; > + > + do { > + seq = read_seqbegin(&lo->seqlock); > + memcpy(dst->data, lo->stateid.data, sizeof(lo->stateid.data)); > + } while (read_seqretry(&lo->seqlock, seq)); > +} > + > void > pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, > struct nfs4_state *open_state) > @@ -546,25 +585,23 @@ has_layout_to_return(struct pnfs_layout_hdr *lo, > return out; > } > > +/* Return true if there is layout based io in progress in the given range. > + * Assumes range has already been marked invalid, and layout marked to > + * prevent any new lseg from being inserted. > + */ > bool > pnfs_return_layout_barrier(struct nfs_inode *nfsi, > struct pnfs_layout_range *range) > { > - struct pnfs_layout_segment *lseg; > + struct pnfs_layout_segment *lseg, *tmp; > bool ret = false; > > spin_lock(&nfsi->vfs_inode.i_lock); > - list_for_each_entry(lseg, &nfsi->layout->segs, fi_list) { > - if (!should_free_lseg(lseg, range)) > - continue; > - lseg->valid = false; > - if (!_pnfs_can_return_lseg(lseg)) { > - dprintk("%s: wait on lseg %p refcount %d\n", > - __func__, lseg, > - atomic_read(&lseg->pls_refcount)); > + list_for_each_entry_safe(lseg, tmp, &nfsi->layout->segs, fi_list) Why do you need the safe version here while the inode is locked? > + if (should_free_lseg(lseg, range)) { > ret = true; But this will always return "true" if there's any lseg to return, not only if (!_pnfs_can_return_lseg(lseg)). What am I missing? :) Benny > + break; > } > - } > spin_unlock(&nfsi->vfs_inode.i_lock); > dprintk("%s:Return %d\n", __func__, ret); > return ret; > @@ -574,12 +611,10 @@ void > pnfs_layoutreturn_release(struct nfs4_layoutreturn *lrp) > { > struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout; > - LIST_HEAD(tmp_list); > > if (lrp->args.return_type != RETURN_FILE) > return; > spin_lock(&lrp->args.inode->i_lock); > - pnfs_clear_lseg_list(lo, &tmp_list, &lrp->args.range); > if (!lrp->res.valid) > ; /* forgetful model internal release */ > else if (!lrp->res.lrs_present) > @@ -588,7 +623,6 @@ pnfs_layoutreturn_release(struct nfs4_layoutreturn *lrp) > pnfs_set_layout_stateid(lo, &lrp->res.stateid); > put_layout_hdr_locked(lo); /* Matched in _pnfs_return_layout */ > spin_unlock(&lrp->args.inode->i_lock); > - pnfs_free_lseg_list(&tmp_list); > } > > static int > @@ -641,7 +675,11 @@ _pnfs_return_layout(struct inode *ino, struct pnfs_layout_range *range, > arg.offset = 0; > arg.length = NFS4_MAX_UINT64; > > + /* probably should BUGON if type != RETURN_FILE */ > if (type == RETURN_FILE) { > + LIST_HEAD(tmp_list); > + struct pnfs_layout_segment *lseg, *tmp; > + > spin_lock(&ino->i_lock); > lo = nfsi->layout; > if (lo && !has_layout_to_return(lo, &arg)) > @@ -652,10 +690,13 @@ _pnfs_return_layout(struct inode *ino, struct pnfs_layout_range *range, > goto out; > } > > + list_for_each_entry_safe(lseg, tmp, &lo->segs, fi_list) > + if (should_free_lseg(lseg, &arg)) > + mark_lseg_invalid(lseg, &tmp_list); > /* Reference matched in pnfs_layoutreturn_release */ > get_layout_hdr_locked(lo); > - > spin_unlock(&ino->i_lock); > + pnfs_free_lseg_list(&tmp_list); > > if (layoutcommit_needed(nfsi)) { > if (stateid && !wait) { /* callback */ > @@ -1171,7 +1212,7 @@ pnfs_layoutcommit_inode(struct inode *inode, int sync) > nfsi->layout->write_end_pos = 0; > nfsi->layout->cred = NULL; > __clear_bit(NFS_LAYOUT_NEED_LCOMMIT, &nfsi->layout->state); > - pnfs_get_layout_stateid(&data->args.stateid, nfsi->layout, NULL); > + pnfs_copy_layout_stateid(&data->args.stateid, nfsi->layout); > > /* Reference for layoutcommit matched in pnfs_layoutcommit_release */ > get_layout_hdr_locked(NFS_I(inode)->layout); > diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h > index 05dd5e0..000acf0 100644 > --- a/fs/nfs/pnfs.h > +++ b/fs/nfs/pnfs.h > @@ -206,6 +206,7 @@ void pnfs_layoutreturn_release(struct nfs4_layoutreturn *lpr); > void pnfs_destroy_layout(struct nfs_inode *); > void pnfs_destroy_all_layouts(struct nfs_client *); > void put_layout_hdr(struct inode *inode); > +void pnfs_copy_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo); > void pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, > struct nfs4_state *open_state); > -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html