From: The pNFS Team <linux-nfs@xxxxxxxxxxxxxxx>

Signed-off-by: Andy Adamson <andros@xxxxxxxxxx>
---
 fs/nfs/inode.c         |    1 +
 fs/nfs/pnfs.c          |  411 ++++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfs/pnfs.h          |   34 +++++
 include/linux/nfs_fs.h |    1 +
 4 files changed, 447 insertions(+), 0 deletions(-)

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index ce91e8f..5e355de 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1445,6 +1445,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
 	init_rwsem(&nfsi->rwsem);
 #ifdef CONFIG_NFS_V4_1
 	init_waitqueue_head(&nfsi->lo_waitq);
+	nfsi->pnfs_layout_suspend = 0;
 	nfsi->layout = NULL;
 #endif /* CONFIG_NFS_V4_1 */
 #endif
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 36a3056..0f98261 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -259,6 +259,18 @@ void
 pnfs_layout_release(struct pnfs_layout_type *lo,
 		    struct nfs4_pnfs_layout_segment *range)
 {
+	struct nfs_inode *nfsi = PNFS_NFS_INODE(lo);
+
+	spin_lock(&nfsi->vfs_inode.i_lock);
+	if (range)
+		pnfs_free_layout(lo, range);
+	/*
+	 * Matched in _pnfs_update_layout for layoutget
+	 * and by get_layout in _pnfs_return_layout for layoutreturn
+	 */
+	put_layout_locked(lo);
+	spin_unlock(&nfsi->vfs_inode.i_lock);
+	wake_up_all(&nfsi->lo_waitq);
 }
 
 void
@@ -412,6 +424,65 @@ pnfs_layout_from_open_stateid(struct pnfs_layout_type *lo,
 }
 
 /*
+ * Get layout from server.
+ *    for now, assume that whole file layouts are requested.
+ *    arg->offset: 0
+ *    arg->length: all ones
+ *
+ * If the layoutget arguments cannot be allocated, the layout reference
+ * is dropped through pnfs_layout_release() and -ENOMEM is returned;
+ * otherwise the status of pnfs4_proc_layoutget() is returned.
+ */
+static int
+send_layoutget(struct inode *ino,
+	   struct nfs_open_context *ctx,
+	   struct nfs4_pnfs_layout_segment *range,
+	   struct pnfs_layout_segment **lsegpp,
+	   struct pnfs_layout_type *lo)
+{
+	int status;
+	struct nfs_server *server = NFS_SERVER(ino);
+	struct nfs4_pnfs_layoutget *lgp;
+
+	dprintk("--> %s\n", __func__);
+
+	lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
+	if (lgp == NULL) {
+		pnfs_layout_release(lo, NULL);
+		return -ENOMEM;
+	}
+	lgp->args.minlength = NFS4_MAX_UINT64;
+	lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
+	lgp->args.lseg.iomode = range->iomode;
+	lgp->args.lseg.offset = 0;
+	lgp->args.lseg.length = NFS4_MAX_UINT64;
+	lgp->args.type = server->pnfs_curr_ld->id;
+	lgp->args.inode = ino;
+	lgp->lsegpp = lsegpp;
+
+	if (!memcmp(lo->stateid.u.data, &zero_stateid, NFS4_STATEID_SIZE)) {
+		struct nfs_open_context *oldctx = ctx;
+
+		if (!oldctx) {
+			ctx = nfs_find_open_context(ino, NULL,
+					(range->iomode == IOMODE_READ) ?
+					FMODE_READ : FMODE_WRITE);
+			BUG_ON(!ctx);
+		}
+		/* Set the layout stateid from the open stateid */
+		pnfs_layout_from_open_stateid(NFS_I(ino)->layout, ctx->state);
+		if (!oldctx)
+			put_nfs_open_context(ctx);
+	}
+
+	/* Retrieve layout information from server */
+	status = pnfs4_proc_layoutget(lgp);
+
+	dprintk("<-- %s status %d\n", __func__, status);
+	return status;
+}
+
+/*
  * iomode matching rules:
  * range	lseg	match
  * -----	-----	-----
@@ -471,6 +542,67 @@ pnfs_free_layout(struct pnfs_layout_type *lo,
 }
 
 /*
+ * cmp two layout segments for sorting into layout cache
+ */
+static inline s64
+cmp_layout(struct nfs4_pnfs_layout_segment *l1,
+	   struct nfs4_pnfs_layout_segment *l2)
+{
+	/* read > read/write */
+	return (int)(l1->iomode == IOMODE_READ) -
+	       (int)(l2->iomode == IOMODE_READ);
+}
+
+/*
+ * Insert lseg into lo->segs at the position chosen by cmp_layout().
+ * Inserting into an empty segment list also adds the layout to the
+ * client's cl_layouts list.  A layout reference is taken via get_layout().
+ */
+static void
+pnfs_insert_layout(struct pnfs_layout_type *lo,
+		   struct pnfs_layout_segment *lseg)
+{
+	struct pnfs_layout_segment *lp;
+	int found = 0;
+
+	dprintk("%s:Begin\n", __func__);
+
+	BUG_ON_UNLOCKED_LO(lo);
+	if (list_empty(&lo->segs)) {
+		struct nfs_client *clp = PNFS_NFS_SERVER(lo)->nfs_client;
+
+		spin_lock(&clp->cl_lock);
+		BUG_ON(!list_empty(&lo->lo_layouts));
+		list_add_tail(&lo->lo_layouts, &clp->cl_layouts);
+		spin_unlock(&clp->cl_lock);
+	}
+	list_for_each_entry(lp, &lo->segs, fi_list) {
+		if (cmp_layout(&lp->range, &lseg->range) > 0)
+			continue;
+		list_add_tail(&lseg->fi_list, &lp->fi_list);
+		dprintk("%s: inserted lseg %p "
+			"iomode %d offset %llu length %llu before "
+			"lp %p iomode %d offset %llu length %llu\n",
+			__func__, lseg, lseg->range.iomode,
+			lseg->range.offset, lseg->range.length,
+			lp, lp->range.iomode, lp->range.offset,
+			lp->range.length);
+		found = 1;
+		break;
+	}
+	if (!found) {
+		list_add_tail(&lseg->fi_list, &lo->segs);
+		dprintk("%s: inserted lseg %p "
+			"iomode %d offset %llu length %llu at tail\n",
+			__func__, lseg, lseg->range.iomode,
+			lseg->range.offset, lseg->range.length);
+	}
+	get_layout(lo);
+
+	dprintk("%s:Return\n", __func__);
+}
+
+/*
  * Each layoutdriver embeds pnfs_layout_type as the first field in it's
  * per-layout type layout cache structure and returns it ZEROed
  * from layoutdriver_io_ops->alloc_layout
@@ -531,16 +663,295 @@ pnfs_alloc_layout(struct inode *ino)
 	return nfsi->layout;
 }
 
+/*
+ * iomode matching rules:
+ * range	lseg	match
+ * -----	-----	-----
+ * ANY		READ	true
+ * ANY		RW	true
+ * RW		READ	false
+ * RW		RW	true
+ * READ		READ	true
+ * READ		RW	true
+ */
+static inline int
+has_matching_lseg(struct pnfs_layout_segment *lseg,
+		  struct nfs4_pnfs_layout_segment *range)
+{
+	return (range->iomode != IOMODE_RW || lseg->range.iomode == IOMODE_RW);
+}
+
+/*
+ * lookup range in layout
+ * Returns the first matching segment with a reference taken through
+ * get_lseg(), or NULL when no segment matches.
+ */
+static struct pnfs_layout_segment *
+pnfs_has_layout(struct pnfs_layout_type *lo,
+		struct nfs4_pnfs_layout_segment *range)
+{
+	struct pnfs_layout_segment *lseg, *ret = NULL;
+
+	dprintk("%s:Begin\n", __func__);
+
+	BUG_ON_UNLOCKED_LO(lo);
+	list_for_each_entry(lseg, &lo->segs, fi_list) {
+		if (has_matching_lseg(lseg, range)) {
+			ret = lseg;
+			get_lseg(ret);
+			break;
+		}
+		if (cmp_layout(range, &lseg->range) > 0)
+			break;
+	}
+
+	dprintk("%s:Return lseg %p ref %d valid %d\n",
+		__func__, ret, ret ? atomic_read(&ret->kref.refcount) : 0,
+		ret ? ret->valid : 0);
+	return ret;
+}
+
+/* Update the file's layout for the given range and iomode.
+ * Layout is retrieved from the server if needed.
+ * The appropriate layout segment is referenced and returned to the caller
+ * through lsegpp.  lsegpp may be NULL: nothing is handed back and any
+ * reference taken on a cached segment is dropped again.
+ */
+void
+_pnfs_update_layout(struct inode *ino,
+		   struct nfs_open_context *ctx,
+		   enum pnfs_iomode iomode,
+		   struct pnfs_layout_segment **lsegpp)
+{
+	struct nfs4_pnfs_layout_segment arg = {
+		.iomode = iomode,
+		.offset = 0,
+		.length = NFS4_MAX_UINT64,
+	};
+	struct nfs_inode *nfsi = NFS_I(ino);
+	struct pnfs_layout_type *lo;
+	struct pnfs_layout_segment *lseg = NULL;
+
+	if (lsegpp)
+		*lsegpp = NULL;
+	spin_lock(&ino->i_lock);
+	lo = pnfs_alloc_layout(ino);
+	if (lo == NULL) {
+		dprintk("%s ERROR: can't get pnfs_layout_type\n", __func__);
+		goto out_unlock;
+	}
+
+	/* Check to see if the layout for the given range already exists */
+	lseg = pnfs_has_layout(lo, &arg);
+	if (lseg && !lseg->valid) {
+		put_lseg_locked(lseg);
+		/* someone is cleaning the layout */
+		lseg = NULL;
+		goto out_unlock;
+	}
+
+	if (lseg) {
+		dprintk("%s: Using cached lseg %p for %llu@%llu iomode %d)\n",
+			__func__,
+			lseg,
+			arg.length,
+			arg.offset,
+			arg.iomode);
+
+		goto out_unlock;
+	}
+
+	/* if get layout already failed once goto out */
+	if (test_bit(lo_fail_bit(iomode), &nfsi->layout->pnfs_layout_state)) {
+		if (unlikely(nfsi->pnfs_layout_suspend &&
+		    get_seconds() >= nfsi->pnfs_layout_suspend)) {
+			dprintk("%s: layout_get resumed\n", __func__);
+			clear_bit(lo_fail_bit(iomode),
+				  &nfsi->layout->pnfs_layout_state);
+			nfsi->pnfs_layout_suspend = 0;
+		} else
+			goto out_unlock;
+	}
+
+	/* Reference the layout for layoutget matched in pnfs_layout_release */
+	get_layout(lo);
+	spin_unlock(&ino->i_lock);
+
+	send_layoutget(ino, ctx, &arg, lsegpp, lo);
+out:
+	/* nfsi->layout is still NULL if pnfs_alloc_layout() failed above */
+	dprintk("%s end, state 0x%lx lseg %p\n", __func__,
+		nfsi->layout ? nfsi->layout->pnfs_layout_state : 0UL,
+		lseg);
+	return;
+out_unlock:
+	if (lsegpp) {
+		*lsegpp = lseg;
+	} else if (lseg) {
+		/* no one to hand the cached segment to: drop the reference */
+		put_lseg_locked(lseg);
+		lseg = NULL;
+	}
+	spin_unlock(&ino->i_lock);
+	goto out;
+}
+
+/*
+ * Map the LAYOUTGET rpc_status onto lgp->status.  Errors that are not
+ * handled as transient set the per-iomode failure bit in the layout
+ * state, which makes _pnfs_update_layout() skip further layoutgets;
+ * the *_REVOKED errors also arm pnfs_layout_suspend so it can resume.
+ */
 void
 pnfs_get_layout_done(struct nfs4_pnfs_layoutget *lgp, int rpc_status)
 {
+	struct pnfs_layout_segment *lseg = NULL;
+	struct nfs_inode *nfsi = NFS_I(lgp->args.inode);
+	time_t suspend = 0;
+
+	dprintk("-->%s\n", __func__);
+
+	lgp->status = rpc_status;
+	if (likely(!rpc_status)) {
+		/* a successful reply with no layout body is unusable */
+		if (unlikely(lgp->res.layout.len <= 0)) {
+			printk(KERN_ERR
+			       "%s: ERROR Returned layout size is ZERO\n", __func__);
+			lgp->status = -EIO;
+		}
+		goto out;
+	}
+
+	dprintk("%s: ERROR retrieving layout %d\n", __func__, rpc_status);
+	switch (rpc_status) {
+	case -NFS4ERR_BADLAYOUT:
+		lgp->status = -ENOENT;
+		/* FALLTHROUGH */
+	case -EACCES:	/* NFS4ERR_ACCESS */
+		/* transient error, don't mark with NFS_INO_LAYOUT_FAILED */
+		goto out;
+
+	case -NFS4ERR_LAYOUTTRYLATER:
+	case -NFS4ERR_RECALLCONFLICT:
+	case -NFS4ERR_OLD_STATEID:
+	case -EAGAIN:	/* NFS4ERR_LOCKED */
+		lgp->status = -NFS4ERR_DELAY;	/* for nfs4_handle_exception */
+		/* FALLTHROUGH */
+	case -NFS4ERR_GRACE:
+	case -NFS4ERR_DELAY:
+		goto out;
+
+	case -NFS4ERR_ADMIN_REVOKED:
+	case -NFS4ERR_DELEG_REVOKED:
+		/* The layout is expected to be returned at this point.
+		 * This should clear the layout stateid as well */
+		suspend = get_seconds() + 1;
+		break;
+
+	case -NFS4ERR_LAYOUTUNAVAILABLE:
+		lgp->status = -ENOTSUPP;
+		break;
+
+	case -NFS4ERR_REP_TOO_BIG:
+	case -NFS4ERR_REP_TOO_BIG_TO_CACHE:
+		lgp->status = -E2BIG;
+		break;
+
+	/* Leave the following errors untranslated */
+	case -NFS4ERR_DEADSESSION:
+	case -NFS4ERR_DQUOT:
+	case -EINVAL:		/* NFS4ERR_INVAL */
+	case -EIO:		/* NFS4ERR_IO */
+	case -NFS4ERR_FHEXPIRED:
+	case -NFS4ERR_MOVED:
+	case -NFS4ERR_NOSPC:
+	case -ESERVERFAULT:	/* NFS4ERR_SERVERFAULT */
+	case -ESTALE:		/* NFS4ERR_STALE */
+	case -ETOOSMALL:	/* NFS4ERR_TOOSMALL */
+		break;
+
+	/* The following errors are our fault and should never happen */
+	case -NFS4ERR_BADIOMODE:
+	case -NFS4ERR_BADXDR:
+	case -NFS4ERR_REQ_TOO_BIG:
+	case -NFS4ERR_UNKNOWN_LAYOUTTYPE:
+	case -NFS4ERR_WRONG_TYPE:
+		lgp->status = -EINVAL;
+		/* FALLTHROUGH */
+	case -NFS4ERR_BAD_STATEID:
+	case -NFS4ERR_NOFILEHANDLE:
+	case -ENOTSUPP:	/* NFS4ERR_NOTSUPP */
+	case -NFS4ERR_OPENMODE:
+	case -NFS4ERR_OP_NOT_IN_SESSION:
+	case -NFS4ERR_TOO_MANY_OPS:
+		dprintk("%s: error %d: should never happen\n", __func__,
+			rpc_status);
+		break;
+
+	/* The following errors are the server's fault */
+	default:
+		dprintk("%s: illegal error %d\n", __func__, rpc_status);
+		lgp->status = -EIO;
+		break;
+	}
+
+	/* remember that get layout failed and suspend trying */
+	nfsi->pnfs_layout_suspend = suspend;
+	set_bit(lo_fail_bit(lgp->args.lseg.iomode),
+		&nfsi->layout->pnfs_layout_state);
+	dprintk("%s: layout_get suspended until %ld\n",
+		__func__, suspend);
+out:
+	dprintk("%s end (err:%d) state 0x%lx lseg %p\n",
+		__func__, lgp->status, nfsi->layout->pnfs_layout_state, lseg);
+	return;
 }
 
+/*
+ * Build a layout segment from a successful LAYOUTGET reply, insert it
+ * into the layout and record the new layout stateid.  Returns 0, or a
+ * negative errno if the layout driver could not allocate the segment.
+ */
 int
 pnfs_layout_process(struct nfs4_pnfs_layoutget *lgp)
 {
+	struct pnfs_layout_type *lo = NFS_I(lgp->args.inode)->layout;
+	struct nfs4_pnfs_layoutget_res *res = &lgp->res;
+	struct pnfs_layout_segment *lseg;
+	struct inode *ino = PNFS_INODE(lo);
 	int status = 0;
 
+	/* Inject layout blob into I/O device driver */
+	lseg = PNFS_LD_IO_OPS(lo)->alloc_lseg(lo, res);
+	if (!lseg || IS_ERR(lseg)) {
+		if (!lseg)
+			status = -ENOMEM;
+		else
+			status = PTR_ERR(lseg);
+		dprintk("%s: Could not allocate layout: error %d\n",
+			__func__, status);
+		goto out;
+	}
+
+	spin_lock(&ino->i_lock);
+	init_lseg(lo, lseg);
+	lseg->range = res->lseg;
+	if (lgp->lsegpp) {
+		get_lseg(lseg);
+		*lgp->lsegpp = lseg;
+	}
+	pnfs_insert_layout(lo, lseg);
+
+	if (res->return_on_close) {
+		lo->roc_iomode |= res->lseg.iomode;
+		if (!lo->roc_iomode)
+			lo->roc_iomode = IOMODE_ANY;
+	}
+
+	/* Done processing layoutget. Set the layout stateid */
+	pnfs_set_layout_stateid(lo, &res->stateid);
+	spin_unlock(&ino->i_lock);
+out:
 	return status;
 }
 
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 8c1d50e..379aa18 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -28,6 +28,10 @@ extern int pnfs4_proc_layoutget(struct nfs4_pnfs_layoutget *lgp);
 
 /* pnfs.c */
 void put_lseg(struct pnfs_layout_segment *lseg);
+void _pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
+	enum pnfs_iomode access_type,
+	struct pnfs_layout_segment **lsegpp);
+
 void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
 void unmount_pnfs_layoutdriver(struct nfs_server *);
 int pnfs_initialize(void);
@@ -47,6 +51,12 @@ void pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_type *lo);
 
 #define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4"
 
+static inline int lo_fail_bit(u32 iomode)
+{
+	return iomode == IOMODE_RW ?
+			 NFS_INO_RW_LAYOUT_FAILED : NFS_INO_RO_LAYOUT_FAILED;
+}
+
 static inline void get_lseg(struct pnfs_layout_segment *lseg)
 {
 	kref_get(&lseg->kref);
@@ -58,6 +68,21 @@ static inline int pnfs_enabled_sb(struct nfs_server *nfss)
 	return nfss->pnfs_curr_ld != NULL;
 }
 
+static inline void pnfs_update_layout(struct inode *ino,
+	struct nfs_open_context *ctx,
+	enum pnfs_iomode access_type,
+	struct pnfs_layout_segment **lsegpp)
+{
+	struct nfs_server *nfss = NFS_SERVER(ino);
+
+	if (pnfs_enabled_sb(nfss))
+		_pnfs_update_layout(ino, ctx, access_type, lsegpp);
+	else {
+		if (lsegpp)
+			*lsegpp = NULL;
+	}
+}
+
 #else  /* CONFIG_NFS_V4_1 */
 
 static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
@@ -76,6 +101,15 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg)
 {
 }
 
+static inline void
+pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
+	enum pnfs_iomode access_type,
+	struct pnfs_layout_segment **lsegpp)
+{
+	if (lsegpp)
+		*lsegpp = NULL;
+}
+
 #endif /* CONFIG_NFS_V4_1 */
 
 #endif /* FS_NFS_PNFS_H */
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index c8b6129..7202c05 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -213,6 +213,7 @@ struct nfs_inode {
 #if defined(CONFIG_NFS_V4_1)
 	wait_queue_head_t	lo_waitq;
 	struct pnfs_layout_type *layout;
+	time_t			pnfs_layout_suspend;
 #endif /* CONFIG_NFS_V4_1 */
 #endif /* CONFIG_NFS_V4*/
 #ifdef CONFIG_NFS_FSCACHE
-- 
1.6.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html