From: Andy Adamson <andros@xxxxxxxxxx> Use our own async error handler. Mark the layout as failed and retry i/o through the MDS on specified errors. Signed-off-by: Andy Adamson <andros@xxxxxxxxxx> --- fs/nfs/internal.h | 1 + fs/nfs/nfs4filelayout.c | 86 +++++++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs4proc.c | 44 +++++++++++++-------- fs/nfs/nfs4state.c | 1 + fs/nfs/pnfs.h | 1 - include/linux/nfs_xdr.h | 1 + include/linux/sunrpc/clnt.h | 1 + net/sunrpc/clnt.c | 8 ++++ 8 files changed, 125 insertions(+), 18 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 5518d61..f69a322 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -281,6 +281,7 @@ extern int nfs_migrate_page(struct address_space *, #endif /* nfs4proc.c */ +extern void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data); extern int _nfs4_call_sync(struct nfs_server *server, struct rpc_message *msg, struct nfs4_sequence_args *args, diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 5fd8ed3..777d78b 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -40,6 +40,8 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Dean Hildebrand <dhildebz@xxxxxxxxx>"); MODULE_DESCRIPTION("The NFSv4 file layout driver"); +#define FILELAYOUT_POLL_RETRY_MAX (15*HZ) + static int filelayout_set_layoutdriver(struct nfs_server *nfss) { @@ -95,6 +97,88 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset) BUG(); } +/* For data server errors we don't recover from */ +static void +filelayout_set_lo_fail(struct pnfs_layout_segment *lseg, fmode_t mode) +{ + if (mode & FMODE_WRITE) { + dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__); + set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); + } else if (mode & FMODE_READ) { + dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__); + set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); + } +} + +/* + * Async I/O error handler. 
+ * + * NFS4ERR_OLD_STATEID can not occur with a zero stateid seqid. + */ +static int filelayout_async_handle_error(struct rpc_task *task, + struct nfs4_state *state, + struct nfs_client *clp, + int *reset) +{ + if (task->tk_status >= 0) + return 0; + switch (task->tk_status) { + case -NFS4ERR_BADSESSION: + case -NFS4ERR_BADSLOT: + case -NFS4ERR_BAD_HIGH_SLOT: + case -NFS4ERR_DEADSESSION: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + case -NFS4ERR_SEQ_FALSE_RETRY: + case -NFS4ERR_SEQ_MISORDERED: + dprintk("%s ERROR %d, Reset session. Exchangeid " + "flags 0x%x\n", __func__, task->tk_status, + clp->cl_exchange_flags); + nfs4_schedule_state_recovery(clp); + task->tk_status = 0; + return -EAGAIN; + case -NFS4ERR_DELAY: + case -NFS4ERR_GRACE: + case -EKEYEXPIRED: + rpc_delay(task, FILELAYOUT_POLL_RETRY_MAX); + task->tk_status = 0; + return -EAGAIN; + default: + dprintk("%s DS error %d\n", __func__, task->tk_status); + /* Layout marked as failed by pnfs_check_io_status. + * Retry I/O through the MDS */ + *reset = 1; + task->tk_status = 0; + return -EAGAIN; + } +} + +/* NFS_PROTO call done callback routines */ + +static int filelayout_read_done_cb(struct rpc_task *task, + struct nfs_read_data *data) +{ + struct nfs_client *clp = data->ds_clp; + int reset = 0; + + dprintk("%s DS read\n", __func__); + + if (filelayout_async_handle_error(task, data->args.context->state, + data->ds_clp, &reset) == -EAGAIN) { + dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", + __func__, data->ds_clp, data->ds_clp->cl_session); + if (reset) { + nfs4_reset_read(task, data); + filelayout_set_lo_fail(data->lseg, + data->args.context->state->state); + clp = NFS_SERVER(data->inode)->nfs_client; + } + nfs_restart_rpc(task, clp); + return -EAGAIN; + } + + return 0; +} + /* * Call ops for the async read/write cases * In the case of dense layouts, the offset needs to be reset to its @@ -104,6 +188,8 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data) { struct 
nfs_read_data *rdata = (struct nfs_read_data *)data; + rdata->read_done_cb = filelayout_read_done_cb; + if (nfs41_setup_sequence(rdata->ds_clp->cl_session, &rdata->args.seq_args, &rdata->res.seq_res, 0, task)) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3fcf756..9dee49d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3075,41 +3075,51 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, return err; } -static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) +static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) { struct nfs_server *server = NFS_SERVER(data->inode); - struct nfs_client *clp = server->nfs_client; - - dprintk("--> %s\n", __func__); - -#ifdef CONFIG_NFS_V4_1 - /* Is this a DS session */ - if (data->ds_clp) { - dprintk("%s DS read\n", __func__); - clp = data->ds_clp; - } -#endif /* CONFIG_NFS_V4_1 */ - - if (!nfs4_sequence_done(task, &data->res.seq_res)) - return -EAGAIN; if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { - nfs_restart_rpc(task, client); + nfs_restart_rpc(task, server->nfs_client); return -EAGAIN; } nfs_invalidate_atime(data->inode); - if (task->tk_status > 0 && !data->ds_clp) + if (task->tk_status > 0) renew_lease(server, data->timestamp); return 0; } +static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) +{ + + dprintk("--> %s\n", __func__); + + if (!nfs4_sequence_done(task, &data->res.seq_res)) + return -EAGAIN; + + return data->read_done_cb(task, data); +} + static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) { data->timestamp = jiffies; + data->read_done_cb = nfs4_read_done_cb; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; } +/* Reset the nfs_read_data to send the read to another server. 
*/ +void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data) +{ + dprintk("%s Reset task for i/o through \n", __func__); + data->ds_clp = NULL; + data->args.fh = NFS_FH(data->inode); + data->read_done_cb = nfs4_read_done_cb; + task->tk_ops = data->call_ops; + rpc_task_reset_client(task, NFS_CLIENT(data->inode)); +} +EXPORT_SYMBOL_GPL(nfs4_reset_read); + static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) { struct inode *inode = data->inode; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 49433aa..346fb97 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1022,6 +1022,7 @@ void nfs4_schedule_state_recovery(struct nfs_client *clp) set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); nfs4_schedule_state_manager(clp); } +EXPORT_SYMBOL_GPL(nfs4_schedule_state_recovery); int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state) { diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 6a99c33..218cdfe 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -198,7 +198,6 @@ void pnfs_roc_release(struct inode *ino); void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); bool pnfs_roc_drain(struct inode *ino, u32 *barrier); - static inline int lo_fail_bit(u32 iomode) { return iomode == IOMODE_RW ? 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 1222aa9..c91f468 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1020,6 +1020,7 @@ struct nfs_read_data { struct pnfs_layout_segment *lseg; struct nfs_client *ds_clp; /* pNFS data server */ const struct rpc_call_ops *call_ops; /* For pNFS recovery to MDS */ + int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data); __u64 orig_offset; /* Filelayout dense stripe */ struct page *page_array[NFS_PAGEVEC_SIZE]; }; diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index ef9476a..db7bcaf 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -129,6 +129,7 @@ struct rpc_create_args { struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, struct rpc_program *, u32); +void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt); struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); void rpc_shutdown_client(struct rpc_clnt *); void rpc_release_client(struct rpc_clnt *); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 57d344c..5c4df70 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -597,6 +597,14 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) } } +void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt) +{ + rpc_task_release_client(task); + rpc_task_set_client(task, clnt); +} +EXPORT_SYMBOL_GPL(rpc_task_reset_client); + + static void rpc_task_set_rpc_message(struct rpc_task *task, const struct rpc_message *msg) { -- 1.6.6 -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html