Sorry if you got two responses - my mailer to netapp is having trouble sending mail.... On Tue, Feb 8, 2011 at 6:06 PM, Fred Isaman <iisaman@xxxxxxxxxx> wrote: > On Fri, Feb 4, 2011 at 4:33 PM, <andros@xxxxxxxxxx> wrote: >> From: Andy Adamson <andros@xxxxxxxxxx> >> >> Use our own async error handler. >> Mark the layout as failed and retry i/o through the MDS on specified errors. >> >> Signed-off-by: Andy Adamson <andros@xxxxxxxxxx> >> --- >> fs/nfs/internal.h | 1 + >> fs/nfs/nfs4filelayout.c | 86 +++++++++++++++++++++++++++++++++++++++++++ >> fs/nfs/nfs4proc.c | 44 +++++++++++++-------- >> fs/nfs/nfs4state.c | 1 + >> fs/nfs/pnfs.h | 1 - >> include/linux/nfs_xdr.h | 1 + >> include/linux/sunrpc/clnt.h | 1 + >> net/sunrpc/clnt.c | 8 ++++ >> 8 files changed, 125 insertions(+), 18 deletions(-) >> >> diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h >> index 5518d61..f69a322 100644 >> --- a/fs/nfs/internal.h >> +++ b/fs/nfs/internal.h >> @@ -281,6 +281,7 @@ extern int nfs_migrate_page(struct address_space *, >> #endif >> >> /* nfs4proc.c */ >> +extern void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data); >> extern int _nfs4_call_sync(struct nfs_server *server, >> struct rpc_message *msg, >> struct nfs4_sequence_args *args, >> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c >> index 5fd8ed3..777d78b 100644 >> --- a/fs/nfs/nfs4filelayout.c >> +++ b/fs/nfs/nfs4filelayout.c >> @@ -40,6 +40,8 @@ MODULE_LICENSE("GPL"); >> MODULE_AUTHOR("Dean Hildebrand <dhildebz@xxxxxxxxx>"); >> MODULE_DESCRIPTION("The NFSv4 file layout driver"); >> >> +#define FILELAYOUT_POLL_RETRY_MAX (15*HZ) >> + >> static int >> filelayout_set_layoutdriver(struct nfs_server *nfss) >> { >> @@ -95,6 +97,88 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset) >> BUG(); >> } >> >> +/* For data server errors we don't recover from */ >> +static void >> +filelayout_set_lo_fail(struct pnfs_layout_segment *lseg, fmode_t mode) >> +{ >> + if (mode & 
FMODE_WRITE) { >> + dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__); >> + set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); >> + } else if (mode & FMODE_READ) { >> + dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__); >> + set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); >> + } >> +} >> + >> +/* >> + * Async I/O error handler. >> + * >> + * NFS4ERR_OLD_STATEID can not occur with a zero stateid seqid. >> + */ >> +static int filelayout_async_handle_error(struct rpc_task *task, >> + struct nfs4_state *state, >> + struct nfs_client *clp, >> + int *reset) >> +{ >> + if (task->tk_status >= 0) >> + return 0; >> + switch (task->tk_status) { >> + case -NFS4ERR_BADSESSION: >> + case -NFS4ERR_BADSLOT: >> + case -NFS4ERR_BAD_HIGH_SLOT: >> + case -NFS4ERR_DEADSESSION: >> + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: >> + case -NFS4ERR_SEQ_FALSE_RETRY: >> + case -NFS4ERR_SEQ_MISORDERED: >> + dprintk("%s ERROR %d, Reset session. Exchangeid " >> + "flags 0x%x\n", __func__, task->tk_status, >> + clp->cl_exchange_flags); >> + nfs4_schedule_state_recovery(clp); >> + task->tk_status = 0; >> + return -EAGAIN; >> + case -NFS4ERR_DELAY: >> + case -NFS4ERR_GRACE: >> + case -EKEYEXPIRED: >> + rpc_delay(task, FILELAYOUT_POLL_RETRY_MAX); >> + task->tk_status = 0; >> + return -EAGAIN; >> + default: >> + dprintk("%s DS error %d\n", __func__, task->tk_status); >> + /* Layout marked as failed by pnfs_check_io_status. 
>> + * Retry I/O through the MDS */ >> + *reset = 1; >> + task->tk_status = 0; >> + return -EAGAIN; >> + } >> +} >> + >> +/* NFS_PROTO call done callback routines */ >> + >> +static int filelayout_read_done_cb(struct rpc_task *task, >> + struct nfs_read_data *data) >> +{ >> + struct nfs_client *clp = data->ds_clp; >> + int reset = 0; >> + >> + dprintk("%s DS read\n", __func__); >> + >> + if (filelayout_async_handle_error(task, data->args.context->state, >> + data->ds_clp, &reset) == -EAGAIN) { >> + dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", >> + __func__, data->ds_clp, data->ds_clp->cl_session); >> + if (reset) { >> + nfs4_reset_read(task, data); >> + filelayout_set_lo_fail(data->lseg, >> + data->args.context->state->state); > > Why use the open context, instead of just failing read layouts? I pass in the mode of the open, which is also used to determine the iomode of the layout. > Do you > really want to prevent all future write layouts too? Even worse is > the reverse case...if a write layout op fails do you want to prevent > all future read layouts just because the file happened to be open for > read? That is not what this code does. It either fails IOMODE_RW (for FMODE_WRITE) or IOMODE_READ (for FMODE_READ) layouts, never both. > > If your answer is no, then just send a READ/WRITE bit. What we really want is to fail the layout that was used. So, I can simply use the iomode of the data->lseg to determine which mode to fail. -->Andy > > If your answer is yes, then just remove the arg entirely and always > mark both modes as failing. 
> > Fred > >> + clp = NFS_SERVER(data->inode)->nfs_client; >> + } >> + nfs_restart_rpc(task, clp); >> + return -EAGAIN; >> + } >> + >> + return 0; >> +} >> + >> /* >> * Call ops for the async read/write cases >> * In the case of dense layouts, the offset needs to be reset to its >> @@ -104,6 +188,8 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data) >> { >> struct nfs_read_data *rdata = (struct nfs_read_data *)data; >> >> + rdata->read_done_cb = filelayout_read_done_cb; >> + >> if (nfs41_setup_sequence(rdata->ds_clp->cl_session, >> &rdata->args.seq_args, &rdata->res.seq_res, >> 0, task)) >> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c >> index 3fcf756..9dee49d 100644 >> --- a/fs/nfs/nfs4proc.c >> +++ b/fs/nfs/nfs4proc.c >> @@ -3075,41 +3075,51 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, >> return err; >> } >> >> -static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) >> +static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) >> { >> struct nfs_server *server = NFS_SERVER(data->inode); >> - struct nfs_client *clp = server->nfs_client; >> - >> - dprintk("--> %s\n", __func__); >> - >> -#ifdef CONFIG_NFS_V4_1 >> - /* Is this a DS session */ >> - if (data->ds_clp) { >> - dprintk("%s DS read\n", __func__); >> - clp = data->ds_clp; >> - } >> -#endif /* CONFIG_NFS_V4_1 */ >> - >> - if (!nfs4_sequence_done(task, &data->res.seq_res)) >> - return -EAGAIN; >> >> if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { >> - nfs_restart_rpc(task, client); >> + nfs_restart_rpc(task, server->nfs_client); >> return -EAGAIN; >> } >> >> nfs_invalidate_atime(data->inode); >> - if (task->tk_status > 0 && !data->ds_clp) >> + if (task->tk_status > 0) >> renew_lease(server, data->timestamp); >> return 0; >> } >> >> +static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) >> +{ >> + >> + dprintk("--> %s\n", __func__); >> + >> + if 
(!nfs4_sequence_done(task, &data->res.seq_res)) >> + return -EAGAIN; >> + >> + return data->read_done_cb(task, data); >> +} >> + >> static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) >> { >> data->timestamp = jiffies; >> + data->read_done_cb = nfs4_read_done_cb; >> msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; >> } >> >> +/* Reset the the nfs_read_data to send the read to another server. */ >> +void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data) >> +{ >> + dprintk("%s Reset task for i/o through \n", __func__); >> + data->ds_clp = NULL; >> + data->args.fh = NFS_FH(data->inode); >> + data->read_done_cb = nfs4_read_done_cb; >> + task->tk_ops = data->call_ops; >> + rpc_task_reset_client(task, NFS_CLIENT(data->inode)); >> +} >> +EXPORT_SYMBOL_GPL(nfs4_reset_read); >> + >> static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) >> { >> struct inode *inode = data->inode; >> diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c >> index 49433aa..346fb97 100644 >> --- a/fs/nfs/nfs4state.c >> +++ b/fs/nfs/nfs4state.c >> @@ -1022,6 +1022,7 @@ void nfs4_schedule_state_recovery(struct nfs_client *clp) >> set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); >> nfs4_schedule_state_manager(clp); >> } >> +EXPORT_SYMBOL_GPL(nfs4_schedule_state_recovery); >> >> int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state) >> { >> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h >> index 6a99c33..218cdfe 100644 >> --- a/fs/nfs/pnfs.h >> +++ b/fs/nfs/pnfs.h >> @@ -198,7 +198,6 @@ void pnfs_roc_release(struct inode *ino); >> void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); >> bool pnfs_roc_drain(struct inode *ino, u32 *barrier); >> >> - >> static inline int lo_fail_bit(u32 iomode) >> { >> return iomode == IOMODE_RW ? 
>> diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h >> index 1222aa9..c91f468 100644 >> --- a/include/linux/nfs_xdr.h >> +++ b/include/linux/nfs_xdr.h >> @@ -1020,6 +1020,7 @@ struct nfs_read_data { >> struct pnfs_layout_segment *lseg; >> struct nfs_client *ds_clp; /* pNFS data server */ >> const struct rpc_call_ops *call_ops; /* For pNFS recovery to MDS */ >> + int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data); >> __u64 orig_offset; /* Filelayout dense stripe */ >> struct page *page_array[NFS_PAGEVEC_SIZE]; >> }; >> diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h >> index ef9476a..db7bcaf 100644 >> --- a/include/linux/sunrpc/clnt.h >> +++ b/include/linux/sunrpc/clnt.h >> @@ -129,6 +129,7 @@ struct rpc_create_args { >> struct rpc_clnt *rpc_create(struct rpc_create_args *args); >> struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, >> struct rpc_program *, u32); >> +void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt); >> struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); >> void rpc_shutdown_client(struct rpc_clnt *); >> void rpc_release_client(struct rpc_clnt *); >> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c >> index 57d344c..5c4df70 100644 >> --- a/net/sunrpc/clnt.c >> +++ b/net/sunrpc/clnt.c >> @@ -597,6 +597,14 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) >> } >> } >> >> +void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt) >> +{ >> + rpc_task_release_client(task); >> + rpc_task_set_client(task, clnt); >> +} >> +EXPORT_SYMBOL_GPL(rpc_task_reset_client); >> + >> + >> static void >> rpc_task_set_rpc_message(struct rpc_task *task, const struct rpc_message *msg) >> { >> -- >> 1.6.6 >> >> -- >> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in >> the body of a message to majordomo@xxxxxxxxxxxxxxx >> More majordomo info at http://vger.kernel.org/majordomo-info.html >> > -- > To unsubscribe from this 
list: send the line "unsubscribe linux-nfs" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html > -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html