On Mar 21, 2012, at 4:39 PM, Myklebust, Trond wrote: > On Wed, 2012-03-21 at 15:46 -0400, andros@xxxxxxxxxx wrote: >> From: Andy Adamson <andros@xxxxxxxxxx> >> >> This prevents the use of any layout for i/o that references the deviceid. >> I/O is redirected through the MDS. >> >> Redirect the unhandled failed I/O to the MDS without marking either the >> layout or the deviceid invalid. >> >> Signed-off-by: Andy Adamson <andros@xxxxxxxxxx> >> --- >> fs/nfs/nfs4filelayout.c | 65 ++++++++++++++++++++++++++++++++++------------ >> fs/nfs/nfs4filelayout.h | 6 ++++ >> 2 files changed, 54 insertions(+), 17 deletions(-) >> >> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c >> index 3802937..1f1be26 100644 >> --- a/fs/nfs/nfs4filelayout.c >> +++ b/fs/nfs/nfs4filelayout.c >> @@ -116,7 +116,7 @@ void filelayout_reset_read(struct rpc_task *task, struct nfs_read_data *data) >> static int filelayout_async_handle_error(struct rpc_task *task, >> struct nfs4_state *state, >> struct nfs_client *clp, >> - int *reset) >> + unsigned long *reset) >> { >> struct nfs_server *mds_server = NFS_SERVER(state->inode); >> struct nfs_client *mds_client = mds_server->nfs_client; >> @@ -158,10 +158,23 @@ static int filelayout_async_handle_error(struct rpc_task *task, >> break; >> case -NFS4ERR_RETRY_UNCACHED_REP: >> break; >> + /* RPC connection errors */ >> + case -ECONNREFUSED: >> + case -EHOSTDOWN: >> + case -EHOSTUNREACH: >> + case -ENETUNREACH: >> + case -EIO: >> + case -ETIMEDOUT: >> + case -EPIPE: >> + dprintk("%s DS connection error. Retry through MDS %d\n", >> + __func__, task->tk_status); >> + set_bit(NFS4_RESET_DEVICEID, reset); >> + set_bit(NFS4_RESET_TO_MDS, reset); >> + break; >> default: >> - dprintk("%s DS error. Retry through MDS %d\n", __func__, >> - task->tk_status); >> - *reset = 1; >> + dprintk("%s Unhandled DS error. Retry through MDS %d\n", >> + __func__, task->tk_status); >> + set_bit(NFS4_RESET_TO_MDS, reset); >> break; >> } >> out: >> @@ -179,16 +192,22 @@ wait_on_recovery: >> static int filelayout_read_done_cb(struct rpc_task *task, >> struct nfs_read_data *data) >> { >> - int reset = 0; >> + struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(data->lseg); >> + unsigned long reset = 0; >> >> dprintk("%s DS read\n", __func__); >> >> if (filelayout_async_handle_error(task, data->args.context->state, >> data->ds_clp, &reset) == -EAGAIN) { >> - dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", >> - __func__, data->ds_clp, data->ds_clp->cl_session); >> - if (reset) >> + >> + dprintk("%s reset 0x%lx ds_clp %p session %p\n", __func__, >> + reset, data->ds_clp, data->ds_clp->cl_session); >> + >> + if (test_bit(NFS4_RESET_TO_MDS, &reset)) { >> filelayout_reset_read(task, data); >> + if (test_bit(NFS4_RESET_DEVICEID, &reset)) >> + filelayout_mark_devid_invalid(devid); > > Is there any reason why we shouldn't just do the > filelayout_mark_devid_invalid() within filelayout_async_handle_error() > instead of having the caller do it? We would have to pass in the lseg argument. > > That should also enable us to get rid of the whole 'reset' argument and > replace it with a return value != 0 && != -EAGAIN. We would need to pass in the operation because READ/WRITE/COMMIT call different reset functions. I chose the 'reset' argument method instead of passing in the operation because I thought it cleaner to keep the per operation logic in the per operation rpc functions instead of having a switch(operation) statement in the async handler for each group of errors. > >> + } >> rpc_restart_call_prepare(task); > > This can probably also be done inside filelayout_async_handle_error(), > BTW. COMMIT does not call rpc_restart_call_prepare on reset or invalid deviceid errors because we want the release function to fail with a verifier mismatch. So it's up to you: I could move all the per operation logic into the async handler by passing in the operation and using it to dereferencing the tk_callback to get the lseg and other needed parameters - then moving the code from the done_cb routines into the async handler under a switch(operation) for both the default reset to mds and for the invalid deviced. -->Andy > >> return -EAGAIN; >> } >> @@ -260,14 +279,20 @@ static void filelayout_read_release(void *data) >> static int filelayout_write_done_cb(struct rpc_task *task, >> struct nfs_write_data *data) >> { >> - int reset = 0; >> + struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(data->lseg); >> + unsigned long reset = 0; >> >> if (filelayout_async_handle_error(task, data->args.context->state, >> data->ds_clp, &reset) == -EAGAIN) { >> - dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", >> - __func__, data->ds_clp, data->ds_clp->cl_session); >> - if (reset) >> + >> + dprintk("%s reset 0x%lx ds_clp %p session %p\n", __func__, >> + reset, data->ds_clp, data->ds_clp->cl_session); >> + >> + if (test_bit(NFS4_RESET_TO_MDS, &reset)) { >> filelayout_reset_write(task, data); >> + if (test_bit(NFS4_RESET_DEVICEID, &reset)) >> + filelayout_mark_devid_invalid(devid); >> + } >> rpc_restart_call_prepare(task); >> return -EAGAIN; >> } >> @@ -290,16 +315,22 @@ static void prepare_to_resend_writes(struct nfs_write_data *data) >> static int filelayout_commit_done_cb(struct rpc_task *task, >> struct nfs_write_data *data) >> { >> - int reset = 0; >> + struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(data->lseg); >> + unsigned long reset = 0; >> >> if (filelayout_async_handle_error(task, data->args.context->state, >> data->ds_clp, &reset) == -EAGAIN) { >> - dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", >> - __func__, data->ds_clp, data->ds_clp->cl_session); >> - if (reset) >> + >> + dprintk("%s reset 0x%lx ds_clp %p session %p\n", __func__, >> + reset, data->ds_clp, data->ds_clp->cl_session); >> + >> + if (test_bit(NFS4_RESET_TO_MDS, &reset)) { >> prepare_to_resend_writes(data); >> - else >> + if (test_bit(NFS4_RESET_DEVICEID, &reset)) >> + filelayout_mark_devid_invalid(devid); >> + } else { >> rpc_restart_call_prepare(task); >> + } >> return -EAGAIN; >> } >> >> diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h >> index b54b389..08b667a 100644 >> --- a/fs/nfs/nfs4filelayout.h >> +++ b/fs/nfs/nfs4filelayout.h >> @@ -41,6 +41,12 @@ >> #define NFS4_PNFS_MAX_STRIPE_CNT 4096 >> #define NFS4_PNFS_MAX_MULTI_CNT 256 /* 256 fit into a u8 stripe_index */ >> >> +/* internal use */ >> +enum nfs4_fl_reset_state { >> + NFS4_RESET_TO_MDS = 0, >> + NFS4_RESET_DEVICEID, >> +}; >> + >> enum stripetype4 { >> STRIPE_SPARSE = 1, >> STRIPE_DENSE = 2 > > -- > Trond Myklebust > Linux NFS client maintainer > > NetApp > Trond.Myklebust@xxxxxxxxxx > www.netapp.com > -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html