On Wed, 2012-03-21 at 15:46 -0400, andros@xxxxxxxxxx wrote: > From: Andy Adamson <andros@xxxxxxxxxx> > > This prevents the use of any layout for i/o that references the deviceid. > I/O is redirected through the MDS. > > Redirect the unhandled failed I/O to the MDS without marking either the > layout or the deviceid invalid. > > Signed-off-by: Andy Adamson <andros@xxxxxxxxxx> > --- > fs/nfs/nfs4filelayout.c | 65 ++++++++++++++++++++++++++++++++++------------ > fs/nfs/nfs4filelayout.h | 6 ++++ > 2 files changed, 54 insertions(+), 17 deletions(-) > > diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c > index 3802937..1f1be26 100644 > --- a/fs/nfs/nfs4filelayout.c > +++ b/fs/nfs/nfs4filelayout.c > @@ -116,7 +116,7 @@ void filelayout_reset_read(struct rpc_task *task, struct nfs_read_data *data) > static int filelayout_async_handle_error(struct rpc_task *task, > struct nfs4_state *state, > struct nfs_client *clp, > - int *reset) > + unsigned long *reset) > { > struct nfs_server *mds_server = NFS_SERVER(state->inode); > struct nfs_client *mds_client = mds_server->nfs_client; > @@ -158,10 +158,23 @@ static int filelayout_async_handle_error(struct rpc_task *task, > break; > case -NFS4ERR_RETRY_UNCACHED_REP: > break; > + /* RPC connection errors */ > + case -ECONNREFUSED: > + case -EHOSTDOWN: > + case -EHOSTUNREACH: > + case -ENETUNREACH: > + case -EIO: > + case -ETIMEDOUT: > + case -EPIPE: > + dprintk("%s DS connection error. Retry through MDS %d\n", > + __func__, task->tk_status); > + set_bit(NFS4_RESET_DEVICEID, reset); > + set_bit(NFS4_RESET_TO_MDS, reset); > + break; > default: > - dprintk("%s DS error. Retry through MDS %d\n", __func__, > - task->tk_status); > - *reset = 1; > + dprintk("%s Unhandled DS error. 
Retry through MDS %d\n", > + __func__, task->tk_status); > + set_bit(NFS4_RESET_TO_MDS, reset); > break; > } > out: > @@ -179,16 +192,22 @@ wait_on_recovery: > static int filelayout_read_done_cb(struct rpc_task *task, > struct nfs_read_data *data) > { > - int reset = 0; > + struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(data->lseg); > + unsigned long reset = 0; > > dprintk("%s DS read\n", __func__); > > if (filelayout_async_handle_error(task, data->args.context->state, > data->ds_clp, &reset) == -EAGAIN) { > - dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", > - __func__, data->ds_clp, data->ds_clp->cl_session); > - if (reset) > + > + dprintk("%s reset 0x%lx ds_clp %p session %p\n", __func__, > + reset, data->ds_clp, data->ds_clp->cl_session); > + > + if (test_bit(NFS4_RESET_TO_MDS, &reset)) { > filelayout_reset_read(task, data); > + if (test_bit(NFS4_RESET_DEVICEID, &reset)) > + filelayout_mark_devid_invalid(devid); Is there any reason why we shouldn't just do the filelayout_mark_devid_invalid() within filelayout_async_handle_error() instead of having the caller do it? That should also enable us to get rid of the whole 'reset' argument and replace it with a return value != 0 && != -EAGAIN. > + } > rpc_restart_call_prepare(task); This can probably also be done inside filelayout_async_handle_error(), BTW. 
> return -EAGAIN; > } > @@ -260,14 +279,20 @@ static void filelayout_read_release(void *data) > static int filelayout_write_done_cb(struct rpc_task *task, > struct nfs_write_data *data) > { > - int reset = 0; > + struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(data->lseg); > + unsigned long reset = 0; > > if (filelayout_async_handle_error(task, data->args.context->state, > data->ds_clp, &reset) == -EAGAIN) { > - dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", > - __func__, data->ds_clp, data->ds_clp->cl_session); > - if (reset) > + > + dprintk("%s reset 0x%lx ds_clp %p session %p\n", __func__, > + reset, data->ds_clp, data->ds_clp->cl_session); > + > + if (test_bit(NFS4_RESET_TO_MDS, &reset)) { > filelayout_reset_write(task, data); > + if (test_bit(NFS4_RESET_DEVICEID, &reset)) > + filelayout_mark_devid_invalid(devid); > + } > rpc_restart_call_prepare(task); > return -EAGAIN; > } > @@ -290,16 +315,22 @@ static void prepare_to_resend_writes(struct nfs_write_data *data) > static int filelayout_commit_done_cb(struct rpc_task *task, > struct nfs_write_data *data) > { > - int reset = 0; > + struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(data->lseg); > + unsigned long reset = 0; > > if (filelayout_async_handle_error(task, data->args.context->state, > data->ds_clp, &reset) == -EAGAIN) { > - dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", > - __func__, data->ds_clp, data->ds_clp->cl_session); > - if (reset) > + > + dprintk("%s reset 0x%lx ds_clp %p session %p\n", __func__, > + reset, data->ds_clp, data->ds_clp->cl_session); > + > + if (test_bit(NFS4_RESET_TO_MDS, &reset)) { > prepare_to_resend_writes(data); > - else > + if (test_bit(NFS4_RESET_DEVICEID, &reset)) > + filelayout_mark_devid_invalid(devid); > + } else { > rpc_restart_call_prepare(task); > + } > return -EAGAIN; > } > > diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h > index b54b389..08b667a 100644 > --- a/fs/nfs/nfs4filelayout.h > +++ 
b/fs/nfs/nfs4filelayout.h > @@ -41,6 +41,12 @@ > #define NFS4_PNFS_MAX_STRIPE_CNT 4096 > #define NFS4_PNFS_MAX_MULTI_CNT 256 /* 256 fit into a u8 stripe_index */ > > +/* internal use */ > +enum nfs4_fl_reset_state { > + NFS4_RESET_TO_MDS = 0, > + NFS4_RESET_DEVICEID, > +}; > + > enum stripetype4 { > STRIPE_SPARSE = 1, > STRIPE_DENSE = 2 -- Trond Myklebust Linux NFS client maintainer NetApp Trond.Myklebust@xxxxxxxxxx www.netapp.com