Re: [PATCH 36/40] pnfs-submit wave3 filelayout read done

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Sorry if you got two responses - my mailer to netapp is having trouble
sending mail....

On Tue, Feb 8, 2011 at 6:06 PM, Fred Isaman <iisaman@xxxxxxxxxx> wrote:
> On Fri, Feb 4, 2011 at 4:33 PM,  <andros@xxxxxxxxxx> wrote:
>> From: Andy Adamson <andros@xxxxxxxxxx>
>>
>> Use our own async error handler.
>> Mark the layout as failed and retry i/o through the MDS on specified errors.
>>
>> Signed-off-by: Andy Adamson <andros@xxxxxxxxxx>
>> ---
>>  fs/nfs/internal.h           |    1 +
>>  fs/nfs/nfs4filelayout.c     |   86 +++++++++++++++++++++++++++++++++++++++++++
>>  fs/nfs/nfs4proc.c           |   44 +++++++++++++--------
>>  fs/nfs/nfs4state.c          |    1 +
>>  fs/nfs/pnfs.h               |    1 -
>>  include/linux/nfs_xdr.h     |    1 +
>>  include/linux/sunrpc/clnt.h |    1 +
>>  net/sunrpc/clnt.c           |    8 ++++
>>  8 files changed, 125 insertions(+), 18 deletions(-)
>>
>> diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
>> index 5518d61..f69a322 100644
>> --- a/fs/nfs/internal.h
>> +++ b/fs/nfs/internal.h
>> @@ -281,6 +281,7 @@ extern int nfs_migrate_page(struct address_space *,
>>  #endif
>>
>>  /* nfs4proc.c */
>> +extern void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data);
>>  extern int _nfs4_call_sync(struct nfs_server *server,
>>                           struct rpc_message *msg,
>>                           struct nfs4_sequence_args *args,
>> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
>> index 5fd8ed3..777d78b 100644
>> --- a/fs/nfs/nfs4filelayout.c
>> +++ b/fs/nfs/nfs4filelayout.c
>> @@ -40,6 +40,8 @@ MODULE_LICENSE("GPL");
>>  MODULE_AUTHOR("Dean Hildebrand <dhildebz@xxxxxxxxx>");
>>  MODULE_DESCRIPTION("The NFSv4 file layout driver");
>>
>> +#define FILELAYOUT_POLL_RETRY_MAX     (15*HZ)
>> +
>>  static int
>>  filelayout_set_layoutdriver(struct nfs_server *nfss)
>>  {
>> @@ -95,6 +97,88 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
>>        BUG();
>>  }
>>
>> +/* For data server errors we don't recover from */
>> +static void
>> +filelayout_set_lo_fail(struct pnfs_layout_segment *lseg, fmode_t mode)
>> +{
>> +       if (mode & FMODE_WRITE) {
>> +               dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__);
>> +               set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
>> +       } else if (mode & FMODE_READ) {
>> +               dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__);
>> +               set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
>> +       }
>> +}
>> +
>> +/*
>> + * Async I/O error handler.
>> + *
>> + * NFS4ERR_OLD_STATEID can not occur with a zero stateid seqid.
>> + */
>> +static int filelayout_async_handle_error(struct rpc_task *task,
>> +                                        struct nfs4_state *state,
>> +                                        struct nfs_client *clp,
>> +                                        int *reset)
>> +{
>> +       if (task->tk_status >= 0)
>> +               return 0;
>> +       switch (task->tk_status) {
>> +       case -NFS4ERR_BADSESSION:
>> +       case -NFS4ERR_BADSLOT:
>> +       case -NFS4ERR_BAD_HIGH_SLOT:
>> +       case -NFS4ERR_DEADSESSION:
>> +       case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
>> +       case -NFS4ERR_SEQ_FALSE_RETRY:
>> +       case -NFS4ERR_SEQ_MISORDERED:
>> +               dprintk("%s ERROR %d, Reset session. Exchangeid "
>> +                       "flags 0x%x\n", __func__, task->tk_status,
>> +                       clp->cl_exchange_flags);
>> +               nfs4_schedule_state_recovery(clp);
>> +               task->tk_status = 0;
>> +               return -EAGAIN;
>> +       case -NFS4ERR_DELAY:
>> +       case -NFS4ERR_GRACE:
>> +       case -EKEYEXPIRED:
>> +               rpc_delay(task, FILELAYOUT_POLL_RETRY_MAX);
>> +               task->tk_status = 0;
>> +               return -EAGAIN;
>> +       default:
>> +               dprintk("%s DS error %d\n", __func__, task->tk_status);
>> +               /* Layout marked as failed by pnfs_check_io_status.
>> +                * Retry I/O through the MDS */
>> +               *reset = 1;
>> +               task->tk_status = 0;
>> +               return -EAGAIN;
>> +       }
>> +}
>> +
>> +/* NFS_PROTO call done callback routines */
>> +
>> +static int filelayout_read_done_cb(struct rpc_task *task,
>> +                               struct nfs_read_data *data)
>> +{
>> +       struct nfs_client *clp = data->ds_clp;
>> +       int reset = 0;
>> +
>> +       dprintk("%s DS read\n", __func__);
>> +
>> +       if (filelayout_async_handle_error(task, data->args.context->state,
>> +                                         data->ds_clp, &reset) == -EAGAIN) {
>> +               dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
>> +                       __func__, data->ds_clp, data->ds_clp->cl_session);
>> +               if (reset) {
>> +                       nfs4_reset_read(task, data);
>> +                       filelayout_set_lo_fail(data->lseg,
>> +                                       data->args.context->state->state);
>
> Why use the open context, instead of just failing read layouts?

I pass in the mode of the open, which is also used to determine the
iomode of the layout.

> Do you
> really want to prevent all future write layouts too?  Even worse is
> the reverse case...if a write layout op fails do you want to prevent
> all future read layouts just because the file happened to be open for
> read?

That is not what this code does. It either fails IOMODE_RW (for
FMODE_WRITE) or IOMODE_READ (for FMODE_READ) layouts, never both.

>
> If you answer is no, then just send a READ/WRITE bit.

What we really want is to fail the layout that was used. So, I can
simply use the iomode of the data->lseg to determine which mode to
fail.

-->Andy

>
> If your answer is yes, then just remove the arg entirely and always
> mark both modes as failing.
>
> Fred
>
>> +                       clp = NFS_SERVER(data->inode)->nfs_client;
>> +               }
>> +               nfs_restart_rpc(task, clp);
>> +               return -EAGAIN;
>> +       }
>> +
>> +       return 0;
>> +}
>> +
>>  /*
>>  * Call ops for the async read/write cases
>>  * In the case of dense layouts, the offset needs to be reset to its
>> @@ -104,6 +188,8 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data)
>>  {
>>        struct nfs_read_data *rdata = (struct nfs_read_data *)data;
>>
>> +       rdata->read_done_cb = filelayout_read_done_cb;
>> +
>>        if (nfs41_setup_sequence(rdata->ds_clp->cl_session,
>>                                &rdata->args.seq_args, &rdata->res.seq_res,
>>                                0, task))
>> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
>> index 3fcf756..9dee49d 100644
>> --- a/fs/nfs/nfs4proc.c
>> +++ b/fs/nfs/nfs4proc.c
>> @@ -3075,41 +3075,51 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
>>        return err;
>>  }
>>
>> -static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
>> +static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
>>  {
>>        struct nfs_server *server = NFS_SERVER(data->inode);
>> -       struct nfs_client *clp = server->nfs_client;
>> -
>> -       dprintk("--> %s\n", __func__);
>> -
>> -#ifdef CONFIG_NFS_V4_1
>> -       /* Is this a DS session */
>> -       if (data->ds_clp) {
>> -               dprintk("%s DS read\n", __func__);
>> -               clp = data->ds_clp;
>> -       }
>> -#endif /* CONFIG_NFS_V4_1 */
>> -
>> -       if (!nfs4_sequence_done(task, &data->res.seq_res))
>> -               return -EAGAIN;
>>
>>        if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {
>> -               nfs_restart_rpc(task, client);
>> +               nfs_restart_rpc(task, server->nfs_client);
>>                return -EAGAIN;
>>        }
>>
>>        nfs_invalidate_atime(data->inode);
>> -       if (task->tk_status > 0 && !data->ds_clp)
>> +       if (task->tk_status > 0)
>>                renew_lease(server, data->timestamp);
>>        return 0;
>>  }
>>
>> +static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
>> +{
>> +
>> +       dprintk("--> %s\n", __func__);
>> +
>> +       if (!nfs4_sequence_done(task, &data->res.seq_res))
>> +               return -EAGAIN;
>> +
>> +       return data->read_done_cb(task, data);
>> +}
>> +
>>  static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg)
>>  {
>>        data->timestamp   = jiffies;
>> +       data->read_done_cb = nfs4_read_done_cb;
>>        msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
>>  }
>>
>> +/* Reset the the nfs_read_data to send the read to another server. */
>> +void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data)
>> +{
>> +       dprintk("%s Reset task for i/o through \n", __func__);
>> +       data->ds_clp = NULL;
>> +       data->args.fh     = NFS_FH(data->inode);
>> +       data->read_done_cb = nfs4_read_done_cb;
>> +       task->tk_ops = data->call_ops;
>> +       rpc_task_reset_client(task, NFS_CLIENT(data->inode));
>> +}
>> +EXPORT_SYMBOL_GPL(nfs4_reset_read);
>> +
>>  static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
>>  {
>>        struct inode *inode = data->inode;
>> diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
>> index 49433aa..346fb97 100644
>> --- a/fs/nfs/nfs4state.c
>> +++ b/fs/nfs/nfs4state.c
>> @@ -1022,6 +1022,7 @@ void nfs4_schedule_state_recovery(struct nfs_client *clp)
>>                set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
>>        nfs4_schedule_state_manager(clp);
>>  }
>> +EXPORT_SYMBOL_GPL(nfs4_schedule_state_recovery);
>>
>>  int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state)
>>  {
>> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
>> index 6a99c33..218cdfe 100644
>> --- a/fs/nfs/pnfs.h
>> +++ b/fs/nfs/pnfs.h
>> @@ -198,7 +198,6 @@ void pnfs_roc_release(struct inode *ino);
>>  void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
>>  bool pnfs_roc_drain(struct inode *ino, u32 *barrier);
>>
>> -
>>  static inline int lo_fail_bit(u32 iomode)
>>  {
>>        return iomode == IOMODE_RW ?
>> diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
>> index 1222aa9..c91f468 100644
>> --- a/include/linux/nfs_xdr.h
>> +++ b/include/linux/nfs_xdr.h
>> @@ -1020,6 +1020,7 @@ struct nfs_read_data {
>>        struct pnfs_layout_segment *lseg;
>>        struct nfs_client       *ds_clp;        /* pNFS data server */
>>        const struct rpc_call_ops *call_ops;    /* For pNFS recovery to MDS */
>> +       int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data);
>>        __u64                   orig_offset;    /* Filelayout dense stripe */
>>        struct page             *page_array[NFS_PAGEVEC_SIZE];
>>  };
>> diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
>> index ef9476a..db7bcaf 100644
>> --- a/include/linux/sunrpc/clnt.h
>> +++ b/include/linux/sunrpc/clnt.h
>> @@ -129,6 +129,7 @@ struct rpc_create_args {
>>  struct rpc_clnt *rpc_create(struct rpc_create_args *args);
>>  struct rpc_clnt        *rpc_bind_new_program(struct rpc_clnt *,
>>                                struct rpc_program *, u32);
>> +void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt);
>>  struct rpc_clnt *rpc_clone_client(struct rpc_clnt *);
>>  void           rpc_shutdown_client(struct rpc_clnt *);
>>  void           rpc_release_client(struct rpc_clnt *);
>> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
>> index 57d344c..5c4df70 100644
>> --- a/net/sunrpc/clnt.c
>> +++ b/net/sunrpc/clnt.c
>> @@ -597,6 +597,14 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
>>        }
>>  }
>>
>> +void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt)
>> +{
>> +       rpc_task_release_client(task);
>> +       rpc_task_set_client(task, clnt);
>> +}
>> +EXPORT_SYMBOL_GPL(rpc_task_reset_client);
>> +
>> +
>>  static void
>>  rpc_task_set_rpc_message(struct rpc_task *task, const struct rpc_message *msg)
>>  {
>> --
>> 1.6.6
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>> the body of a message to majordomo@xxxxxxxxxxxxxxx
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux