From: The pNFS Team <linux-nfs@xxxxxxxxxxxxxxx> Signed-off-by: Andy Adamson <andros@xxxxxxxxxxxxxx> Signed-off-by: Dean Hildebrand <dhildeb@xxxxxxxxxx> Signed-off-by: Fred Isaman <iisaman@xxxxxxxxxxxxxx> Signed-off-by: Mingyang Guo <guomingyang@xxxxxxxxxxxx> Signed-off-by: Oleg Drokin <green@xxxxxxxxxxxxxx> Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@xxxxxxxxxx> Tested-by: Guo Mingyang <guomingyang@xxxxxxxxxxxx> Signed-off-by: Andy Adamson <andros@xxxxxxxxxx> Signed-off-by: Benny Halevy <bhalevy@xxxxxxxxxxx> Signed-off-by: Fred Isaman <iisaman@xxxxxxxxxx> --- fs/nfs/internal.h | 5 ++ fs/nfs/nfs4filelayout.c | 103 +++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs4proc.c | 17 ++++++++ fs/nfs/write.c | 5 ++- include/linux/nfs_xdr.h | 1 + 5 files changed, 130 insertions(+), 1 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index f69a322..34096a1 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -272,6 +272,10 @@ extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, extern void nfs_read_prepare(struct rpc_task *task, void *calldata); /* write.c */ +extern int nfs_initiate_write(struct nfs_write_data *data, + struct rpc_clnt *clnt, + const struct rpc_call_ops *call_ops, + int how); extern void nfs_write_prepare(struct rpc_task *task, void *calldata); #ifdef CONFIG_MIGRATION extern int nfs_migrate_page(struct address_space *, @@ -282,6 +286,7 @@ extern int nfs_migrate_page(struct address_space *, /* nfs4proc.c */ extern void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data); +extern void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data); extern int _nfs4_call_sync(struct nfs_server *server, struct rpc_message *msg, struct nfs4_sequence_args *args, diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 80e7a66..be4af22 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -208,12 +208,69 @@ static void filelayout_read_release(void *data) rdata->mds_ops->rpc_release(data); } +static int filelayout_write_done_cb(struct rpc_task *task, + struct nfs_write_data *data) +{ + int reset = 0; + + if (filelayout_async_handle_error(task, data->args.context->state, + data->ds_clp, &reset) == -EAGAIN) { + struct nfs_client *clp; + + dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", + __func__, data->ds_clp, data->ds_clp->cl_session); + if (reset) { + nfs4_reset_write(task, data); + filelayout_set_lo_fail(data->lseg); + clp = NFS_SERVER(data->inode)->nfs_client; + } else + clp = data->ds_clp; + nfs_restart_rpc(task, clp); + return -EAGAIN; + } + + return 0; +} + +static void filelayout_write_prepare(struct rpc_task *task, void *data) +{ + struct nfs_write_data *wdata = (struct nfs_write_data *)data; + + if (nfs41_setup_sequence(wdata->ds_clp->cl_session, + &wdata->args.seq_args, &wdata->res.seq_res, + 0, task)) + return; + + rpc_call_start(task); +} + +static void filelayout_write_call_done(struct rpc_task *task, void *data) +{ + struct nfs_write_data *wdata = (struct nfs_write_data *)data; + + /* Note this may cause RPC to be resent */ + wdata->mds_ops->rpc_call_done(task, data); +} + +static void filelayout_write_release(void *data) +{ + struct nfs_write_data *wdata = (struct nfs_write_data *)data; + + wdata->mds_ops->rpc_release(data); +} + struct rpc_call_ops filelayout_read_call_ops = { .rpc_call_prepare = filelayout_read_prepare, .rpc_call_done = filelayout_read_call_done, .rpc_release = filelayout_read_release, }; +struct rpc_call_ops filelayout_write_call_ops = { + .rpc_call_prepare = filelayout_write_prepare, + .rpc_call_done = filelayout_write_call_done, + .rpc_release = filelayout_write_release, +}; + static enum pnfs_try_status filelayout_read_pagelist(struct nfs_read_data *data) { @@ -254,6 +311,51 @@ filelayout_read_pagelist(struct nfs_read_data *data) return PNFS_ATTEMPTED; } +/* Perform async writes. */ +static enum pnfs_try_status +filelayout_write_pagelist(struct nfs_write_data *data, int sync) +{ + struct pnfs_layout_segment *lseg = data->lseg; + struct nfs4_pnfs_ds *ds; + loff_t offset = data->args.offset; + u32 idx; + struct nfs_fh *fh; + + /* Retrieve the correct rpc_client for the byte range */ + idx = nfs4_fl_calc_ds_index(lseg, offset); + ds = nfs4_fl_prepare_ds(lseg, idx); + if (!ds) { + printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__); + set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); + set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); + return PNFS_NOT_ATTEMPTED; + } + dprintk("%s ino %lu sync %d req %Zu@%llu DS:%x:%hu\n", __func__, + data->inode->i_ino, sync, (size_t) data->args.count, offset, + ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); + + /* We can't handle commit to ds yet */ + if (!FILELAYOUT_LSEG(lseg)->commit_through_mds) + data->args.stable = NFS_FILE_SYNC; + + data->write_done_cb = filelayout_write_done_cb; + data->ds_clp = ds->ds_clp; + fh = nfs4_fl_select_ds_fh(lseg, offset); + if (fh) + data->args.fh = fh; + /* + * Get the file offset on the dserver. Set the write offset to + * this offset and save the original offset. + */ + data->args.offset = filelayout_get_dserver_offset(lseg, offset); + data->mds_offset = offset; + + /* Perform an asynchronous write */ + nfs_initiate_write(data, ds->ds_clp->cl_rpcclient, + &filelayout_write_call_ops, sync); + return PNFS_ATTEMPTED; +} + /* * filelayout_check_layout() * @@ -475,6 +577,7 @@ static struct pnfs_layoutdriver_type filelayout_type = { .free_lseg = filelayout_free_lseg, .pg_test = filelayout_pg_test, .read_pagelist = filelayout_read_pagelist, + .write_pagelist = filelayout_write_pagelist, }; static int __init nfs4filelayout_init(void) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3952f7c..6cce678 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3144,6 +3144,23 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) return data->write_done_cb(task, data); } +/* Reset the the nfs_write_data to send the write to the MDS. */ +void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data) +{ + dprintk("%s Reset task for i/o through \n", __func__); + put_lseg(data->lseg); + data->lseg = NULL; + data->ds_clp = NULL; + data->write_done_cb = nfs4_write_done_cb; + data->args.fh = NFS_FH(data->inode); + data->args.bitmask = data->res.server->cache_consistency_bitmask; + data->args.offset = data->mds_offset; + data->res.fattr = &data->fattr; + task->tk_ops = data->mds_ops; + rpc_task_reset_client(task, NFS_CLIENT(data->inode)); +} +EXPORT_SYMBOL_GPL(nfs4_reset_write); + static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) { struct nfs_server *server = NFS_SERVER(data->inode); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 24cc34d..5757b06 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -783,7 +783,7 @@ static int flush_task_priority(int how) return RPC_PRIORITY_NORMAL; } -static int nfs_initiate_write(struct nfs_write_data *data, +int nfs_initiate_write(struct nfs_write_data *data, struct rpc_clnt *clnt, const struct rpc_call_ops *call_ops, int how) @@ -833,6 +833,7 @@ static int nfs_initiate_write(struct nfs_write_data *data, out: return ret; } +EXPORT_SYMBOL(nfs_initiate_write); /* * Set up the argument/result storage required for the RPC call. @@ -1186,6 +1187,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) */ static unsigned long complain; + /* Note this will print the MDS for a DS write */ if (time_before(complain, jiffies)) { dprintk("NFS: faulty NFS server %s:" " (committed = %d) != (stable = %d)\n", @@ -1206,6 +1208,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) /* Was this an NFSv2 write or an NFSv3 stable write? */ if (resp->verf->committed != NFS_UNSTABLE) { /* Resend from where the server left off */ + data->mds_offset += resp->count; argp->offset += resp->count; argp->pgbase += resp->count; argp->count -= resp->count; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 9ff26f9..a045d12 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1045,6 +1045,7 @@ struct nfs_write_data { #ifdef CONFIG_NFS_V4 unsigned long timestamp; /* For lease renewal */ #endif + __u64 mds_offset; /* Filelayout dense stripe */ struct page *page_array[NFS_PAGEVEC_SIZE]; }; -- 1.7.2.1 -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html