From: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx> If the DS is local to this client use localio to write the data. Signed-off-by: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx> Signed-off-by: Mike Snitzer <snitzer@xxxxxxxxxx> --- fs/nfs/flexfilelayout/flexfilelayout.c | 109 ++++++++++++++++++++-- fs/nfs/flexfilelayout/flexfilelayout.h | 2 + fs/nfs/flexfilelayout/flexfilelayoutdev.c | 6 ++ 3 files changed, 110 insertions(+), 7 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 01ee52551a63..206b4b524e43 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -11,6 +11,7 @@ #include <linux/nfs_mount.h> #include <linux/nfs_page.h> #include <linux/module.h> +#include <linux/file.h> #include <linux/sched/mm.h> #include <linux/sunrpc/metrics.h> @@ -162,6 +163,52 @@ decode_name(struct xdr_stream *xdr, u32 *id) return 0; } +static struct file * +ff_local_open_fh(struct pnfs_layout_segment *lseg, + u32 ds_idx, + struct nfs_client *clp, + const struct cred *cred, + struct nfs_fh *fh, + fmode_t mode) +{ + struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx); + struct file *filp, *new, __rcu **pfile; + + if (!nfs_server_is_local(clp)) + return NULL; + if (mode & FMODE_WRITE) { + /* + * Always request read and write access since this corresponds + * to a rw layout. + */ + mode |= FMODE_READ; + pfile = &mirror->rw_file; + } else + pfile = &mirror->ro_file; + + new = NULL; + rcu_read_lock(); + filp = rcu_dereference(*pfile); + if (!filp) { + rcu_read_unlock(); + new = nfs_local_open_fh(clp, cred, fh, mode); + if (IS_ERR(new)) + return NULL; + rcu_read_lock(); + /* try to swap in the pointer */ + filp = cmpxchg(pfile, NULL, new); + if (!filp) { + filp = new; + new = NULL; + } + } + filp = get_file_rcu(&filp); + rcu_read_unlock(); + if (new) + fput(new); + return filp; +} + static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1, const struct nfs4_ff_layout_mirror *m2) { @@ -237,8 +284,15 @@ static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags) static void ff_layout_free_mirror(struct nfs4_ff_layout_mirror *mirror) { + struct file *filp; const struct cred *cred; + filp = rcu_access_pointer(mirror->ro_file); + if (filp) + fput(filp); + filp = rcu_access_pointer(mirror->rw_file); + if (filp) + fput(filp); ff_layout_remove_mirror(mirror); kfree(mirror->fh_versions); cred = rcu_access_pointer(mirror->ro_cred); @@ -414,6 +468,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, struct nfs4_ff_layout_mirror *mirror; struct cred *kcred; const struct cred __rcu *cred; + const struct cred __rcu *old; kuid_t uid; kgid_t gid; u32 ds_count, fh_count, id; @@ -513,13 +568,26 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, mirror = ff_layout_add_mirror(lh, fls->mirror_array[i]); if (mirror != fls->mirror_array[i]) { + struct file *filp; + /* swap cred ptrs so free_mirror will clean up old */ if (lgr->range.iomode == IOMODE_READ) { - cred = xchg(&mirror->ro_cred, cred); - rcu_assign_pointer(fls->mirror_array[i]->ro_cred, cred); + old = xchg(&mirror->ro_cred, cred); + rcu_assign_pointer(fls->mirror_array[i]->ro_cred, old); + /* drop file if creds changed */ + if (old != cred) { + filp = rcu_dereference_protected(xchg(&mirror->ro_file, NULL), 1); + if (filp) + fput(filp); + } } else { - cred = xchg(&mirror->rw_cred, cred); - rcu_assign_pointer(fls->mirror_array[i]->rw_cred, cred); + old = xchg(&mirror->rw_cred, cred); + rcu_assign_pointer(fls->mirror_array[i]->rw_cred, old); + if (old != cred) { + filp = rcu_dereference_protected(xchg(&mirror->rw_file, NULL), 1); + if (filp) + fput(filp); + } } ff_layout_free_mirror(fls->mirror_array[i]); fls->mirror_array[i] = mirror; @@ -1756,6 +1824,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) struct pnfs_layout_segment *lseg = hdr->lseg; struct nfs4_pnfs_ds *ds; struct rpc_clnt *ds_clnt; + struct file *filp; struct nfs4_ff_layout_mirror *mirror; const struct cred *ds_cred; loff_t offset = hdr->args.offset; @@ -1802,11 +1871,19 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) hdr->args.offset = offset; hdr->mds_offset = offset; + /* Start IO accounting for local read */ + filp = ff_local_open_fh(lseg, idx, ds->ds_clp, ds_cred, fh, + FMODE_READ); + if (filp) { + hdr->task.tk_start = ktime_get(); + ff_layout_read_record_layoutstats_start(&hdr->task, hdr); + } + /* Perform an asynchronous read to ds */ nfs_initiate_pgio(ds_clnt, hdr, ds_cred, ds->ds_clp->rpc_ops, vers == 3 ? &ff_layout_read_call_ops_v3 : &ff_layout_read_call_ops_v4, - 0, RPC_TASK_SOFTCONN, NULL); + 0, RPC_TASK_SOFTCONN, filp); put_cred(ds_cred); return PNFS_ATTEMPTED; @@ -1826,6 +1903,7 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) struct pnfs_layout_segment *lseg = hdr->lseg; struct nfs4_pnfs_ds *ds; struct rpc_clnt *ds_clnt; + struct file *filp; struct nfs4_ff_layout_mirror *mirror; const struct cred *ds_cred; loff_t offset = hdr->args.offset; @@ -1870,11 +1948,19 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) */ hdr->args.offset = offset; + /* Start IO accounting for local write */ + filp = ff_local_open_fh(lseg, idx, ds->ds_clp, ds_cred, fh, + FMODE_READ|FMODE_WRITE); + if (filp) { + hdr->task.tk_start = ktime_get(); + ff_layout_write_record_layoutstats_start(&hdr->task, hdr); + } + /* Perform an asynchronous write */ nfs_initiate_pgio(ds_clnt, hdr, ds_cred, ds->ds_clp->rpc_ops, vers == 3 ? &ff_layout_write_call_ops_v3 : &ff_layout_write_call_ops_v4, - sync, RPC_TASK_SOFTCONN, NULL); + sync, RPC_TASK_SOFTCONN, filp); put_cred(ds_cred); return PNFS_ATTEMPTED; @@ -1908,6 +1994,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how) struct pnfs_layout_segment *lseg = data->lseg; struct nfs4_pnfs_ds *ds; struct rpc_clnt *ds_clnt; + struct file *filp; struct nfs4_ff_layout_mirror *mirror; const struct cred *ds_cred; u32 idx; @@ -1946,10 +2033,18 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how) if (fh) data->args.fh = fh; + /* Start IO accounting for local commit */ + filp = ff_local_open_fh(lseg, idx, ds->ds_clp, ds_cred, fh, + FMODE_READ|FMODE_WRITE); + if (filp) { + data->task.tk_start = ktime_get(); + ff_layout_commit_record_layoutstats_start(&data->task, data); + } + ret = nfs_initiate_commit(ds_clnt, data, ds->ds_clp->rpc_ops, vers == 3 ? &ff_layout_commit_call_ops_v3 : &ff_layout_commit_call_ops_v4, - how, RPC_TASK_SOFTCONN, NULL); + how, RPC_TASK_SOFTCONN, filp); put_cred(ds_cred); return ret; out_err: diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index f84b3fb0dddd..8e042df5a2c9 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -82,7 +82,9 @@ struct nfs4_ff_layout_mirror { struct nfs_fh *fh_versions; nfs4_stateid stateid; const struct cred __rcu *ro_cred; + struct file __rcu *ro_file; const struct cred __rcu *rw_cred; + struct file __rcu *rw_file; refcount_t ref; spinlock_t lock; unsigned long flags; diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index e028f5a0ef5f..e58bedfb1dcc 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -395,6 +395,12 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, /* connect success, check rsize/wsize limit */ if (!status) { + /* + * ds_clp is put in destroy_ds(). + * keep ds_clp even if DS is local, so that if local IO cannot + * proceed somehow, we can fall back to NFS whenever we want. + */ + nfs_local_probe(ds->ds_clp); max_payload = nfs_block_size(rpc_max_payload(ds->ds_clp->cl_rpcclient), NULL); -- 2.44.0