On 2011-07-12 22:29, Trond Myklebust wrote: > ...and ensure that we recoalese to take into account differences in > block sizes when falling back to read through the MDS. > > Signed-off-by: Trond Myklebust <Trond.Myklebust@xxxxxxxxxx> > --- > fs/nfs/internal.h | 4 +++ > fs/nfs/nfs4filelayout.c | 2 +- > fs/nfs/objlayout/objio_osd.c | 2 +- > fs/nfs/pnfs.c | 57 ++++++++++++++++++++++++++++++++++++++++- > fs/nfs/pnfs.h | 10 +------ > fs/nfs/read.c | 46 ++++++++++++++------------------- > include/linux/nfs_page.h | 1 - > 7 files changed, 82 insertions(+), 40 deletions(-) > > diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h > index 31e8b50..795b3e0 100644 > --- a/fs/nfs/internal.h > +++ b/fs/nfs/internal.h > @@ -295,10 +295,14 @@ extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh); > extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, > const struct rpc_call_ops *call_ops); > extern void nfs_read_prepare(struct rpc_task *task, void *calldata); > +extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, > + struct list_head *head); > > struct nfs_pageio_descriptor; > extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, > struct inode *inode); > +extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); > +extern void nfs_readdata_release(struct nfs_read_data *rdata); > > /* write.c */ > extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, > diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c > index af9bf9e..fc556d6 100644 > --- a/fs/nfs/nfs4filelayout.c > +++ b/fs/nfs/nfs4filelayout.c > @@ -735,7 +735,7 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, > static const struct nfs_pageio_ops filelayout_pg_read_ops = { > .pg_init = filelayout_pg_init_read, > .pg_test = filelayout_pg_test, > - .pg_doio = nfs_generic_pg_readpages, > + .pg_doio = pnfs_generic_pg_readpages, > }; > > static const struct nfs_pageio_ops filelayout_pg_write_ops = { > diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c > index 70272d5..add6289 100644 > --- a/fs/nfs/objlayout/objio_osd.c > +++ b/fs/nfs/objlayout/objio_osd.c > @@ -1007,7 +1007,7 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, > static const struct nfs_pageio_ops objio_pg_read_ops = { > .pg_init = pnfs_generic_pg_init_read, > .pg_test = objio_pg_test, > - .pg_doio = nfs_generic_pg_readpages, > + .pg_doio = pnfs_generic_pg_readpages, > }; > > static const struct nfs_pageio_ops objio_pg_write_ops = { > diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c > index 5b3cc3f..9eca5a8 100644 > --- a/fs/nfs/pnfs.c > +++ b/fs/nfs/pnfs.c > @@ -28,6 +28,7 @@ > */ > > #include <linux/nfs_fs.h> > +#include <linux/nfs_page.h> > #include "internal.h" > #include "pnfs.h" > #include "iostat.h" > @@ -1216,18 +1217,32 @@ pnfs_ld_read_done(struct nfs_read_data *data) > } > EXPORT_SYMBOL_GPL(pnfs_ld_read_done); > > +static void > +pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, > + struct nfs_read_data *data) > +{ > + list_splice_tail_init(&data->pages, &desc->pg_list); > + if (data->req && list_empty(&data->req->wb_list)) > + nfs_list_add_request(data->req, &desc->pg_list); > + nfs_pageio_reset_read_mds(desc); > + desc->pg_recoalesce = 1; > + nfs_readdata_release(data); I'm confused... Isn't this function supposed to call the nfs read path? Benny > +} > + > /* > * Call the appropriate parallel I/O subsystem read function. > */ > -enum pnfs_try_status > +static enum pnfs_try_status > pnfs_try_to_read_data(struct nfs_read_data *rdata, > - const struct rpc_call_ops *call_ops) > + const struct rpc_call_ops *call_ops, > + struct pnfs_layout_segment *lseg) > { > struct inode *inode = rdata->inode; > struct nfs_server *nfss = NFS_SERVER(inode); > enum pnfs_try_status trypnfs; > > rdata->mds_ops = call_ops; > + rdata->lseg = get_lseg(lseg); > > dprintk("%s: Reading ino:%lu %u@%llu\n", > __func__, inode->i_ino, rdata->args.count, rdata->args.offset); > @@ -1243,6 +1258,44 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata, > return trypnfs; > } > > +static void > +pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head) > +{ > + struct nfs_read_data *data; > + const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; > + struct pnfs_layout_segment *lseg = desc->pg_lseg; > + > + desc->pg_lseg = NULL; > + while (!list_empty(head)) { > + enum pnfs_try_status trypnfs; > + > + data = list_entry(head->next, struct nfs_read_data, list); > + list_del_init(&data->list); > + > + trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); > + if (trypnfs == PNFS_NOT_ATTEMPTED) > + pnfs_read_through_mds(desc, data); > + } > + put_lseg(lseg); > +} > + > +int > +pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) > +{ > + LIST_HEAD(head); > + int ret; > + > + ret = nfs_generic_pagein(desc, &head); > + if (ret != 0) { > + put_lseg(desc->pg_lseg); > + desc->pg_lseg = NULL; > + return ret; > + } > + pnfs_do_multiple_reads(desc, &head); > + return 0; > +} > +EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages); > + > /* > * Currently there is only one (whole file) write lseg. > */ > diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h > index a59736e..c40ffa5 100644 > --- a/fs/nfs/pnfs.h > +++ b/fs/nfs/pnfs.h > @@ -157,9 +157,8 @@ void set_pnfs_layoutdriver(struct nfs_server *, u32 id); > void unset_pnfs_layoutdriver(struct nfs_server *); > enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *, > const struct rpc_call_ops *, int); > -enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *, > - const struct rpc_call_ops *); > void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *); > +int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); > void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page *); > bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); > int pnfs_layout_process(struct nfs4_layoutget *lgp); > @@ -330,13 +329,6 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg) > } > > static inline enum pnfs_try_status > -pnfs_try_to_read_data(struct nfs_read_data *data, > - const struct rpc_call_ops *call_ops) > -{ > - return PNFS_NOT_ATTEMPTED; > -} > - > -static inline enum pnfs_try_status > pnfs_try_to_write_data(struct nfs_write_data *data, > const struct rpc_call_ops *call_ops, int how) > { > diff --git a/fs/nfs/read.c b/fs/nfs/read.c > index 47f92c1..3745eed 100644 > --- a/fs/nfs/read.c > +++ b/fs/nfs/read.c > @@ -67,7 +67,7 @@ void nfs_readdata_free(struct nfs_read_data *p) > mempool_free(p, nfs_rdata_mempool); > } > > -static void nfs_readdata_release(struct nfs_read_data *rdata) > +void nfs_readdata_release(struct nfs_read_data *rdata) > { > put_lseg(rdata->lseg); > put_nfs_open_context(rdata->args.context); > @@ -120,6 +120,12 @@ void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, > } > EXPORT_SYMBOL_GPL(nfs_pageio_init_read_mds); > > +void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) > +{ > + pgio->pg_ops = &nfs_pageio_read_ops; > + pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize; > +} > + > static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, > struct inode *inode) > { > @@ -235,26 +241,16 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, > } > > static int nfs_do_read(struct nfs_read_data *data, > - const struct rpc_call_ops *call_ops, > - struct pnfs_layout_segment *lseg) > + const struct rpc_call_ops *call_ops) > { > struct inode *inode = data->args.context->path.dentry->d_inode; > > - if (lseg) { > - data->lseg = get_lseg(lseg); > - if (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED) > - return 0; > - put_lseg(data->lseg); > - data->lseg = NULL; > - } > - > return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops); > } > > static int > nfs_do_multiple_reads(struct list_head *head, > - const struct rpc_call_ops *call_ops, > - struct pnfs_layout_segment *lseg) > + const struct rpc_call_ops *call_ops) > { > struct nfs_read_data *data; > int ret = 0; > @@ -265,7 +261,7 @@ nfs_do_multiple_reads(struct list_head *head, > data = list_entry(head->next, struct nfs_read_data, list); > list_del_init(&data->list); > > - ret2 = nfs_do_read(data, call_ops, lseg); > + ret2 = nfs_do_read(data, call_ops); > if (ret == 0) > ret = ret2; > } > @@ -372,25 +368,23 @@ out: > return ret; > } > > -int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) > +int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct list_head *head) > +{ > + if (desc->pg_bsize < PAGE_CACHE_SIZE) > + return nfs_pagein_multi(desc, head); > + return nfs_pagein_one(desc, head); > +} > + > +static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) > { > LIST_HEAD(head); > int ret; > > - if (desc->pg_bsize < PAGE_CACHE_SIZE) > - ret = nfs_pagein_multi(desc, &head); > - else > - ret = nfs_pagein_one(desc, &head); > - > + ret = nfs_generic_pagein(desc, &head); > if (ret == 0) > - ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops, > - desc->pg_lseg); > - put_lseg(desc->pg_lseg); > - desc->pg_lseg = NULL; > + ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops); > return ret; > } > -EXPORT_SYMBOL_GPL(nfs_generic_pg_readpages); > - > > static const struct nfs_pageio_ops nfs_pageio_read_ops = { > .pg_test = nfs_generic_pg_test, > diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h > index 7241b2a..0a48f84 100644 > --- a/include/linux/nfs_page.h > +++ b/include/linux/nfs_page.h > @@ -108,7 +108,6 @@ extern void nfs_unlock_request(struct nfs_page *req); > extern int nfs_set_page_tag_locked(struct nfs_page *req); > extern void nfs_clear_page_tag_locked(struct nfs_page *req); > > -extern int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); > extern int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc); > > -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html