2011/3/23 Benny Halevy <bhalevy@xxxxxxxxxxx>: > On 2011-03-23 15:27, Fred Isaman wrote: >> >> We create three major hooks for the pnfs code. >> >> pnfs_mark_request_commit() is called during writeback_done from >> nfs_mark_request_commit, which gives the driver an opportunity to >> claim it wants control over commiting a particular req. >> >> pnfs_choose_commit_list() is called from nfs_scan_list >> to choose which list a given req should be added to, based on >> where we intend to send it for COMMIT. It is up to the driver >> to have preallocated list headers for each destination it may need. >> >> pnfs_commit_list() is how the driver actually takes control, it is >> used instead of nfs_commit_list(). >> >> In order to pass information between the above functions, we create >> a union in nfs_page to hold a lseg (which is possible because the req is >> not on any list while in transition), and add some flags to indicate >> if we need to use the pnfs code. >> >> Signed-off-by: Fred Isaman<iisaman@xxxxxxxxxx> >> --- >> fs/nfs/pagelist.c | 5 ++- >> fs/nfs/pnfs.h | 73 >> ++++++++++++++++++++++++++++++++++++++++++++++ >> fs/nfs/write.c | 41 ++++++++++++++++--------- >> include/linux/nfs_fs.h | 1 + >> include/linux/nfs_page.h | 6 +++- >> 5 files changed, 108 insertions(+), 18 deletions(-) >> >> diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c >> index fd85618..87a593c 100644 >> --- a/fs/nfs/pagelist.c >> +++ b/fs/nfs/pagelist.c >> @@ -398,6 +398,7 @@ int nfs_scan_list(struct nfs_inode *nfsi, >> pgoff_t idx_end; >> int found, i; >> int res; >> + struct list_head *list; >> >> res = 0; >> if (npages == 0) >> @@ -418,10 +419,10 @@ int nfs_scan_list(struct nfs_inode *nfsi, >> idx_start = req->wb_index + 1; >> if (nfs_set_page_tag_locked(req)) { >> kref_get(&req->wb_kref); >> - nfs_list_remove_request(req); >> radix_tree_tag_clear(&nfsi->nfs_page_tree, >> req->wb_index, tag); >> - nfs_list_add_request(req, dst); >> + list = pnfs_choose_commit_list(req, dst); >> + nfs_list_add_request(req, list); >> res++; >> if (res == INT_MAX) >> goto out; >> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h >> index 6380b94..5370f1b 100644 >> --- a/fs/nfs/pnfs.h >> +++ b/fs/nfs/pnfs.h >> @@ -74,6 +74,13 @@ struct pnfs_layoutdriver_type { >> /* test for nfs page cache coalescing */ >> int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, >> struct nfs_page *); >> >> + /* Returns true if layoutdriver wants to divert this request to >> + * driver's commit routine. >> + */ >> + bool (*mark_pnfs_commit)(struct pnfs_layout_segment *lseg); >> + struct list_head * (*choose_commit_list) (struct nfs_page *req); >> + int (*commit_pagelist)(struct inode *inode, struct list_head >> *mds_pages, int how); >> + >> /* >> * Return PNFS_ATTEMPTED to indicate the layout code has attempted >> * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS >> @@ -169,6 +176,51 @@ static inline int pnfs_enabled_sb(struct nfs_server >> *nfss) >> return nfss->pnfs_curr_ld != NULL; >> } >> >> +static inline void >> +pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment >> *lseg) >> +{ >> + if (lseg) { >> + struct pnfs_layoutdriver_type *ld; >> + >> + ld = >> NFS_SERVER(req->wb_page->mapping->host)->pnfs_curr_ld; >> + if (ld->mark_pnfs_commit&& ld->mark_pnfs_commit(lseg)) { > > nit: space before and after "&&" > I'm ignoring the formatting nits due to mailer mishandling, per your subsequent email. >> + set_bit(PG_PNFS_COMMIT,&req->wb_flags); > > nit: space after comma > >> + req->wb_commit_lseg = get_lseg(lseg); > > What are the atomicity requirements of the PG_PNFS_COMMIT bit in wb_flags > and the validity of wb_commit_lseg? > It is handled under the PG_BUSY bit_lock. >> + } >> + } >> +} >> + >> +static inline int >> +pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int >> how) >> +{ >> + if (!test_and_clear_bit(NFS_INO_PNFS_COMMIT,&NFS_I(inode)->flags)) >> + return PNFS_NOT_ATTEMPTED; >> + return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, >> mds_pages, how); > > Again, I don't get the concurrency control here... > NFS_INO_PNFS_COMMIT is set in pnfs_choose_commit_list under i_lock > but then pnfs_commit_list is called outside the i_lock so what guarantees > that NFS_INO_PNFS_COMMIT is still valid with respect to the output of > pnfs_choose_commit_list? > They are both done under the NFS_INO_COMMIT bit lock. >> +} >> + >> +static inline struct list_head * >> +pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds) >> +{ >> + struct list_head *rv; >> + >> + if (test_and_clear_bit(PG_PNFS_COMMIT,&req->wb_flags)) { > > nit: space after comma > >> + struct inode *inode = >> req->wb_commit_lseg->pls_layout->plh_inode; >> + >> + set_bit(NFS_INO_PNFS_COMMIT,&NFS_I(inode)->flags); > > nit: ditto > >> + rv = >> NFS_SERVER(inode)->pnfs_curr_ld->choose_commit_list(req); >> + /* matched by ref taken when PG_PNFS_COMMIT is set */ >> + put_lseg(req->wb_commit_lseg); > > Since wb_commit_lseg and wb_list are in a union, how about > INIT_LIST_HEAD(&req->wb_list); I actually had that there. Trond pointed out it was unnecessary and asked that it be removed. > >> + } else >> + rv = mds; >> + return rv; >> +} >> + >> +static inline void pnfs_clear_request_commit(struct nfs_page *req) >> +{ >> + if (test_and_clear_bit(PG_PNFS_COMMIT,&req->wb_flags)) >> + put_lseg(req->wb_commit_lseg); > > ditto I see your ditto, and raise you one :) > > Benny > >> +} >> + >> #else /* CONFIG_NFS_V4_1 */ >> >> static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) >> @@ -252,6 +304,27 @@ pnfs_pageio_init_write(struct nfs_pageio_descriptor >> *pgio, struct inode *ino) >> pgio->pg_test = NULL; >> } >> >> +static inline void >> +pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment >> *lseg) >> +{ >> +} >> + >> +static inline int >> +pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int >> how) >> +{ >> + return PNFS_NOT_ATTEMPTED; >> +} >> + >> +static inline struct list_head * >> +pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds) >> +{ >> + return mds; >> +} >> + >> +static inline void pnfs_clear_request_commit(struct nfs_page *req) >> +{ >> +} >> + >> #endif /* CONFIG_NFS_V4_1 */ >> >> #endif /* FS_NFS_PNFS_H */ >> diff --git a/fs/nfs/write.c b/fs/nfs/write.c >> index f5f005e..6927a18 100644 >> --- a/fs/nfs/write.c >> +++ b/fs/nfs/write.c >> @@ -441,7 +441,7 @@ nfs_mark_request_dirty(struct nfs_page *req) >> * Add a request to the inode's commit list. >> */ >> static void >> -nfs_mark_request_commit(struct nfs_page *req) >> +nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment >> *lseg) >> { >> struct inode *inode = req->wb_context->path.dentry->d_inode; >> struct nfs_inode *nfsi = NFS_I(inode); >> @@ -453,6 +453,7 @@ nfs_mark_request_commit(struct nfs_page *req) >> NFS_PAGE_TAG_COMMIT); >> nfsi->ncommit++; >> spin_unlock(&inode->i_lock); >> + pnfs_mark_request_commit(req, lseg); > > Why do this out of the i_lock? > Both the req and lseg are held in memory by references, and any serialization needs are met by the req's PG_BUSY bit lock. Fred >> inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); >> inc_bdi_stat(req->wb_page->mapping->backing_dev_info, >> BDI_RECLAIMABLE); >> __mark_inode_dirty(inode, I_DIRTY_DATASYNC); >> @@ -481,10 +482,11 @@ int nfs_write_need_commit(struct nfs_write_data >> *data) >> } >> >> static inline >> -int nfs_reschedule_unstable_write(struct nfs_page *req) >> +int nfs_reschedule_unstable_write(struct nfs_page *req, >> + struct nfs_write_data *data) >> { >> if (test_and_clear_bit(PG_NEED_COMMIT,&req->wb_flags)) { >> - nfs_mark_request_commit(req); >> + nfs_mark_request_commit(req, data->lseg); >> return 1; >> } >> if (test_and_clear_bit(PG_NEED_RESCHED,&req->wb_flags)) { >> @@ -495,7 +497,7 @@ int nfs_reschedule_unstable_write(struct nfs_page >> *req) >> } >> #else >> static inline void >> -nfs_mark_request_commit(struct nfs_page *req) >> +nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment >> *lseg) >> { >> } >> >> @@ -512,7 +514,8 @@ int nfs_write_need_commit(struct nfs_write_data *data) >> } >> >> static inline >> -int nfs_reschedule_unstable_write(struct nfs_page *req) >> +int nfs_reschedule_unstable_write(struct nfs_page *req, >> + struct nfs_write_data *data) >> { >> return 0; >> } >> @@ -615,9 +618,11 @@ static struct nfs_page >> *nfs_try_to_update_request(struct inode *inode, >> } >> >> if (nfs_clear_request_commit(req)&& >> - radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree, >> - req->wb_index, NFS_PAGE_TAG_COMMIT) != >> NULL) >> + radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree, >> + req->wb_index, NFS_PAGE_TAG_COMMIT) != >> NULL) { >> NFS_I(inode)->ncommit--; >> + pnfs_clear_request_commit(req); >> + } >> >> /* Okay, the request matches. Update the region */ >> if (offset< req->wb_offset) { >> @@ -765,11 +770,12 @@ int nfs_updatepage(struct file *file, struct page >> *page, >> return status; >> } >> >> -static void nfs_writepage_release(struct nfs_page *req) >> +static void nfs_writepage_release(struct nfs_page *req, >> + struct nfs_write_data *data) >> { >> struct page *page = req->wb_page; >> >> - if (PageError(req->wb_page) || >> !nfs_reschedule_unstable_write(req)) >> + if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req, >> data)) >> nfs_inode_remove_request(req); >> nfs_clear_page_tag_locked(req); >> nfs_end_page_writeback(page); >> @@ -1087,7 +1093,7 @@ static void nfs_writeback_release_partial(void >> *calldata) >> >> out: >> if (atomic_dec_and_test(&req->wb_complete)) >> - nfs_writepage_release(req); >> + nfs_writepage_release(req, data); >> nfs_writedata_release(calldata); >> } >> >> @@ -1154,7 +1160,7 @@ static void nfs_writeback_release_full(void >> *calldata) >> >> if (nfs_write_need_commit(data)) { >> memcpy(&req->wb_verf,&data->verf, >> sizeof(req->wb_verf)); >> - nfs_mark_request_commit(req); >> + nfs_mark_request_commit(req, data->lseg); >> dprintk(" marked for commit\n"); >> goto next; >> } >> @@ -1357,14 +1363,15 @@ static void nfs_init_commit(struct nfs_write_data >> *data, >> nfs_fattr_init(&data->fattr); >> } >> >> -static void nfs_retry_commit(struct list_head *page_list) >> +static void nfs_retry_commit(struct list_head *page_list, >> + struct pnfs_layout_segment *lseg) >> { >> struct nfs_page *req; >> >> while (!list_empty(page_list)) { >> req = nfs_list_entry(page_list->next); >> nfs_list_remove_request(req); >> - nfs_mark_request_commit(req); >> + nfs_mark_request_commit(req, lseg); >> dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); >> dec_bdi_stat(req->wb_page->mapping->backing_dev_info, >> BDI_RECLAIMABLE); >> @@ -1389,7 +1396,7 @@ nfs_commit_list(struct inode *inode, struct >> list_head *head, int how) >> nfs_init_commit(data, head); >> return nfs_initiate_commit(data, NFS_CLIENT(inode), data->mds_ops, >> how); >> out_bad: >> - nfs_retry_commit(head); >> + nfs_retry_commit(head, NULL); >> nfs_commit_clear_lock(NFS_I(inode)); >> return -ENOMEM; >> } >> @@ -1477,7 +1484,11 @@ int nfs_commit_inode(struct inode *inode, int how) >> res = nfs_scan_commit(inode,&head, 0, 0); >> spin_unlock(&inode->i_lock); >> if (res) { >> - int error = nfs_commit_list(inode,&head, how); >> + int error; >> + >> + error = pnfs_commit_list(inode,&head, how); >> + if (error == PNFS_NOT_ATTEMPTED) >> + error = nfs_commit_list(inode,&head, how); >> if (error< 0) >> return error; >> if (!may_wait) >> diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h >> index cb2add4..f64ea14 100644 >> --- a/include/linux/nfs_fs.h >> +++ b/include/linux/nfs_fs.h >> @@ -221,6 +221,7 @@ struct nfs_inode { >> #define NFS_INO_FSCACHE (5) /* inode can be >> cached by FS-Cache */ >> #define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie >> management lock */ >> #define NFS_INO_COMMIT (7) /* inode is >> committing unstable writes */ >> +#define NFS_INO_PNFS_COMMIT (8) /* use pnfs code for >> commit */ > > nit: alignment > >> >> static inline struct nfs_inode *NFS_I(const struct inode *inode) >> { >> diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h >> index 92d54c8..8023e4e 100644 >> --- a/include/linux/nfs_page.h >> +++ b/include/linux/nfs_page.h >> @@ -33,11 +33,15 @@ enum { >> PG_CLEAN, >> PG_NEED_COMMIT, >> PG_NEED_RESCHED, >> + PG_PNFS_COMMIT, >> }; >> >> struct nfs_inode; >> struct nfs_page { >> - struct list_head wb_list; /* Defines state of page: >> */ >> + union { >> + struct list_head wb_list; /* Defines state >> of page: */ >> + struct pnfs_layout_segment *wb_commit_lseg; /* Used when >> PG_PNFS_COMMIT set */ >> + }; >> struct page *wb_page; /* page to read in/write >> out */ >> struct nfs_open_context *wb_context; /* File state context info >> */ >> struct nfs_lock_context *wb_lock_context; /* lock context >> info */ > > -- > To unsubscribe from this list: send the line "unsubscribe linux-nfs" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html > -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html