Re: [PATCH 08/12] NFSv4.1: add generic layer hooks for pnfs COMMIT

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2011-03-23 22:30, Fred Isaman wrote:
> 2011/3/23 Benny Halevy <bhalevy@xxxxxxxxxxx>:
>> On 2011-03-23 15:27, Fred Isaman wrote:
>>>
>>> We create three major hooks for the pnfs code.
>>>
>>> pnfs_mark_request_commit() is called during writeback_done from
>>> nfs_mark_request_commit, which gives the driver an opportunity to
>>> claim it wants control over commiting a particular req.
>>>
>>> pnfs_choose_commit_list() is called from nfs_scan_list
>>> to choose which list a given req should be added to, based on
>>> where we intend to send it for COMMIT.  It is up to the driver
>>> to have preallocated list headers for each destination it may need.
>>>
>>> pnfs_commit_list() is how the driver actually takes control, it is
>>> used instead of nfs_commit_list().
>>>
>>> In order to pass information between the above functions, we create
>>> a union in nfs_page to hold a lseg (which is possible because the req is
>>> not on any list while in transition), and add some flags to indicate
>>> if we need to use the pnfs code.
>>>
>>> Signed-off-by: Fred Isaman<iisaman@xxxxxxxxxx>
>>> ---
>>>  fs/nfs/pagelist.c        |    5 ++-
>>>  fs/nfs/pnfs.h            |   73
>>> ++++++++++++++++++++++++++++++++++++++++++++++
>>>  fs/nfs/write.c           |   41 ++++++++++++++++---------
>>>  include/linux/nfs_fs.h   |    1 +
>>>  include/linux/nfs_page.h |    6 +++-
>>>  5 files changed, 108 insertions(+), 18 deletions(-)
>>>
>>> diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
>>> index fd85618..87a593c 100644
>>> --- a/fs/nfs/pagelist.c
>>> +++ b/fs/nfs/pagelist.c
>>> @@ -398,6 +398,7 @@ int nfs_scan_list(struct nfs_inode *nfsi,
>>>        pgoff_t idx_end;
>>>        int found, i;
>>>        int res;
>>> +       struct list_head *list;
>>>
>>>        res = 0;
>>>        if (npages == 0)
>>> @@ -418,10 +419,10 @@ int nfs_scan_list(struct nfs_inode *nfsi,
>>>                        idx_start = req->wb_index + 1;
>>>                        if (nfs_set_page_tag_locked(req)) {
>>>                                kref_get(&req->wb_kref);
>>> -                               nfs_list_remove_request(req);
>>>                                radix_tree_tag_clear(&nfsi->nfs_page_tree,
>>>                                                req->wb_index, tag);
>>> -                               nfs_list_add_request(req, dst);
>>> +                               list = pnfs_choose_commit_list(req, dst);
>>> +                               nfs_list_add_request(req, list);
>>>                                res++;
>>>                                if (res == INT_MAX)
>>>                                        goto out;
>>> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
>>> index 6380b94..5370f1b 100644
>>> --- a/fs/nfs/pnfs.h
>>> +++ b/fs/nfs/pnfs.h
>>> @@ -74,6 +74,13 @@ struct pnfs_layoutdriver_type {
>>>        /* test for nfs page cache coalescing */
>>>        int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *,
>>> struct nfs_page *);
>>>
>>> +       /* Returns true if layoutdriver wants to divert this request to
>>> +        * driver's commit routine.
>>> +        */
>>> +       bool (*mark_pnfs_commit)(struct pnfs_layout_segment *lseg);
>>> +       struct list_head * (*choose_commit_list) (struct nfs_page *req);
>>> +       int (*commit_pagelist)(struct inode *inode, struct list_head
>>> *mds_pages, int how);
>>> +
>>>        /*
>>>         * Return PNFS_ATTEMPTED to indicate the layout code has attempted
>>>         * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS
>>> @@ -169,6 +176,51 @@ static inline int pnfs_enabled_sb(struct nfs_server
>>> *nfss)
>>>        return nfss->pnfs_curr_ld != NULL;
>>>  }
>>>
>>> +static inline void
>>> +pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment
>>> *lseg)
>>> +{
>>> +       if (lseg) {
>>> +               struct pnfs_layoutdriver_type *ld;
>>> +
>>> +               ld =
>>> NFS_SERVER(req->wb_page->mapping->host)->pnfs_curr_ld;
>>> +               if (ld->mark_pnfs_commit&&  ld->mark_pnfs_commit(lseg)) {
>>
>> nit: space before and after "&&"
>>
> 
> I'm ignoring the formatting nits due to mailer mishandling, per your
> subsequent email.
> 
>>> +                       set_bit(PG_PNFS_COMMIT,&req->wb_flags);
>>
>> nit: space after comma
>>
>>> +                       req->wb_commit_lseg = get_lseg(lseg);
>>
>> What are the atomicity requirements of the PG_PNFS_COMMIT bit in wb_flags
>> and the validity of wb_commit_lseg?
>>
> 
> It is handled under the PG_BUSY bit_lock.
> 

OK

>>> +               }
>>> +       }
>>> +}
>>> +
>>> +static inline int
>>> +pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int
>>> how)
>>> +{
>>> +       if (!test_and_clear_bit(NFS_INO_PNFS_COMMIT,&NFS_I(inode)->flags))
>>> +               return PNFS_NOT_ATTEMPTED;
>>> +       return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode,
>>> mds_pages, how);
>>
>> Again, I don't get the concurrency control here...
>> NFS_INO_PNFS_COMMIT is set in pnfs_choose_commit_list under i_lock
>> but then pnfs_commit_list is called outside the i_lock so what guarantees
>> that NFS_INO_PNFS_COMMIT is still valid with respect to the output of
>> pnfs_choose_commit_list?
>>
> 
> 
> They are both done under the NFS_INO_COMMIT bit lock.
> 
>>> +}
>>> +
>>> +static inline struct list_head *
>>> +pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds)
>>> +{
>>> +       struct list_head *rv;
>>> +
>>> +       if (test_and_clear_bit(PG_PNFS_COMMIT,&req->wb_flags)) {
>>
>> nit: space after comma
>>
>>> +               struct inode *inode =
>>> req->wb_commit_lseg->pls_layout->plh_inode;
>>> +
>>> +               set_bit(NFS_INO_PNFS_COMMIT,&NFS_I(inode)->flags);
>>
>> nit: ditto
>>
>>> +               rv =
>>> NFS_SERVER(inode)->pnfs_curr_ld->choose_commit_list(req);
>>> +               /* matched by ref taken when PG_PNFS_COMMIT is set */
>>> +               put_lseg(req->wb_commit_lseg);
>>
>> Since wb_commit_lseg and wb_list are in a union, how about
>>                INIT_LIST_HEAD(&req->wb_list);
> 
> I actually had that there.  Trond pointed out it was unnecessary and
> asked that it be removed.
> 

Seems fragile to me. I hope Trond's around to fix it when it breaks ;-)

>>
>>> +       } else
>>> +               rv = mds;
>>> +       return rv;
>>> +}
>>> +
>>> +static inline void pnfs_clear_request_commit(struct nfs_page *req)
>>> +{
>>> +       if (test_and_clear_bit(PG_PNFS_COMMIT,&req->wb_flags))
>>> +               put_lseg(req->wb_commit_lseg);
>>
>> ditto
> 
> I see your ditto, and raise you one :)
> 

:)

>>
>> Benny
>>
>>> +}
>>> +
>>>  #else  /* CONFIG_NFS_V4_1 */
>>>
>>>  static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
>>> @@ -252,6 +304,27 @@ pnfs_pageio_init_write(struct nfs_pageio_descriptor
>>> *pgio, struct inode *ino)
>>>        pgio->pg_test = NULL;
>>>  }
>>>
>>> +static inline void
>>> +pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment
>>> *lseg)
>>> +{
>>> +}
>>> +
>>> +static inline int
>>> +pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int
>>> how)
>>> +{
>>> +       return PNFS_NOT_ATTEMPTED;
>>> +}
>>> +
>>> +static inline struct list_head *
>>> +pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds)
>>> +{
>>> +       return mds;
>>> +}
>>> +
>>> +static inline void pnfs_clear_request_commit(struct nfs_page *req)
>>> +{
>>> +}
>>> +
>>>  #endif /* CONFIG_NFS_V4_1 */
>>>
>>>  #endif /* FS_NFS_PNFS_H */
>>> diff --git a/fs/nfs/write.c b/fs/nfs/write.c
>>> index f5f005e..6927a18 100644
>>> --- a/fs/nfs/write.c
>>> +++ b/fs/nfs/write.c
>>> @@ -441,7 +441,7 @@ nfs_mark_request_dirty(struct nfs_page *req)
>>>   * Add a request to the inode's commit list.
>>>   */
>>>  static void
>>> -nfs_mark_request_commit(struct nfs_page *req)
>>> +nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment
>>> *lseg)
>>>  {
>>>        struct inode *inode = req->wb_context->path.dentry->d_inode;
>>>        struct nfs_inode *nfsi = NFS_I(inode);
>>> @@ -453,6 +453,7 @@ nfs_mark_request_commit(struct nfs_page *req)
>>>                        NFS_PAGE_TAG_COMMIT);
>>>        nfsi->ncommit++;
>>>        spin_unlock(&inode->i_lock);
>>> +       pnfs_mark_request_commit(req, lseg);
>>
>> Why do this out of the i_lock?
>>
> 
> Both the req and lseg are held in memory by references, and any
> serialization needs are met by the req's PG_BUSY bit lock.
> 
> Fred
> 
>>>        inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
>>>        inc_bdi_stat(req->wb_page->mapping->backing_dev_info,
>>> BDI_RECLAIMABLE);
>>>        __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
>>> @@ -481,10 +482,11 @@ int nfs_write_need_commit(struct nfs_write_data
>>> *data)
>>>  }
>>>
>>>  static inline
>>> -int nfs_reschedule_unstable_write(struct nfs_page *req)
>>> +int nfs_reschedule_unstable_write(struct nfs_page *req,
>>> +                                 struct nfs_write_data *data)
>>>  {
>>>        if (test_and_clear_bit(PG_NEED_COMMIT,&req->wb_flags)) {
>>> -               nfs_mark_request_commit(req);
>>> +               nfs_mark_request_commit(req, data->lseg);
>>>                return 1;
>>>        }
>>>        if (test_and_clear_bit(PG_NEED_RESCHED,&req->wb_flags)) {
>>> @@ -495,7 +497,7 @@ int nfs_reschedule_unstable_write(struct nfs_page
>>> *req)
>>>  }
>>>  #else
>>>  static inline void
>>> -nfs_mark_request_commit(struct nfs_page *req)
>>> +nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment
>>> *lseg)
>>>  {
>>>  }
>>>
>>> @@ -512,7 +514,8 @@ int nfs_write_need_commit(struct nfs_write_data *data)
>>>  }
>>>
>>>  static inline
>>> -int nfs_reschedule_unstable_write(struct nfs_page *req)
>>> +int nfs_reschedule_unstable_write(struct nfs_page *req,
>>> +                                 struct nfs_write_data *data)
>>>  {
>>>        return 0;
>>>  }
>>> @@ -615,9 +618,11 @@ static struct nfs_page
>>> *nfs_try_to_update_request(struct inode *inode,
>>>        }
>>>
>>>        if (nfs_clear_request_commit(req)&&
>>> -                       radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree,
>>> -                               req->wb_index, NFS_PAGE_TAG_COMMIT) !=
>>> NULL)
>>> +           radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree,
>>> +                                req->wb_index, NFS_PAGE_TAG_COMMIT) !=
>>> NULL) {
>>>                NFS_I(inode)->ncommit--;
>>> +               pnfs_clear_request_commit(req);
>>> +       }
>>>
>>>        /* Okay, the request matches. Update the region */
>>>        if (offset<  req->wb_offset) {
>>> @@ -765,11 +770,12 @@ int nfs_updatepage(struct file *file, struct page
>>> *page,
>>>        return status;
>>>  }
>>>
>>> -static void nfs_writepage_release(struct nfs_page *req)
>>> +static void nfs_writepage_release(struct nfs_page *req,
>>> +                                 struct nfs_write_data *data)
>>>  {
>>>        struct page *page = req->wb_page;
>>>
>>> -       if (PageError(req->wb_page) ||
>>> !nfs_reschedule_unstable_write(req))
>>> +       if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req,
>>> data))
>>>                nfs_inode_remove_request(req);
>>>        nfs_clear_page_tag_locked(req);
>>>        nfs_end_page_writeback(page);
>>> @@ -1087,7 +1093,7 @@ static void nfs_writeback_release_partial(void
>>> *calldata)
>>>
>>>  out:
>>>        if (atomic_dec_and_test(&req->wb_complete))
>>> -               nfs_writepage_release(req);
>>> +               nfs_writepage_release(req, data);
>>>        nfs_writedata_release(calldata);
>>>  }
>>>
>>> @@ -1154,7 +1160,7 @@ static void nfs_writeback_release_full(void
>>> *calldata)
>>>
>>>                if (nfs_write_need_commit(data)) {
>>>                        memcpy(&req->wb_verf,&data->verf,
>>> sizeof(req->wb_verf));
>>> -                       nfs_mark_request_commit(req);
>>> +                       nfs_mark_request_commit(req, data->lseg);
>>>                        dprintk(" marked for commit\n");
>>>                        goto next;
>>>                }
>>> @@ -1357,14 +1363,15 @@ static void nfs_init_commit(struct nfs_write_data
>>> *data,
>>>        nfs_fattr_init(&data->fattr);
>>>  }
>>>
>>> -static void nfs_retry_commit(struct list_head *page_list)
>>> +static void nfs_retry_commit(struct list_head *page_list,
>>> +                     struct pnfs_layout_segment *lseg)
>>>  {
>>>        struct nfs_page *req;
>>>
>>>        while (!list_empty(page_list)) {
>>>                req = nfs_list_entry(page_list->next);
>>>                nfs_list_remove_request(req);
>>> -               nfs_mark_request_commit(req);
>>> +               nfs_mark_request_commit(req, lseg);
>>>                dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
>>>                dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
>>>                             BDI_RECLAIMABLE);
>>> @@ -1389,7 +1396,7 @@ nfs_commit_list(struct inode *inode, struct
>>> list_head *head, int how)
>>>        nfs_init_commit(data, head);
>>>        return nfs_initiate_commit(data, NFS_CLIENT(inode), data->mds_ops,
>>> how);
>>>   out_bad:
>>> -       nfs_retry_commit(head);
>>> +       nfs_retry_commit(head, NULL);
>>>        nfs_commit_clear_lock(NFS_I(inode));
>>>        return -ENOMEM;
>>>  }
>>> @@ -1477,7 +1484,11 @@ int nfs_commit_inode(struct inode *inode, int how)
>>>        res = nfs_scan_commit(inode,&head, 0, 0);
>>>        spin_unlock(&inode->i_lock);
>>>        if (res) {
>>> -               int error = nfs_commit_list(inode,&head, how);
>>> +               int error;
>>> +
>>> +               error = pnfs_commit_list(inode,&head, how);
>>> +               if (error == PNFS_NOT_ATTEMPTED)
>>> +                       error = nfs_commit_list(inode,&head, how);
>>>                if (error<  0)
>>>                        return error;
>>>                if (!may_wait)
>>> diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
>>> index cb2add4..f64ea14 100644
>>> --- a/include/linux/nfs_fs.h
>>> +++ b/include/linux/nfs_fs.h
>>> @@ -221,6 +221,7 @@ struct nfs_inode {
>>>  #define NFS_INO_FSCACHE               (5)             /* inode can be
>>> cached by FS-Cache */
>>>  #define NFS_INO_FSCACHE_LOCK  (6)             /* FS-Cache cookie
>>> management lock */
>>>  #define NFS_INO_COMMIT                (7)             /* inode is
>>> committing unstable writes */
>>> +#define NFS_INO_PNFS_COMMIT    (8)             /* use pnfs code for
>>> commit */
>>
>> nit: alignment
>>
>>>
>>>  static inline struct nfs_inode *NFS_I(const struct inode *inode)
>>>  {
>>> diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
>>> index 92d54c8..8023e4e 100644
>>> --- a/include/linux/nfs_page.h
>>> +++ b/include/linux/nfs_page.h
>>> @@ -33,11 +33,15 @@ enum {
>>>        PG_CLEAN,
>>>        PG_NEED_COMMIT,
>>>        PG_NEED_RESCHED,
>>> +       PG_PNFS_COMMIT,
>>>  };
>>>
>>>  struct nfs_inode;
>>>  struct nfs_page {
>>> -       struct list_head        wb_list;        /* Defines state of page:
>>> */
>>> +       union {
>>> +               struct list_head        wb_list;        /* Defines state
>>> of page: */
>>> +               struct pnfs_layout_segment *wb_commit_lseg; /* Used when
>>> PG_PNFS_COMMIT set */
>>> +       };
>>>        struct page             *wb_page;       /* page to read in/write
>>> out */
>>>        struct nfs_open_context *wb_context;    /* File state context info
>>> */
>>>        struct nfs_lock_context *wb_lock_context;       /* lock context
>>> info */
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>> the body of a message to majordomo@xxxxxxxxxxxxxxx
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux