Re: [PATCH 11/24] pnfs_submit: expose pnfs_update_layout, put_lseg, and get_lseg functions

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Jun 9, 2010 at 2:58 PM, Boaz Harrosh <bharrosh@xxxxxxxxxxx> wrote:
> On 06/08/2010 07:19 AM, Fred Isaman wrote:
>> These will be used in the generic code.  Set so they will compile away to
>> nothing if CONFIG_NFS_V4_1 not set.
>>
>> This requires kref_put to be under lock.  See rule 3 of Documentation/kref.txt
>>
>
> I don't see "rule 3" in here. Please explain how?

3) If the code attempts to gain a reference to a kref-ed structure
   without already holding a valid pointer, it must serialize access
   where a kref_put() cannot occur during the kref_get(), and the
   structure must remain valid during the kref_get().

This occurs every time we call pnfs_update_layout


>
> BTW: Even "rule 3" bad example with the lists, have a counter example in the Kernel
>     with lists and searches that kref_put/get lockless. (By each element refing
>     it's pear and taking the reference of the first one before search)
>

I don't follow this.

>> Signed-off-by: Fred Isaman <iisaman@xxxxxxxxxx>
>> ---
>>  fs/nfs/pnfs.c |   45 ++++++++++++++++++++++++++++++++-------------
>>  fs/nfs/pnfs.h |   44 +++++++++++++++++++++++++++++++++++++++++++-
>>  2 files changed, 75 insertions(+), 14 deletions(-)
>>
>> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
>> index 836cb0f..a74a4b6 100644
>> --- a/fs/nfs/pnfs.c
>> +++ b/fs/nfs/pnfs.c
>> @@ -436,7 +436,25 @@ destroy_lseg(struct kref *kref)
>>       PNFS_LD_IO_OPS(lseg->layout)->free_lseg(lseg);
>>  }
>>
>> -static inline void
>> +static void
>> +put_lseg_locked(struct pnfs_layout_segment *lseg)
>> +{
>> +     bool do_wake_up;
>> +     struct nfs_inode *nfsi;
>> +
>> +     if (!lseg)
>> +             return;
>> +
>> +     dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
>> +             atomic_read(&lseg->kref.refcount), lseg->valid);
>> +     do_wake_up = !lseg->valid;
>> +     nfsi = PNFS_NFS_INODE(lseg->layout);
>> +     kref_put(&lseg->kref, destroy_lseg);
>> +     if (do_wake_up)
>> +             wake_up(&nfsi->lo_waitq);
>> +}
>> +
>> +void
>>  put_lseg(struct pnfs_layout_segment *lseg)
>>  {
>>       bool do_wake_up;
>> @@ -449,7 +467,9 @@ put_lseg(struct pnfs_layout_segment *lseg)
>>               atomic_read(&lseg->kref.refcount), lseg->valid);
>>       do_wake_up = !lseg->valid;
>>       nfsi = PNFS_NFS_INODE(lseg->layout);
>> +     lock_current_layout(nfsi);
>>       kref_put(&lseg->kref, destroy_lseg);
>> +     unlock_current_layout(nfsi);
>>       if (do_wake_up)
>>               wake_up(&nfsi->lo_waitq);
>>  }
>> @@ -674,7 +694,7 @@ pnfs_free_layout(struct pnfs_layout_type *lo,
>>                       lseg, lseg->range.iomode, lseg->range.offset,
>>                       lseg->range.length);
>>               list_del(&lseg->fi_list);
>> -             put_lseg(lseg);
>> +             put_lseg_locked(lseg);
>>       }
>>
>>       dprintk("%s:Return\n", __func__);
>> @@ -1033,7 +1053,7 @@ pnfs_has_layout(struct pnfs_layout_type *lo,
>>                   (lseg->valid || !only_valid)) {
>>                       ret = lseg;
>>                       if (take_ref)
>> -                             kref_get(&ret->kref);
>> +                             get_lseg(ret);
>>                       break;
>>               }
>>               if (cmp_layout(range, &lseg->range) > 0)
>> @@ -1053,7 +1073,7 @@ pnfs_has_layout(struct pnfs_layout_type *lo,
>>   * returned to the caller.
>>   */
>>  int
>> -pnfs_update_layout(struct inode *ino,
>> +_pnfs_update_layout(struct inode *ino,
>>                  struct nfs_open_context *ctx,
>>                  u64 count,
>>                  loff_t pos,
>> @@ -1085,8 +1105,7 @@ pnfs_update_layout(struct inode *ino,
>>       lseg = pnfs_has_layout(lo, &arg, take_ref, !take_ref);
>>       if (lseg && !lseg->valid) {
>>               if (take_ref)
>> -                     put_lseg(lseg);
>> -
>> +                     put_lseg_locked(lseg);
>>               /* someone is cleaning the layout */
>>               lseg = NULL;
>>               result = -EAGAIN;
>> @@ -1262,7 +1281,7 @@ pnfs_layout_process(struct nfs4_pnfs_layoutget *lgp)
>>       init_lseg(lo, lseg);
>>       lseg->range = res->lseg;
>>       if (lgp->lsegpp) {
>> -             kref_get(&lseg->kref);
>> +             get_lseg(lseg);
>>               *lgp->lsegpp = lseg;
>>       }
>>
>> @@ -1380,7 +1399,7 @@ pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
>>       readahead_range(inode, pages, &loff, &count);
>>
>>       if (count > 0) {
>> -             status = pnfs_update_layout(inode, ctx, count,
>> +             status = _pnfs_update_layout(inode, ctx, count,
>>                                               loff, IOMODE_READ, NULL);
>>               dprintk("%s virt update returned %d\n", __func__, status);
>>               if (status != 0)
>> @@ -1438,7 +1457,7 @@ pnfs_update_layout_commit(struct inode *inode,
>>       if (start == 0 && count == 0)
>>               count = NFS4_MAX_UINT64;
>>
>> -     status = pnfs_update_layout(inode, nfs_page->wb_context,
>> +     status = _pnfs_update_layout(inode, nfs_page->wb_context,
>>                               count,
>>                               start,
>>                               IOMODE_RW,
>> @@ -1538,7 +1557,7 @@ pnfs_file_write(struct file *filp, const char __user *buf, size_t count,
>>               goto out;
>>
>>       /* Retrieve and set layout if not allready cached */
>> -     status = pnfs_update_layout(inode,
>> +     status = _pnfs_update_layout(inode,
>>                                   context,
>>                                   count,
>>                                   *pos,
>> @@ -1580,7 +1599,7 @@ pnfs_writepages(struct nfs_write_data *wdata, int how)
>>               args->offset);
>>
>>       /* Retrieve and set layout if not allready cached */
>> -     status = pnfs_update_layout(inode,
>> +     status = _pnfs_update_layout(inode,
>>                                   args->context,
>>                                   args->count,
>>                                   args->offset,
>> @@ -1681,7 +1700,7 @@ pnfs_readpages(struct nfs_read_data *rdata)
>>               args->offset);
>>
>>       /* Retrieve and set layout if not allready cached */
>> -     status = pnfs_update_layout(inode,
>> +     status = _pnfs_update_layout(inode,
>>                                   args->context,
>>                                   args->count,
>>                                   args->offset,
>> @@ -1845,7 +1864,7 @@ pnfs_commit(struct nfs_write_data *data, int sync)
>>          new one.  If it was recalled we better commit the data first
>>          before returning it, otherwise the data needs to be rewritten,
>>          either with a new layout or to the MDS */
>> -     result = pnfs_update_layout(data->inode,
>> +     result = _pnfs_update_layout(data->inode,
>>                                   NULL,
>>                                   count,
>>                                   first->wb_offset,
>> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
>> index 214d567..6326ed5 100644
>> --- a/fs/nfs/pnfs.h
>> +++ b/fs/nfs/pnfs.h
>> @@ -31,7 +31,8 @@ extern int pnfs4_proc_layoutreturn(struct nfs4_pnfs_layoutreturn *lrp, bool wait
>>  /* pnfs.c */
>>  extern const nfs4_stateid zero_stateid;
>>
>> -int pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
>> +void put_lseg(struct pnfs_layout_segment *lseg);
>> +int _pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
>>       u64 count, loff_t pos, enum pnfs_iomode access_type,
>>       struct pnfs_layout_segment **lsegpp);
>>
>> @@ -81,6 +82,12 @@ static inline int lo_fail_bit(u32 iomode)
>>                        NFS_INO_RW_LAYOUT_FAILED : NFS_INO_RO_LAYOUT_FAILED;
>>  }
>>
>> +static inline void get_lseg(struct pnfs_layout_segment *lseg)
>> +{
>> +     if (lseg)
>
> Really? That in my experience is a shoot in the foot.
>
> I don't believe any code that decided to get an lseg could get there without one.
> if so I want to crash.
>
> From all instances of  get_lseg in this patch they already ask.
>

It is needed by one instance from the following patch.  I can change this.

Fred

>> +             kref_get(&lseg->kref);
>> +}
>> +
>>  /* Return true if a layout driver is being used for this mountpoint */
>>  static inline int pnfs_enabled_sb(struct nfs_server *nfss)
>>  {
>> @@ -170,6 +177,23 @@ static inline int pnfs_return_layout(struct inode *ino,
>>       return 0;
>>  }
>>
>> +static inline int pnfs_update_layout(struct inode *ino,
>> +     struct nfs_open_context *ctx,
>> +     u64 count, loff_t pos, enum pnfs_iomode access_type,
>> +     struct pnfs_layout_segment **lsegpp)
>> +{
>> +     struct nfs_server *nfss = NFS_SERVER(ino);
>> +
>> +     if (pnfs_enabled_sb(nfss))
>> +             return _pnfs_update_layout(ino, ctx, count, pos,
>> +                                        access_type, lsegpp);
>> +     else {
>> +             if (lsegpp)
>> +                     *lsegpp = NULL;
>> +             return 0;
>> +     }
>> +}
>> +
>>  static inline int pnfs_get_write_status(struct nfs_write_data *data)
>>  {
>>       return data->pdata.pnfs_error;
>> @@ -190,6 +214,24 @@ static inline int pnfs_use_rpc(struct nfs_server *nfss)
>>
>>  #else  /* CONFIG_NFS_V4_1 */
>>
>> +static inline void get_lseg(struct pnfs_layout_segment *lseg)
>> +{
>> +}
>> +
>> +static inline void put_lseg(struct pnfs_layout_segment *lseg)
>> +{
>> +}
>> +
>> +static inline int
>> +pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
>> +     u64 count, loff_t pos, enum pnfs_iomode access_type,
>> +     struct pnfs_layout_segment **lsegpp)
>> +{
>> +     if (lsegpp)
>> +             *lsegpp = NULL;
>> +     return 0;
>> +}
>> +
>>  static inline enum pnfs_try_status
>>  pnfs_try_to_read_data(struct nfs_read_data *data,
>>                     const struct rpc_call_ops *call_ops)
>
> Comments aside. Good needed stuff
> Boaz
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux