Re: [PATCH 2/4] ceph: consider file's last read/write when calculating wanted caps

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, 2020-02-20 at 22:53 +0800, Yan, Zheng wrote:
> On Thu, Feb 20, 2020 at 10:18 PM Jeff Layton <jlayton@xxxxxxxxxx> wrote:
> > On Thu, 2020-02-20 at 20:26 +0800, Yan, Zheng wrote:
> > > When getting caps for read/write, update corresponding file's last
> > > read/write. If a file hasn't been read/write for 'caps_wanted_delay_max'
> > > seconds, ignore the file when calculating wanted caps.
> > > 
> > 
> > Please explain in the changelog how the new info is to be stored, given
> > that it is quite complex.
> > 
> > > Signed-off-by: "Yan, Zheng" <zyan@xxxxxxxxxx>
> > > ---
> > >  fs/ceph/caps.c               | 149 ++++++++++++++++++++++++-----------
> > >  fs/ceph/file.c               |  23 +++---
> > >  fs/ceph/inode.c              |  15 +++-
> > >  fs/ceph/ioctl.c              |   4 +-
> > >  fs/ceph/super.h              |  16 +++-
> > >  include/linux/ceph/ceph_fs.h |   1 +
> > >  6 files changed, 145 insertions(+), 63 deletions(-)
> > > 
> > > diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> > > index 293920d013ff..ccdc47bd7cf0 100644
> > > --- a/fs/ceph/caps.c
> > > +++ b/fs/ceph/caps.c
> > > @@ -971,18 +971,44 @@ int __ceph_caps_used(struct ceph_inode_info *ci)
> > >       return used;
> > >  }
> > > 
> > > +#define FMODE_WAIT_BIAS 1000
> > > +
> > >  /*
> > >   * wanted, by virtue of open file modes
> > >   */
> > >  int __ceph_caps_file_wanted(struct ceph_inode_info *ci)
> > >  {
> > > +     struct ceph_mount_options *opt =
> > > +             ceph_inode_to_client(&ci->vfs_inode)->mount_options;
> > > +     unsigned long used_cutoff =
> > > +             round_jiffies(jiffies - opt->caps_wanted_delay_max * HZ);
> > > +     unsigned long idle_cutoff =
> > > +             round_jiffies(jiffies - opt->caps_wanted_delay_min * HZ);
> > >       int i, bits = 0;
> > > +
> > >       for (i = 0; i < CEPH_FILE_MODE_BITS; i++) {
> > > -             if (ci->i_nr_by_mode[i])
> > > +             if (ci->i_file_by_mode[i].nr >= FMODE_WAIT_BIAS) {
> > > +                     /* there are cap waiters or lots of open files */
> > >                       bits |= 1 << i;
> > > +             } else if (ci->i_file_by_mode[i].nr > 0) {
> > > +                     if (i ==  CEPH_FILE_MODE_PIN ||
> > > +                         time_after(ci->i_file_by_mode[i].last_used,
> > > +                                    used_cutoff))
> > > +                             bits |= 1 << i;
> > > +             } else if ((ci->i_file_by_mode[i].last_used & 1)) {
> > > +                     if (time_after(ci->i_file_by_mode[i].last_used,
> > > +                                    idle_cutoff)) {
> > > +                             bits |= 1 << i;
> > > +                     } else {
> > > +                             ci->i_file_by_mode[i].last_used &= ~1UL;
> > > +                     }
> > > +             }
> > >       }
> > >       if (bits == 0)
> > >               return 0;
> > > +     if (bits == 1 && !S_ISDIR(ci->vfs_inode.i_mode))
> > > +             return 0;
> > > +
> > >       return ceph_caps_for_mode(bits >> 1);
> > >  }
> > > 
> > > @@ -1021,14 +1047,6 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check)
> > >       return mds_wanted;
> > >  }
> > > 
> > > -/*
> > > - * called under i_ceph_lock
> > > - */
> > > -static int __ceph_is_single_caps(struct ceph_inode_info *ci)
> > > -{
> > > -     return rb_first(&ci->i_caps) == rb_last(&ci->i_caps);
> > > -}
> > > -
> > >  int ceph_is_any_caps(struct inode *inode)
> > >  {
> > >       struct ceph_inode_info *ci = ceph_inode(inode);
> > > @@ -1856,10 +1874,6 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
> > >       if (ci->i_ceph_flags & CEPH_I_FLUSH)
> > >               flags |= CHECK_CAPS_FLUSH;
> > > 
> > > -     if (!(flags & CHECK_CAPS_AUTHONLY) ||
> > > -         (ci->i_auth_cap && __ceph_is_single_caps(ci)))
> > > -             __cap_delay_cancel(mdsc, ci);
> > > -
> > >       goto retry_locked;
> > >  retry:
> > >       spin_lock(&ci->i_ceph_lock);
> > > @@ -2081,9 +2095,16 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
> > >               goto retry; /* retake i_ceph_lock and restart our cap scan. */
> > >       }
> > > 
> > > -     /* Reschedule delayed caps release if we delayed anything */
> > > -     if (delayed)
> > > -             __cap_delay_requeue(mdsc, ci, false);
> > > +     if (list_empty(&ci->i_cap_delay_list)) {
> > > +         if (delayed) {
> > > +                 /* Reschedule delayed caps release if we delayed anything */
> > > +                 __cap_delay_requeue(mdsc, ci, false);
> > > +         } else if ((file_wanted & ~CEPH_CAP_PIN) &&
> > > +                     !(used & (CEPH_CAP_FILE_RD | CEPH_CAP_ANY_FILE_WR))) {
> > > +                 /* periodically re-calculate caps wanted by open files */
> > > +                 __cap_delay_requeue(mdsc, ci, true);
> > > +         }
> > > +     }
> > > 
> > >       spin_unlock(&ci->i_ceph_lock);
> > > 
> > > @@ -2549,8 +2570,9 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got,
> > >   * FIXME: how does a 0 return differ from -EAGAIN?
> > >   */
> > >  enum {
> > > -     NON_BLOCKING    = 1,
> > > -     CHECK_FILELOCK  = 2,
> > > +     /* first 8 bits are reserved for CEPH_FILE_MODE_FOO */
> > > +     NON_BLOCKING    = (1 << 8),
> > > +     CHECK_FILELOCK  = (1 << 9),
> > >  };
> > > 
> > >  static int try_get_cap_refs(struct inode *inode, int need, int want,
> > > @@ -2560,7 +2582,6 @@ static int try_get_cap_refs(struct inode *inode, int need, int want,
> > >       struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
> > >       int ret = 0;
> > >       int have, implemented;
> > > -     int file_wanted;
> > >       bool snap_rwsem_locked = false;
> > > 
> > >       dout("get_cap_refs %p need %s want %s\n", inode,
> > > @@ -2576,15 +2597,6 @@ static int try_get_cap_refs(struct inode *inode, int need, int want,
> > >               goto out_unlock;
> > >       }
> > > 
> > > -     /* make sure file is actually open */
> > > -     file_wanted = __ceph_caps_file_wanted(ci);
> > > -     if ((file_wanted & need) != need) {
> > > -             dout("try_get_cap_refs need %s file_wanted %s, EBADF\n",
> > > -                  ceph_cap_string(need), ceph_cap_string(file_wanted));
> > > -             ret = -EBADF;
> > > -             goto out_unlock;
> > > -     }
> > > -
> > >       /* finish pending truncate */
> > >       while (ci->i_truncate_pending) {
> > >               spin_unlock(&ci->i_ceph_lock);
> > > @@ -2692,6 +2704,9 @@ static int try_get_cap_refs(struct inode *inode, int need, int want,
> > >                    ceph_cap_string(have), ceph_cap_string(need));
> > >       }
> > >  out_unlock:
> > > +
> > > +     __ceph_touch_fmode(ci, mdsc, flags);
> > > +
> > >       spin_unlock(&ci->i_ceph_lock);
> > >       if (snap_rwsem_locked)
> > >               up_read(&mdsc->snap_rwsem);
> > > @@ -2729,10 +2744,22 @@ static void check_max_size(struct inode *inode, loff_t endoff)
> > >               ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
> > >  }
> > > 
> > > +static inline int get_used_file_mode(int need, int want)
> > > +{
> > > +     int fmode = 0;
> > > +     if (need & CEPH_CAP_FILE_RD)
> > > +             fmode |= CEPH_FILE_MODE_RD;
> > > +     if (need & CEPH_CAP_FILE_WR)
> > > +             fmode |= CEPH_FILE_MODE_WR;
> > > +     if (want & CEPH_CAP_FILE_LAZYIO)
> > > +             fmode |= CEPH_FILE_MODE_LAZY;
> > > +     return fmode;
> > > +}
> > > +
> > >  int ceph_try_get_caps(struct inode *inode, int need, int want,
> > >                     bool nonblock, int *got)
> > >  {
> > > -     int ret;
> > > +     int ret, flags;
> > > 
> > >       BUG_ON(need & ~CEPH_CAP_FILE_RD);
> > >       BUG_ON(want & ~(CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO|CEPH_CAP_FILE_SHARED));
> > > @@ -2740,8 +2767,11 @@ int ceph_try_get_caps(struct inode *inode, int need, int want,
> > >       if (ret < 0)
> > >               return ret;
> > > 
> > > -     ret = try_get_cap_refs(inode, need, want, 0,
> > > -                            (nonblock ? NON_BLOCKING : 0), got);
> > > +     flags = get_used_file_mode(need, want);
> > > +     if (nonblock)
> > > +             flags |= NON_BLOCKING;
> > > +
> > > +     ret = try_get_cap_refs(inode, need, want, 0, flags, got);
> > >       return ret == -EAGAIN ? 0 : ret;
> > >  }
> > > 
> > > @@ -2767,11 +2797,15 @@ int ceph_get_caps(struct file *filp, int need, int want,
> > >           fi->filp_gen != READ_ONCE(fsc->filp_gen))
> > >               return -EBADF;
> > > 
> > > +     flags = get_used_file_mode(need, want);
> > > +
> > >       while (true) {
> > >               if (endoff > 0)
> > >                       check_max_size(inode, endoff);
> > > 
> > > -             flags = atomic_read(&fi->num_locks) ? CHECK_FILELOCK : 0;
> > > +             flags &= CEPH_FILE_MODE_MASK;
> > > +             if (atomic_read(&fi->num_locks))
> > > +                     flags |= CHECK_FILELOCK;
> > >               _got = 0;
> > >               ret = try_get_cap_refs(inode, need, want, endoff,
> > >                                      flags, &_got);
> > > @@ -2791,6 +2825,8 @@ int ceph_get_caps(struct file *filp, int need, int want,
> > >                       list_add(&cw.list, &mdsc->cap_wait_list);
> > >                       spin_unlock(&mdsc->caps_list_lock);
> > > 
> > > +                     /* make sure used fmode not timeout */
> > > +                     ceph_get_fmode(ci, flags, FMODE_WAIT_BIAS);
> > >                       add_wait_queue(&ci->i_cap_wq, &wait);
> > > 
> > >                       flags |= NON_BLOCKING;
> > > @@ -2804,6 +2840,7 @@ int ceph_get_caps(struct file *filp, int need, int want,
> > >                       }
> > > 
> > >                       remove_wait_queue(&ci->i_cap_wq, &wait);
> > > +                     ceph_put_fmode(ci, flags, FMODE_WAIT_BIAS);
> > > 
> > >                       spin_lock(&mdsc->caps_list_lock);
> > >                       list_del(&cw.list);
> > > @@ -2823,7 +2860,7 @@ int ceph_get_caps(struct file *filp, int need, int want,
> > >               if (ret < 0) {
> > >                       if (ret == -ESTALE) {
> > >                               /* session was killed, try renew caps */
> > > -                             ret = ceph_renew_caps(inode);
> > > +                             ret = ceph_renew_caps(inode, flags);
> > >                               if (ret == 0)
> > >                                       continue;
> > >                       }
> > > @@ -4121,13 +4158,41 @@ void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
> > >       dout("flush_dirty_caps done\n");
> > >  }
> > > 
> > > +void __ceph_touch_fmode(struct ceph_inode_info *ci,
> > > +                     struct ceph_mds_client *mdsc, int fmode)
> > > +{
> > > +     int i;
> > > +     int bits = (fmode << 1);
> > > +     unsigned long now = jiffies | 1;
> > > +     for (i = 1; i < CEPH_FILE_MODE_BITS; i++) {
> > > +             if (bits & (1 << i))
> > > +                     ci->i_file_by_mode[i].last_used = now;
> > > +     }
> > > +
> > > +     /* queue periodic check */
> > > +     if (bits && list_empty(&ci->i_cap_delay_list))
> > > +             __cap_delay_requeue(mdsc, ci, true);
> > > +}
> > > +
> > > +void ceph_get_fmode(struct ceph_inode_info *ci, int fmode, int count)
> > > +{
> > > +     int i;
> > > +     int bits = (fmode << 1) | 1;
> > > +     spin_lock(&ci->i_ceph_lock);
> > > +     for (i = 0; i < CEPH_FILE_MODE_BITS; i++) {
> > > +             if (bits & (1 << i))
> > > +                     ci->i_file_by_mode[i].nr += count;
> > > +     }
> > > +     spin_unlock(&ci->i_ceph_lock);
> > > +}
> > > +
> > >  void __ceph_get_fmode(struct ceph_inode_info *ci, int fmode)
> > >  {
> > >       int i;
> > >       int bits = (fmode << 1) | 1;
> > >       for (i = 0; i < CEPH_FILE_MODE_BITS; i++) {
> > >               if (bits & (1 << i))
> > > -                     ci->i_nr_by_mode[i]++;
> > > +                     ci->i_file_by_mode[i].nr++;
> > >       }
> > >  }
> > > 
> > > @@ -4136,26 +4201,18 @@ void __ceph_get_fmode(struct ceph_inode_info *ci, int fmode)
> > >   * we may need to release capabilities to the MDS (or schedule
> > >   * their delayed release).
> > >   */
> > > -void ceph_put_fmode(struct ceph_inode_info *ci, int fmode)
> > > +void ceph_put_fmode(struct ceph_inode_info *ci, int fmode, int count)
> > >  {
> > > -     int i, last = 0;
> > > +     int i;
> > >       int bits = (fmode << 1) | 1;
> > >       spin_lock(&ci->i_ceph_lock);
> > >       for (i = 0; i < CEPH_FILE_MODE_BITS; i++) {
> > >               if (bits & (1 << i)) {
> > > -                     BUG_ON(ci->i_nr_by_mode[i] == 0);
> > > -                     if (--ci->i_nr_by_mode[i] == 0)
> > > -                             last++;
> > > +                     BUG_ON(ci->i_file_by_mode[i].nr < count);
> > > +                     ci->i_file_by_mode[i].nr -= count;
> > >               }
> > >       }
> > > -     dout("put_fmode %p fmode %d {%d,%d,%d,%d}\n",
> > > -          &ci->vfs_inode, fmode,
> > > -          ci->i_nr_by_mode[0], ci->i_nr_by_mode[1],
> > > -          ci->i_nr_by_mode[2], ci->i_nr_by_mode[3]);
> > >       spin_unlock(&ci->i_ceph_lock);
> > > -
> > > -     if (last && ci->i_vino.snap == CEPH_NOSNAP)
> > > -             ceph_check_caps(ci, 0, NULL);
> > >  }
> > > 
> > >  /*
> > > diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> > > index 7e0190b1f821..f28f420bad23 100644
> > > --- a/fs/ceph/file.c
> > > +++ b/fs/ceph/file.c
> > > @@ -213,7 +213,7 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
> > >               struct ceph_dir_file_info *dfi =
> > >                       kmem_cache_zalloc(ceph_dir_file_cachep, GFP_KERNEL);
> > >               if (!dfi) {
> > > -                     ceph_put_fmode(ci, fmode); /* clean up */
> > > +                     ceph_put_fmode(ci, fmode, 1); /* clean up */
> > >                       return -ENOMEM;
> > >               }
> > > 
> > > @@ -224,7 +224,7 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
> > >       } else {
> > >               fi = kmem_cache_zalloc(ceph_file_cachep, GFP_KERNEL);
> > >               if (!fi) {
> > > -                     ceph_put_fmode(ci, fmode); /* clean up */
> > > +                     ceph_put_fmode(ci, fmode, 1); /* clean up */
> > >                       return -ENOMEM;
> > >               }
> > > 
> > > @@ -263,7 +263,7 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
> > >       case S_IFLNK:
> > >               dout("init_file %p %p 0%o (symlink)\n", inode, file,
> > >                    inode->i_mode);
> > > -             ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
> > > +             ceph_put_fmode(ceph_inode(inode), fmode, 1); /* clean up */
> > >               break;
> > > 
> > >       default:
> > > @@ -273,7 +273,7 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
> > >                * we need to drop the open ref now, since we don't
> > >                * have .release set to ceph_release.
> > >                */
> > > -             ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
> > > +             ceph_put_fmode(ceph_inode(inode), fmode, 1); /* clean up */
> > >               BUG_ON(inode->i_fop->release == ceph_release);
> > > 
> > >               /* call the proper open fop */
> > > @@ -285,14 +285,15 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
> > >  /*
> > >   * try renew caps after session gets killed.
> > >   */
> > > -int ceph_renew_caps(struct inode *inode)
> > > +int ceph_renew_caps(struct inode *inode, int fmode)
> > >  {
> > > -     struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
> > > +     struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
> > >       struct ceph_inode_info *ci = ceph_inode(inode);
> > >       struct ceph_mds_request *req;
> > >       int err, flags, wanted;
> > > 
> > >       spin_lock(&ci->i_ceph_lock);
> > > +     __ceph_touch_fmode(ci, mdsc, fmode);
> > >       wanted = __ceph_caps_file_wanted(ci);
> > >       if (__ceph_is_any_real_caps(ci) &&
> > >           (!(wanted & CEPH_CAP_ANY_WR) || ci->i_auth_cap)) {
> > > @@ -405,6 +406,7 @@ int ceph_open(struct inode *inode, struct file *file)
> > >       } else if (ceph_snap(inode) != CEPH_NOSNAP &&
> > >                  (ci->i_snap_caps & wanted) == wanted) {
> > >               __ceph_get_fmode(ci, fmode);
> > > +             __ceph_touch_fmode(ci, mdsc, fmode);
> > >               spin_unlock(&ci->i_ceph_lock);
> > >               return ceph_init_file(inode, file, fmode);
> > >       }
> > > @@ -525,7 +527,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
> > >       }
> > >  out_req:
> > >       if (!req->r_err && req->r_target_inode)
> > > -             ceph_put_fmode(ceph_inode(req->r_target_inode), req->r_fmode);
> > > +             ceph_put_fmode(ceph_inode(req->r_target_inode), req->r_fmode, 1);
> > >       ceph_mdsc_put_request(req);
> > >  out_ctx:
> > >       ceph_release_acl_sec_ctx(&as_ctx);
> > > @@ -542,7 +544,7 @@ int ceph_release(struct inode *inode, struct file *file)
> > >               dout("release inode %p dir file %p\n", inode, file);
> > >               WARN_ON(!list_empty(&dfi->file_info.rw_contexts));
> > > 
> > > -             ceph_put_fmode(ci, dfi->file_info.fmode);
> > > +             ceph_put_fmode(ci, dfi->file_info.fmode, 1);
> > > 
> > >               if (dfi->last_readdir)
> > >                       ceph_mdsc_put_request(dfi->last_readdir);
> > > @@ -554,7 +556,8 @@ int ceph_release(struct inode *inode, struct file *file)
> > >               dout("release inode %p regular file %p\n", inode, file);
> > >               WARN_ON(!list_empty(&fi->rw_contexts));
> > > 
> > > -             ceph_put_fmode(ci, fi->fmode);
> > > +             ceph_put_fmode(ci, fi->fmode, 1);
> > > +
> > >               kmem_cache_free(ceph_file_cachep, fi);
> > >       }
> > > 
> > > @@ -1560,7 +1563,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
> > >               if (dirty)
> > >                       __mark_inode_dirty(inode, dirty);
> > >               if (ceph_quota_is_max_bytes_approaching(inode, iocb->ki_pos))
> > > -                     ceph_check_caps(ci, CHECK_CAPS_NODELAY, NULL);
> > > +                     ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
> > >       }
> > > 
> > >       dout("aio_write %p %llx.%llx %llu~%u  dropping cap refs on %s\n",
> > > diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
> > > index 094b8fc37787..b279bd8e168e 100644
> > > --- a/fs/ceph/inode.c
> > > +++ b/fs/ceph/inode.c
> > > @@ -478,8 +478,10 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
> > >       ci->i_head_snapc = NULL;
> > >       ci->i_snap_caps = 0;
> > > 
> > > -     for (i = 0; i < CEPH_FILE_MODE_BITS; i++)
> > > -             ci->i_nr_by_mode[i] = 0;
> > > +     for (i = 0; i < CEPH_FILE_MODE_BITS; i++) {
> > > +             ci->i_file_by_mode[i].nr = 0;
> > > +             ci->i_file_by_mode[i].last_used = 0;
> > > +     }
> > > 
> > >       mutex_init(&ci->i_truncate_mutex);
> > >       ci->i_truncate_seq = 0;
> > > @@ -637,7 +639,7 @@ int ceph_fill_file_size(struct inode *inode, int issued,
> > >                       if ((issued & (CEPH_CAP_FILE_CACHE|
> > >                                      CEPH_CAP_FILE_BUFFER)) ||
> > >                           mapping_mapped(inode->i_mapping) ||
> > > -                         __ceph_caps_file_wanted(ci)) {
> > > +                         __ceph_is_file_opened(ci)) {
> > >                               ci->i_truncate_pending++;
> > >                               queue_trunc = 1;
> > >                       }
> > > @@ -1010,6 +1012,13 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
> > >                       fill_inline = true;
> > >       }
> > > 
> > > +     if (cap_fmode >= 0) {
> > > +             if (!info_caps)
> > > +                     pr_warn("mds issued no caps on %llx.%llx\n",
> > > +                             ceph_vinop(inode));
> > > +             __ceph_touch_fmode(ci, mdsc, cap_fmode);
> > > +     }
> > > +
> > >       spin_unlock(&ci->i_ceph_lock);
> > > 
> > >       if (fill_inline)
> > > diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
> > > index c90f03beb15d..da0ee54ae5bc 100644
> > > --- a/fs/ceph/ioctl.c
> > > +++ b/fs/ceph/ioctl.c
> > > @@ -243,11 +243,13 @@ static long ceph_ioctl_lazyio(struct file *file)
> > >       struct ceph_file_info *fi = file->private_data;
> > >       struct inode *inode = file_inode(file);
> > >       struct ceph_inode_info *ci = ceph_inode(inode);
> > > +     struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
> > > 
> > >       if ((fi->fmode & CEPH_FILE_MODE_LAZY) == 0) {
> > >               spin_lock(&ci->i_ceph_lock);
> > >               fi->fmode |= CEPH_FILE_MODE_LAZY;
> > > -             ci->i_nr_by_mode[ffs(CEPH_FILE_MODE_LAZY)]++;
> > > +             ci->i_file_by_mode[ffs(CEPH_FILE_MODE_LAZY)].nr++;
> > > +             __ceph_touch_fmode(ci, mdsc, CEPH_FILE_MODE_LAZY);
> > >               spin_unlock(&ci->i_ceph_lock);
> > >               dout("ioctl_layzio: file %p marked lazy\n", file);
> > > 
> > > diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> > > index d370f89df358..029823643b8b 100644
> > > --- a/fs/ceph/super.h
> > > +++ b/fs/ceph/super.h
> > > @@ -361,7 +361,10 @@ struct ceph_inode_info {
> > >                                                   dirty|flushing caps */
> > >       unsigned i_snap_caps;           /* cap bits for snapped files */
> > > 
> > > -     int i_nr_by_mode[CEPH_FILE_MODE_BITS];  /* open file counts */
> > > +     struct {
> > > +             int nr;
> > > +             unsigned long last_used;
> > > +     } i_file_by_mode[CEPH_FILE_MODE_BITS];  /* open file counts */
> > > 
> > 
> > Ok, so we're growing ceph_inode_info by 40 bytes here (on 64-bit arch).
> > 
> > That's quite a bit, actually, but it turns out that there are 32 bytes
> > worth of holes in ceph_inode_info now. It'd be good to reorganize the
> > struct for better packing before you do this, so that this at least
> > won't make memory consumption too much worse.
> > 
> > There other ways we could approach this too that would be more space
> > efficient. We don't really need to keep a timestamp for each mode bit.
> > All we're really interested in is what modes were used in the last time
> > interval.
> > 
> > We could keep an active and inactive set of CEPH_FILE_MODE bits (which
> > are just a single byte each), and a timestamp representing the switch
> > between the two.
> > 
> > As we use the file, we'd set bits in the active mask if the timestamp is
> > less than half the time interval old. If it's more than half the
> > interval, copy the active mask to the inactive one and zero out the
> > active mask first.
> > 
> > When you go to check what modes have been used you can do the switch
> > again first if the timestamp is too old. To see what bits were actually
> > used, you just logically or the activ You could couple this with
> > something that requeues e and inactive sets together.
> > 
> > That would take a lot less space per inode.
> 
> The problem is there is no reliable tick for interval.

I handwaved over that part, but you could ensure that it was revisited
in a timely fashion the same way as here. Just call __cap_delay_requeue
at each active/inactive switch event.

You might not end up with a hard timeout for each individual bit, but
I'm not sure that's really required to fix this problem. There may also
be benefits in keeping the timeouts "looser" -- more opportunities to
batch up cap releases when there are spiky, mixed read/write workloads.

> besides, we can
> not have patch 4 if use active bit. patch 4 simplifies code a lot. I
> really like to keep it.  we can just track last use of read/write,
> which uses 16 bytes.  this patch removes i_hold_caps_min. So only need
> 8 more bytes.

Ok, I do like the last two patches in the series a lot.

Don't you also need to timestamp LAZY too if you do it this way? If you
have a mix of lazy and nonlazy clients and the lazy one is holding the
cap w/o using it, that seems like it could be the same problem all over
again.

> > >       struct mutex i_truncate_mutex;
> > >       u32 i_truncate_seq;        /* last truncate to smaller size */
> > > @@ -673,6 +676,10 @@ extern int __ceph_caps_revoking_other(struct ceph_inode_info *ci,
> > >  extern int ceph_caps_revoking(struct ceph_inode_info *ci, int mask);
> > >  extern int __ceph_caps_used(struct ceph_inode_info *ci);
> > > 
> > > +static inline bool __ceph_is_file_opened(struct ceph_inode_info *ci)
> > > +{
> > > +     return ci->i_file_by_mode[0].nr;
> > > +}
> > >  extern int __ceph_caps_file_wanted(struct ceph_inode_info *ci);
> > >  extern int __ceph_caps_wanted(struct ceph_inode_info *ci);
> > > 
> > > @@ -1074,7 +1081,10 @@ extern int ceph_try_get_caps(struct inode *inode,
> > > 
> > >  /* for counting open files by mode */
> > >  extern void __ceph_get_fmode(struct ceph_inode_info *ci, int mode);
> > > -extern void ceph_put_fmode(struct ceph_inode_info *ci, int mode);
> > > +extern void ceph_get_fmode(struct ceph_inode_info *ci, int mode, int count);
> > > +extern void ceph_put_fmode(struct ceph_inode_info *ci, int mode, int count);
> > > +extern void __ceph_touch_fmode(struct ceph_inode_info *ci,
> > > +                            struct ceph_mds_client *mdsc, int fmode);
> > > 
> > >  /* addr.c */
> > >  extern const struct address_space_operations ceph_aops;
> > > @@ -1086,7 +1096,7 @@ extern void ceph_pool_perm_destroy(struct ceph_mds_client* mdsc);
> > >  /* file.c */
> > >  extern const struct file_operations ceph_file_fops;
> > > 
> > > -extern int ceph_renew_caps(struct inode *inode);
> > > +extern int ceph_renew_caps(struct inode *inode, int fmode);
> > >  extern int ceph_open(struct inode *inode, struct file *file);
> > >  extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
> > >                           struct file *file, unsigned flags, umode_t mode);
> > > diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
> > > index cb21c5cf12c3..8017130a08a1 100644
> > > --- a/include/linux/ceph/ceph_fs.h
> > > +++ b/include/linux/ceph/ceph_fs.h
> > > @@ -564,6 +564,7 @@ struct ceph_filelock {
> > >  #define CEPH_FILE_MODE_RDWR       3  /* RD | WR */
> > >  #define CEPH_FILE_MODE_LAZY       4  /* lazy io */
> > >  #define CEPH_FILE_MODE_BITS       4
> > > +#define CEPH_FILE_MODE_MASK       ((1 << CEPH_FILE_MODE_BITS) - 1)
> > > 
> > >  int ceph_flags_to_mode(int flags);
> > > 
> > 
> > --
> > Jeff Layton <jlayton@xxxxxxxxxx>
> > 

-- 
Jeff Layton <jlayton@xxxxxxxxxx>




[Index of Archives]     [CEPH Users]     [Ceph Large]     [Ceph Dev]     [Information on CEPH]     [Linux BTRFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux