Re: [PATCH] ceph: optimize memory usage

"Chengguang Xu" <cgxu519@xxxxxxx> · Tue, 6 Mar 2018 11:08:20 +0100

> Sent: Tuesday, March 06, 2018 at 4:34 PM
> From: "Yan, Zheng" <ukernel@xxxxxxxxx>
> To: "Chengguang Xu" <cgxu519@xxxxxxx>
> Cc: "Zheng Yan" <zyan@xxxxxxxxxx>, "Ilya Dryomov" <idryomov@xxxxxxxxx>, ceph-devel <ceph-devel@xxxxxxxxxxxxxxx>
> Subject: Re: [PATCH] ceph: optimize memory usage
>
> On Mon, Mar 5, 2018 at 8:17 PM, Chengguang Xu <cgxu519@xxxxxxx> wrote:
> > In current code, regular file and directory use same struct
> > ceph_file_info to store fs specific data so the struct has to
> > include some fields which are only used for directory
> > (e.g., readdir related info), when having plenty of regular files,
> > it will lead to memory waste.
> >
> > This patch introduces dedicated readdir_info cache to separate readdir
> > related things from struct ceph_file_info, so that regular file could
> > not include those unused fields anymore. Also, chagned to manipulate
> > fscache only when reuglar file successfully acquired ceph_file_info.
> >
> > Signed-off-by: Chengguang Xu <cgxu519@xxxxxxx>
> > ---
> >  fs/ceph/dir.c                | 158 +++++++++++++++++++++++--------------------
> >  fs/ceph/file.c               |  88 ++++++++++++++++--------
> >  fs/ceph/super.c              |  14 +++-
> >  fs/ceph/super.h              |  19 ++++--
> >  include/linux/ceph/libceph.h |   1 +
> >  5 files changed, 170 insertions(+), 110 deletions(-)
> >
> > diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
> > index 0c43468..84bc75a 100644
> > --- a/fs/ceph/dir.c
> > +++ b/fs/ceph/dir.c
> > @@ -105,15 +105,17 @@ static int fpos_cmp(loff_t l, loff_t r)
> >  static int note_last_dentry(struct ceph_file_info *fi, const char *name,
> >                             int len, unsigned next_offset)
> >  {
> > +       struct ceph_readdir_info *ri = fi->readdir_info;
> >         char *buf = kmalloc(len+1, GFP_KERNEL);
> >         if (!buf)
> >                 return -ENOMEM;
> > -       kfree(fi->last_name);
> > -       fi->last_name = buf;
> > -       memcpy(fi->last_name, name, len);
> > -       fi->last_name[len] = 0;
> > -       fi->next_offset = next_offset;
> > -       dout("note_last_dentry '%s'\n", fi->last_name);
> > +
> > +       kfree(ri->last_name);
> > +       ri->last_name = buf;
> > +       memcpy(ri->last_name, name, len);
> > +       ri->last_name[len] = 0;
> > +       ri->next_offset = next_offset;
> > +       dout("note_last_dentry '%s'\n", ri->last_name);
> >         return 0;
> >  }
> >
> > @@ -176,6 +178,7 @@ static int __dcache_readdir(struct file *file,  struct dir_context *ctx,
> >                             int shared_gen)
> >  {
> >         struct ceph_file_info *fi = file->private_data;
> > +       struct ceph_readdir_info *ri = fi->readdir_info;
> >         struct dentry *parent = file->f_path.dentry;
> >         struct inode *dir = d_inode(parent);
> >         struct dentry *dentry, *last = NULL;
> > @@ -279,9 +282,9 @@ static int __dcache_readdir(struct file *file,  struct dir_context *ctx,
> >                         err = ret;
> >                 dput(last);
> >                 /* last_name no longer match cache index */
> > -               if (fi->readdir_cache_idx >= 0) {
> > -                       fi->readdir_cache_idx = -1;
> > -                       fi->dir_release_count = 0;
> > +               if (ri->readdir_cache_idx >= 0) {
> > +                       ri->readdir_cache_idx = -1;
> > +                       ri->dir_release_count = 0;
> >                 }
> >         }
> >         return err;
> > @@ -289,17 +292,20 @@ static int __dcache_readdir(struct file *file,  struct dir_context *ctx,
> >
> >  static bool need_send_readdir(struct ceph_file_info *fi, loff_t pos)
> >  {
> > -       if (!fi->last_readdir)
> > +       struct ceph_readdir_info *ri = fi->readdir_info;
> > +
> > +       if (!ri->last_readdir)
> >                 return true;
> >         if (is_hash_order(pos))
> > -               return !ceph_frag_contains_value(fi->frag, fpos_hash(pos));
> > +               return !ceph_frag_contains_value(ri->frag, fpos_hash(pos));
> >         else
> > -               return fi->frag != fpos_frag(pos);
> > +               return ri->frag != fpos_frag(pos);
> >  }
> >
> >  static int ceph_readdir(struct file *file, struct dir_context *ctx)
> >  {
> >         struct ceph_file_info *fi = file->private_data;
> > +       struct ceph_readdir_info *ri = fi->readdir_info;
> >         struct inode *inode = file_inode(file);
> >         struct ceph_inode_info *ci = ceph_inode(inode);
> >         struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
> > @@ -357,9 +363,9 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> >                         CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR;
> >
> >                 /* discard old result, if any */
> > -               if (fi->last_readdir) {
> > -                       ceph_mdsc_put_request(fi->last_readdir);
> > -                       fi->last_readdir = NULL;
> > +               if (ri->last_readdir) {
> > +                       ceph_mdsc_put_request(ri->last_readdir);
> > +                       ri->last_readdir = NULL;
> >                 }
> >
> >                 if (is_hash_order(ctx->pos)) {
> > @@ -373,7 +379,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> >                 }
> >
> >                 dout("readdir fetching %llx.%llx frag %x offset '%s'\n",
> > -                    ceph_vinop(inode), frag, fi->last_name);
> > +                    ceph_vinop(inode), frag, ri->last_name);
> >                 req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
> >                 if (IS_ERR(req))
> >                         return PTR_ERR(req);
> > @@ -389,8 +395,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> >                         __set_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags);
> >                         req->r_inode_drop = CEPH_CAP_FILE_EXCL;
> >                 }
> > -               if (fi->last_name) {
> > -                       req->r_path2 = kstrdup(fi->last_name, GFP_KERNEL);
> > +               if (ri->last_name) {
> > +                       req->r_path2 = kstrdup(ri->last_name, GFP_KERNEL);
> >                         if (!req->r_path2) {
> >                                 ceph_mdsc_put_request(req);
> >                                 return -ENOMEM;
> > @@ -400,10 +406,10 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> >                                 cpu_to_le32(fpos_hash(ctx->pos));
> >                 }
> >
> > -               req->r_dir_release_cnt = fi->dir_release_count;
> > -               req->r_dir_ordered_cnt = fi->dir_ordered_count;
> > -               req->r_readdir_cache_idx = fi->readdir_cache_idx;
> > -               req->r_readdir_offset = fi->next_offset;
> > +               req->r_dir_release_cnt = ri->dir_release_count;
> > +               req->r_dir_ordered_cnt = ri->dir_ordered_count;
> > +               req->r_readdir_cache_idx = ri->readdir_cache_idx;
> > +               req->r_readdir_offset = ri->next_offset;
> >                 req->r_args.readdir.frag = cpu_to_le32(frag);
> >                 req->r_args.readdir.flags =
> >                                 cpu_to_le16(CEPH_READDIR_REPLY_BITFLAGS);
> > @@ -427,35 +433,35 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> >                 if (le32_to_cpu(rinfo->dir_dir->frag) != frag) {
> >                         frag = le32_to_cpu(rinfo->dir_dir->frag);
> >                         if (!rinfo->hash_order) {
> > -                               fi->next_offset = req->r_readdir_offset;
> > +                               ri->next_offset = req->r_readdir_offset;
> >                                 /* adjust ctx->pos to beginning of frag */
> >                                 ctx->pos = ceph_make_fpos(frag,
> > -                                                         fi->next_offset,
> > +                                                         ri->next_offset,
> >                                                           false);
> >                         }
> >                 }
> >
> > -               fi->frag = frag;
> > -               fi->last_readdir = req;
> > +               ri->frag = frag;
> > +               ri->last_readdir = req;
> >
> >                 if (test_bit(CEPH_MDS_R_DID_PREPOPULATE, &req->r_req_flags)) {
> > -                       fi->readdir_cache_idx = req->r_readdir_cache_idx;
> > -                       if (fi->readdir_cache_idx < 0) {
> > +                       ri->readdir_cache_idx = req->r_readdir_cache_idx;
> > +                       if (ri->readdir_cache_idx < 0) {
> >                                 /* preclude from marking dir ordered */
> > -                               fi->dir_ordered_count = 0;
> > +                               ri->dir_ordered_count = 0;
> >                         } else if (ceph_frag_is_leftmost(frag) &&
> > -                                  fi->next_offset == 2) {
> > +                                  ri->next_offset == 2) {
> >                                 /* note dir version at start of readdir so
> >                                  * we can tell if any dentries get dropped */
> > -                               fi->dir_release_count = req->r_dir_release_cnt;
> > -                               fi->dir_ordered_count = req->r_dir_ordered_cnt;
> > +                               ri->dir_release_count = req->r_dir_release_cnt;
> > +                               ri->dir_ordered_count = req->r_dir_ordered_cnt;
> >                         }
> >                 } else {
> >                         dout("readdir !did_prepopulate");
> >                         /* disable readdir cache */
> > -                       fi->readdir_cache_idx = -1;
> > +                       ri->readdir_cache_idx = -1;
> >                         /* preclude from marking dir complete */
> > -                       fi->dir_release_count = 0;
> > +                       ri->dir_release_count = 0;
> >                 }
> >
> >                 /* note next offset and last dentry name */
> > @@ -469,14 +475,14 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> >                         if (err)
> >                                 return err;
> >                 } else if (req->r_reply_info.dir_end) {
> > -                       fi->next_offset = 2;
> > +                       ri->next_offset = 2;
> >                         /* keep last name */
> >                 }
> >         }
> >
> > -       rinfo = &fi->last_readdir->r_reply_info;
> > +       rinfo = &ri->last_readdir->r_reply_info;
> >         dout("readdir frag %x num %d pos %llx chunk first %llx\n",
> > -            fi->frag, rinfo->dir_nr, ctx->pos,
> > +            ri->frag, rinfo->dir_nr, ctx->pos,
> >              rinfo->dir_nr ? rinfo->dir_entries[0].offset : 0LL);
> >
> >         i = 0;
> > @@ -520,27 +526,27 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> >                 ctx->pos++;
> >         }
> >
> > -       ceph_mdsc_put_request(fi->last_readdir);
> > -       fi->last_readdir = NULL;
> > +       ceph_mdsc_put_request(ri->last_readdir);
> > +       ri->last_readdir = NULL;
> >
> > -       if (fi->next_offset > 2) {
> > -               frag = fi->frag;
> > +       if (ri->next_offset > 2) {
> > +               frag = ri->frag;
> >                 goto more;
> >         }
> >
> >         /* more frags? */
> > -       if (!ceph_frag_is_rightmost(fi->frag)) {
> > -               frag = ceph_frag_next(fi->frag);
> > +       if (!ceph_frag_is_rightmost(ri->frag)) {
> > +               frag = ceph_frag_next(ri->frag);
> >                 if (is_hash_order(ctx->pos)) {
> >                         loff_t new_pos = ceph_make_fpos(ceph_frag_value(frag),
> > -                                                       fi->next_offset, true);
> > +                                                       ri->next_offset, true);
> >                         if (new_pos > ctx->pos)
> >                                 ctx->pos = new_pos;
> >                         /* keep last_name */
> >                 } else {
> > -                       ctx->pos = ceph_make_fpos(frag, fi->next_offset, false);
> > -                       kfree(fi->last_name);
> > -                       fi->last_name = NULL;
> > +                       ctx->pos = ceph_make_fpos(frag, ri->next_offset, false);
> > +                       kfree(ri->last_name);
> > +                       ri->last_name = NULL;
> >                 }
> >                 dout("readdir next frag is %x\n", frag);
> >                 goto more;
> > @@ -552,20 +558,20 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> >          * were released during the whole readdir, and we should have
> >          * the complete dir contents in our cache.
> >          */
> > -       if (atomic64_read(&ci->i_release_count) == fi->dir_release_count) {
> > +       if (atomic64_read(&ci->i_release_count) == ri->dir_release_count) {
> >                 spin_lock(&ci->i_ceph_lock);
> > -               if (fi->dir_ordered_count == atomic64_read(&ci->i_ordered_count)) {
> > +               if (ri->dir_ordered_count == atomic64_read(&ci->i_ordered_count)) {
> >                         dout(" marking %p complete and ordered\n", inode);
> >                         /* use i_size to track number of entries in
> >                          * readdir cache */
> > -                       BUG_ON(fi->readdir_cache_idx < 0);
> > -                       i_size_write(inode, fi->readdir_cache_idx *
> > +                       BUG_ON(ri->readdir_cache_idx < 0);
> > +                       i_size_write(inode, ri->readdir_cache_idx *
> >                                      sizeof(struct dentry*));
> >                 } else {
> >                         dout(" marking %p complete\n", inode);
> >                 }
> > -               __ceph_dir_set_complete(ci, fi->dir_release_count,
> > -                                       fi->dir_ordered_count);
> > +               __ceph_dir_set_complete(ci, ri->dir_release_count,
> > +                                       ri->dir_ordered_count);
> >                 spin_unlock(&ci->i_ceph_lock);
> >         }
> >
> > @@ -575,15 +581,17 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
> >
> >  static void reset_readdir(struct ceph_file_info *fi)
> >  {
> > -       if (fi->last_readdir) {
> > -               ceph_mdsc_put_request(fi->last_readdir);
> > -               fi->last_readdir = NULL;
> > +       struct ceph_readdir_info *ri = fi->readdir_info;
> > +
> > +       if (ri->last_readdir) {
> > +               ceph_mdsc_put_request(ri->last_readdir);
> > +               ri->last_readdir = NULL;
> >         }
> > -       kfree(fi->last_name);
> > -       fi->last_name = NULL;
> > -       fi->dir_release_count = 0;
> > -       fi->readdir_cache_idx = -1;
> > -       fi->next_offset = 2;  /* compensate for . and .. */
> > +       kfree(ri->last_name);
> > +       ri->last_name = NULL;
> > +       ri->dir_release_count = 0;
> > +       ri->readdir_cache_idx = -1;
> > +       ri->next_offset = 2;  /* compensate for . and .. */
> >         fi->flags &= ~CEPH_F_ATEND;
> >  }
> >
> > @@ -594,16 +602,18 @@ static void reset_readdir(struct ceph_file_info *fi)
> >  static bool need_reset_readdir(struct ceph_file_info *fi, loff_t new_pos)
> >  {
> >         struct ceph_mds_reply_info_parsed *rinfo;
> > +       struct ceph_readdir_info *ri = fi->readdir_info;
> >         loff_t chunk_offset;
> > +
> >         if (new_pos == 0)
> >                 return true;
> >         if (is_hash_order(new_pos)) {
> >                 /* no need to reset last_name for a forward seek when
> >                  * dentries are sotred in hash order */
> > -       } else if (fi->frag != fpos_frag(new_pos)) {
> > +       } else if (ri->frag != fpos_frag(new_pos)) {
> >                 return true;
> >         }
> > -       rinfo = fi->last_readdir ? &fi->last_readdir->r_reply_info : NULL;
> > +       rinfo = ri->last_readdir ? &ri->last_readdir->r_reply_info : NULL;
> >         if (!rinfo || !rinfo->dir_nr)
> >                 return true;
> >         chunk_offset = rinfo->dir_entries[0].offset;
> > @@ -614,6 +624,7 @@ static bool need_reset_readdir(struct ceph_file_info *fi, loff_t new_pos)
> >  static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
> >  {
> >         struct ceph_file_info *fi = file->private_data;
> > +       struct ceph_readdir_info *ri = fi->readdir_info;
> >         struct inode *inode = file->f_mapping->host;
> >         loff_t retval;
> >
> > @@ -637,8 +648,8 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
> >                 } else if (is_hash_order(offset) && offset > file->f_pos) {
> >                         /* for hash offset, we don't know if a forward seek
> >                          * is within same frag */
> > -                       fi->dir_release_count = 0;
> > -                       fi->readdir_cache_idx = -1;
> > +                       ri->dir_release_count = 0;
> > +                       ri->readdir_cache_idx = -1;
> >                 }
> >
> >                 if (offset != file->f_pos) {
> > @@ -1371,6 +1382,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
> >                              loff_t *ppos)
> >  {
> >         struct ceph_file_info *cf = file->private_data;
> > +       struct ceph_readdir_info *ri = cf->readdir_info;
> >         struct inode *inode = file_inode(file);
> >         struct ceph_inode_info *ci = ceph_inode(inode);
> >         int left;
> > @@ -1379,12 +1391,12 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
> >         if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT))
> >                 return -EISDIR;
> >
> > -       if (!cf->dir_info) {
> > -               cf->dir_info = kmalloc(bufsize, GFP_KERNEL);
> > -               if (!cf->dir_info)
> > +       if (!ri->dir_info) {
> > +               ri->dir_info = kmalloc(bufsize, GFP_KERNEL);
> > +               if (!ri->dir_info)
> >                         return -ENOMEM;
> > -               cf->dir_info_len =
> > -                       snprintf(cf->dir_info, bufsize,
> > +               ri->dir_info_len =
> > +                       snprintf(ri->dir_info, bufsize,
> >                                 "entries:   %20lld\n"
> >                                 " files:    %20lld\n"
> >                                 " subdirs:  %20lld\n"
> > @@ -1404,10 +1416,10 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
> >                                 (long)ci->i_rctime.tv_nsec);
> >         }
> >
> > -       if (*ppos >= cf->dir_info_len)
> > +       if (*ppos >= ri->dir_info_len)
> >                 return 0;
> > -       size = min_t(unsigned, size, cf->dir_info_len-*ppos);
> > -       left = copy_to_user(buf, cf->dir_info + *ppos, size);
> > +       size = min_t(unsigned int, size, ri->dir_info_len-*ppos);
> > +       left = copy_to_user(buf, ri->dir_info + *ppos, size);
> >         if (left == size)
> >                 return -EFAULT;
> >         *ppos += (size - left);
> > diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> > index 6639926..611aaef 100644
> > --- a/fs/ceph/file.c
> > +++ b/fs/ceph/file.c
> > @@ -159,36 +159,60 @@ static size_t dio_get_pagev_size(const struct iov_iter *it)
> >         return req;
> >  }
> >
> > +
> > +static int ceph_init_file_info(struct inode *inode,
> > +                       struct file *file, int fmode)
> > +{
> > +       struct ceph_file_info *fi;
> > +
> > +       dout("init_file inode %p file %p mode 0%o (%s)\n", inode, file,
> > +               inode->i_mode, S_ISREG(inode->i_mode) ? "regular" : "dir");
> > +
> > +       BUG_ON(inode->i_fop->release != ceph_release);
> > +
> > +       fi = kmem_cache_zalloc(ceph_file_cachep, GFP_KERNEL);
> > +       if (!fi) {
> > +               ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
> > +               return -ENOMEM;
> > +       }
> > +
> > +       if (S_ISDIR(inode->i_mode)) {
> > +               fi->readdir_info = kmem_cache_zalloc(ceph_readdir_cachep,
> > +                                                       GFP_KERNEL);
> > +               if (!fi->readdir_info) {
> > +                       kmem_cache_free(ceph_file_cachep, fi);
> > +                       return -ENOMEM;
> > +               }
> > +
> > +               fi->readdir_info->next_offset = 2;
> > +               fi->readdir_info->readdir_cache_idx = -1;
> > +       } else {
> > +               ceph_fscache_register_inode_cookie(inode);
> > +               ceph_fscache_file_set_cookie(inode, file);
> > +       }
> > +
> > +       fi->fmode = fmode;
> > +       spin_lock_init(&fi->rw_contexts_lock);
> > +       INIT_LIST_HEAD(&fi->rw_contexts);
> > +       file->private_data = fi;
> > +
> > +       return 0;
> > +}
> > +
> >  /*
> >   * initialize private struct file data.
> >   * if we fail, clean up by dropping fmode reference on the ceph_inode
> >   */
> >  static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
> >  {
> > -       struct ceph_file_info *cf;
> >         int ret = 0;
> >
> >         switch (inode->i_mode & S_IFMT) {
> >         case S_IFREG:
> > -               ceph_fscache_register_inode_cookie(inode);
> > -               ceph_fscache_file_set_cookie(inode, file);
> >         case S_IFDIR:
> > -               dout("init_file %p %p 0%o (regular)\n", inode, file,
> > -                    inode->i_mode);
> > -               cf = kmem_cache_zalloc(ceph_file_cachep, GFP_KERNEL);
> > -               if (!cf) {
> > -                       ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
> > -                       return -ENOMEM;
> > -               }
> > -               cf->fmode = fmode;
> > -
> > -               spin_lock_init(&cf->rw_contexts_lock);
> > -               INIT_LIST_HEAD(&cf->rw_contexts);
> > -
> > -               cf->next_offset = 2;
> > -               cf->readdir_cache_idx = -1;
> > -               file->private_data = cf;
> > -               BUG_ON(inode->i_fop->release != ceph_release);
> > +               ret = ceph_init_file_info(inode, file, fmode);
> > +               if (ret)
> > +                       return ret;
> >                 break;
> >
> >         case S_IFLNK:
> > @@ -460,16 +484,24 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
> >  int ceph_release(struct inode *inode, struct file *file)
> >  {
> >         struct ceph_inode_info *ci = ceph_inode(inode);
> > -       struct ceph_file_info *cf = file->private_data;
> > +       struct ceph_file_info *fi = file->private_data;
> > +       struct ceph_readdir_info *ri = fi->readdir_info;
> > +
> > +       dout("release inode %p file %p mode 0%o (%s)\n", inode, file,
> > +               inode->i_mode, S_ISREG(inode->i_mode) ? "regular" : "dir");
> >
> > -       dout("release inode %p file %p\n", inode, file);
> > -       ceph_put_fmode(ci, cf->fmode);
> > -       if (cf->last_readdir)
> > -               ceph_mdsc_put_request(cf->last_readdir);
> > -       kfree(cf->last_name);
> > -       kfree(cf->dir_info);
> > -       WARN_ON(!list_empty(&cf->rw_contexts));
> > -       kmem_cache_free(ceph_file_cachep, cf);
> > +       WARN_ON(!list_empty(&fi->rw_contexts));
> > +
> > +       ceph_put_fmode(ci, fi->fmode);
> > +       if (ri) {
> > +               if (ri->last_readdir)
> > +                       ceph_mdsc_put_request(ri->last_readdir);
> > +               kfree(ri->last_name);
> > +               kfree(ri->dir_info);
> > +               kmem_cache_free(ceph_readdir_cachep, ri);
> > +               ri = NULL;
> > +       }
> > +       kmem_cache_free(ceph_file_cachep, fi);
> >
> >         /* wake up anyone waiting for caps on this inode */
> >         wake_up_all(&ci->i_cap_wq);
> > diff --git a/fs/ceph/super.c b/fs/ceph/super.c
> > index a62d2a9..c1732ff 100644
> > --- a/fs/ceph/super.c
> > +++ b/fs/ceph/super.c
> > @@ -677,6 +677,7 @@ static void destroy_fs_client(struct ceph_fs_client *fsc)
> >  struct kmem_cache *ceph_cap_flush_cachep;
> >  struct kmem_cache *ceph_dentry_cachep;
> >  struct kmem_cache *ceph_file_cachep;
> > +struct kmem_cache *ceph_readdir_cachep;
> >
> >  static void ceph_inode_init_once(void *foo)
> >  {
> > @@ -711,14 +712,22 @@ static int __init init_caches(void)
> >                 goto bad_dentry;
> >
> >         ceph_file_cachep = KMEM_CACHE(ceph_file_info, SLAB_MEM_SPREAD);
> > -
> >         if (!ceph_file_cachep)
> >                 goto bad_file;
> >
> > +       ceph_readdir_cachep = KMEM_CACHE(ceph_readdir_info, SLAB_MEM_SPREAD);
> > +       if (!ceph_readdir_cachep)
> > +               goto bad_readdir;
> > +
> >         if ((error = ceph_fscache_register()))
> > -               goto bad_file;
> > +               goto bad_fscache;
> >
> >         return 0;
> > +
> > +bad_fscache:
> > +       kmem_cache_destroy(ceph_readdir_cachep);
> > +bad_readdir:
> > +       kmem_cache_destroy(ceph_file_cachep);
> >  bad_file:
> >         kmem_cache_destroy(ceph_dentry_cachep);
> >  bad_dentry:
> > @@ -743,6 +752,7 @@ static void destroy_caches(void)
> >         kmem_cache_destroy(ceph_cap_flush_cachep);
> >         kmem_cache_destroy(ceph_dentry_cachep);
> >         kmem_cache_destroy(ceph_file_cachep);
> > +       kmem_cache_destroy(ceph_readdir_cachep);
> >
> >         ceph_fscache_unregister();
> >  }
> > diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> > index 21b2e5b..3a943a0 100644
> > --- a/fs/ceph/super.h
> > +++ b/fs/ceph/super.h
> > @@ -665,13 +665,7 @@ extern void ceph_reservation_status(struct ceph_fs_client *client,
> >  #define CEPH_F_SYNC     1
> >  #define CEPH_F_ATEND    2
> >
> > -struct ceph_file_info {
> > -       short fmode;     /* initialized on open */
> > -       short flags;     /* CEPH_F_* */
> > -
> > -       spinlock_t rw_contexts_lock;
> > -       struct list_head rw_contexts;
> > -
> > +struct ceph_readdir_info {
> >         /* readdir: position within the dir */
> >         u32 frag;
> >         struct ceph_mds_request *last_readdir;
> > @@ -688,6 +682,17 @@ struct ceph_file_info {
> >         int dir_info_len;
> >  };
> >
> > +struct ceph_file_info {
> > +       short fmode;     /* initialized on open */
> > +       short flags;     /* CEPH_F_* */
> > +
> > +       spinlock_t rw_contexts_lock;
> > +       struct list_head rw_contexts;
> > +
> > +       /* readdir related things */
> > +       struct ceph_readdir_info *readdir_info;
> > +};
> 
> 
> For dir file. I prefer something like:
> 
> struct ceph_dir_file_info {
>    struct ceph_file_info file;
>    ceph_readdir_info readdir.
> };
> 
> This avoids double memory allocation for dir file.

That makes sense. The only reason I keep ceph_file_info as
common interface of regular file and dir is for voiding 
detection of file type in many calling places. But if you
think it's worth, I can modify like that.

Thanks,
Chengguang.

> > +
> >  struct ceph_rw_context {
> >         struct list_head list;
> >         struct task_struct *thread;
> > diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
> > index c2ec44c..4ca23fa 100644
> > --- a/include/linux/ceph/libceph.h
> > +++ b/include/linux/ceph/libceph.h
> > @@ -262,6 +262,7 @@ static inline int calc_pages_for(u64 off, u64 len)
> >  extern struct kmem_cache *ceph_cap_flush_cachep;
> >  extern struct kmem_cache *ceph_dentry_cachep;
> >  extern struct kmem_cache *ceph_file_cachep;
> > +extern struct kmem_cache *ceph_readdir_cachep;
> >
> >  /* ceph_common.c */
> >  extern bool libceph_compatible(void *data);
> > --
> > 1.8.3.1
> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> > the body of a message to majordomo@xxxxxxxxxxxxxxx
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html