Re: [PATCH v2] ceph: optimize memory usage

"Yan, Zheng" <ukernel@xxxxxxxxx> · Mon, 12 Mar 2018 18:59:29 +0800



On Mon, Mar 12, 2018 at 6:54 PM, Chengguang Xu <cgxu519@xxxxxxx> wrote:
>
>
>> Sent: Monday, March 12, 2018 at 3:29 PM
>> From: "Yan, Zheng" <ukernel@xxxxxxxxx>
>> To: "Chengguang Xu" <cgxu519@xxxxxxx>
>> Cc: "Zheng Yan" <zyan@xxxxxxxxxx>, "Ilya Dryomov" <idryomov@xxxxxxxxx>, ceph-devel <ceph-devel@xxxxxxxxxxxxxxx>
>> Subject: Re: [PATCH v2] ceph: optimize memory usage
>>
>> On Fri, Mar 9, 2018 at 11:41 AM, Chengguang Xu <cgxu519@xxxxxxx> wrote:
>> > In current code, regular file and directory use same struct
>> > ceph_file_info to store fs specific data so the struct has to
>> > include some fields which are only used for directory
>> > (e.g., readdir related info), when having plenty of regular files,
>> > it will lead to memory waste.
>> >
>> > This patch introduces a dedicated ceph_dir_file_info cache for
>> > directories and deletes readdir related things from ceph_file_info,
>> > so that regular files do not include those unused fields anymore.
>> > Also, changed to manipulate fscache after a regular file acquires
>> > ceph_file_info successfully.
>> >
>> > Signed-off-by: Chengguang Xu <cgxu519@xxxxxxx>
>> > ---
>> > Changes since v1:
>> > - Modify ceph_dir_file_info to include ceph_file_info instead of pointing to it.
>> >
>> >  fs/ceph/addr.c               |  12 ++-
>> >  fs/ceph/dir.c                | 180 ++++++++++++++++++++++---------------------
>> >  fs/ceph/file.c               |  99 +++++++++++++++++-------
>> >  fs/ceph/super.c              |   8 ++
>> >  fs/ceph/super.h              |   5 ++
>> >  include/linux/ceph/libceph.h |   1 +
>> >  6 files changed, 187 insertions(+), 118 deletions(-)
>> >
>> > diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
>> > index b4336b4..c14f91e 100644
>> > --- a/fs/ceph/addr.c
>> > +++ b/fs/ceph/addr.c
>> > @@ -438,7 +438,8 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
>> >  {
>> >         struct inode *inode = file_inode(file);
>> >         struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
>> > -       struct ceph_file_info *ci = file->private_data;
>> > +       struct ceph_file_info *fi;
>> > +       struct ceph_dir_file_info *dfi;
>> >         struct ceph_rw_context *rw_ctx;
>> >         int rc = 0;
>> >         int max = 0;
>> > @@ -452,7 +453,14 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
>> >         if (rc == 0)
>> >                 goto out;
>> >
>> > -       rw_ctx = ceph_find_rw_context(ci);
>> > +       if (S_ISDIR(inode->i_mode)) {
>> > +               dfi = file->private_data;
>> > +               fi = &dfi->file_info;
>> > +       } else {
>> > +               fi = file->private_data;
>> > +       }
>>
>> fi and dfi point to the same address. I think we can avoid
>> introducing dfi in most cases.
>
> Yeah, that will be fine.
>
> By the way, should I use variable name 'fi' to point to struct ceph_dir_file_info in dir.c?
> I think it can make the patch shorter but I'm not sure if it brings additional confusion to
> code reader. What do you think?

please use 'dfi' only for readdir related code

>
>>
>> > +
>> > +       rw_ctx = ceph_find_rw_context(fi);
>> >         max = fsc->mount_options->rsize >> PAGE_SHIFT;
>> >         dout("readpages %p file %p ctx %p nr_pages %d max %d\n",
>> >              inode, file, rw_ctx, nr_pages, max);
>> > diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
>> > index f1d9c6c..f422c7b 100644
>> > --- a/fs/ceph/dir.c
>> > +++ b/fs/ceph/dir.c
>> > @@ -102,18 +102,18 @@ static int fpos_cmp(loff_t l, loff_t r)
>> >   * regardless of what dir changes take place on the
>> >   * server.
>> >   */
>> > -static int note_last_dentry(struct ceph_file_info *fi, const char *name,
>> > +static int note_last_dentry(struct ceph_dir_file_info *dfi, const char *name,
>> >                             int len, unsigned next_offset)
>> >  {
>> >         char *buf = kmalloc(len+1, GFP_KERNEL);
>> >         if (!buf)
>> >                 return -ENOMEM;
>> > -       kfree(fi->last_name);
>> > -       fi->last_name = buf;
>> > -       memcpy(fi->last_name, name, len);
>> > -       fi->last_name[len] = 0;
>> > -       fi->next_offset = next_offset;
>> > -       dout("note_last_dentry '%s'\n", fi->last_name);
>> > +       kfree(dfi->last_name);
>> > +       dfi->last_name = buf;
>> > +       memcpy(dfi->last_name, name, len);
>> > +       dfi->last_name[len] = 0;
>> > +       dfi->next_offset = next_offset;
>> > +       dout("%s: '%s'\n", __func__, dfi->last_name);
>> >         return 0;
>> >  }
>> >
>> > @@ -175,7 +175,8 @@ static int note_last_dentry(struct ceph_file_info *fi, const char *name,
>> >  static int __dcache_readdir(struct file *file,  struct dir_context *ctx,
>> >                             int shared_gen)
>> >  {
>> > -       struct ceph_file_info *fi = file->private_data;
>> > +       struct ceph_dir_file_info *dfi = file->private_data;
>> > +       struct ceph_file_info *fi = &dfi->file_info;
>> >         struct dentry *parent = file->f_path.dentry;
>> >         struct inode *dir = d_inode(parent);
>> >         struct dentry *dentry, *last = NULL;
>> > @@ -273,33 +274,34 @@ static int __dcache_readdir(struct file *file,  struct dir_context *ctx,
>> >         if (last) {
>> >                 int ret;
>> >                 di = ceph_dentry(last);
>> > -               ret = note_last_dentry(fi, last->d_name.name, last->d_name.len,
>> > +               ret = note_last_dentry(dfi, last->d_name.name, last->d_name.len,
>> >                                        fpos_off(di->offset) + 1);
>> >                 if (ret < 0)
>> >                         err = ret;
>> >                 dput(last);
>> >                 /* last_name no longer match cache index */
>> > -               if (fi->readdir_cache_idx >= 0) {
>> > -                       fi->readdir_cache_idx = -1;
>> > -                       fi->dir_release_count = 0;
>> > +               if (dfi->readdir_cache_idx >= 0) {
>> > +                       dfi->readdir_cache_idx = -1;
>> > +                       dfi->dir_release_count = 0;
>> >                 }
>> >         }
>> >         return err;
>> >  }
>> >
>> > -static bool need_send_readdir(struct ceph_file_info *fi, loff_t pos)
>> > +static bool need_send_readdir(struct ceph_dir_file_info *dfi, loff_t pos)
>> >  {
>> > -       if (!fi->last_readdir)
>> > +       if (!dfi->last_readdir)
>> >                 return true;
>> >         if (is_hash_order(pos))
>> > -               return !ceph_frag_contains_value(fi->frag, fpos_hash(pos));
>> > +               return !ceph_frag_contains_value(dfi->frag, fpos_hash(pos));
>> >         else
>> > -               return fi->frag != fpos_frag(pos);
>> > +               return dfi->frag != fpos_frag(pos);
>> >  }
>> >
>> >  static int ceph_readdir(struct file *file, struct dir_context *ctx)
>> >  {
>> > -       struct ceph_file_info *fi = file->private_data;
>> > +       struct ceph_dir_file_info *dfi = file->private_data;
>> > +       struct ceph_file_info *fi = &dfi->file_info;
>> >         struct inode *inode = file_inode(file);
>> >         struct ceph_inode_info *ci = ceph_inode(inode);
>> >         struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
>> > @@ -351,15 +353,15 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>> >         /* proceed with a normal readdir */
>> >  more:
>> >         /* do we have the correct frag content buffered? */
>> > -       if (need_send_readdir(fi, ctx->pos)) {
>> > +       if (need_send_readdir(dfi, ctx->pos)) {
>> >                 struct ceph_mds_request *req;
>> >                 int op = ceph_snap(inode) == CEPH_SNAPDIR ?
>> >                         CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR;
>> >
>> >                 /* discard old result, if any */
>> > -               if (fi->last_readdir) {
>> > -                       ceph_mdsc_put_request(fi->last_readdir);
>> > -                       fi->last_readdir = NULL;
>> > +               if (dfi->last_readdir) {
>> > +                       ceph_mdsc_put_request(dfi->last_readdir);
>> > +                       dfi->last_readdir = NULL;
>> >                 }
>> >
>> >                 if (is_hash_order(ctx->pos)) {
>> > @@ -373,7 +375,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>> >                 }
>> >
>> >                 dout("readdir fetching %llx.%llx frag %x offset '%s'\n",
>> > -                    ceph_vinop(inode), frag, fi->last_name);
>> > +                    ceph_vinop(inode), frag, dfi->last_name);
>> >                 req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
>> >                 if (IS_ERR(req))
>> >                         return PTR_ERR(req);
>> > @@ -389,8 +391,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>> >                         __set_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags);
>> >                         req->r_inode_drop = CEPH_CAP_FILE_EXCL;
>> >                 }
>> > -               if (fi->last_name) {
>> > -                       req->r_path2 = kstrdup(fi->last_name, GFP_KERNEL);
>> > +               if (dfi->last_name) {
>> > +                       req->r_path2 = kstrdup(dfi->last_name, GFP_KERNEL);
>> >                         if (!req->r_path2) {
>> >                                 ceph_mdsc_put_request(req);
>> >                                 return -ENOMEM;
>> > @@ -400,10 +402,10 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>> >                                 cpu_to_le32(fpos_hash(ctx->pos));
>> >                 }
>> >
>> > -               req->r_dir_release_cnt = fi->dir_release_count;
>> > -               req->r_dir_ordered_cnt = fi->dir_ordered_count;
>> > -               req->r_readdir_cache_idx = fi->readdir_cache_idx;
>> > -               req->r_readdir_offset = fi->next_offset;
>> > +               req->r_dir_release_cnt = dfi->dir_release_count;
>> > +               req->r_dir_ordered_cnt = dfi->dir_ordered_count;
>> > +               req->r_readdir_cache_idx = dfi->readdir_cache_idx;
>> > +               req->r_readdir_offset = dfi->next_offset;
>> >                 req->r_args.readdir.frag = cpu_to_le32(frag);
>> >                 req->r_args.readdir.flags =
>> >                                 cpu_to_le16(CEPH_READDIR_REPLY_BITFLAGS);
>> > @@ -427,35 +429,35 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>> >                 if (le32_to_cpu(rinfo->dir_dir->frag) != frag) {
>> >                         frag = le32_to_cpu(rinfo->dir_dir->frag);
>> >                         if (!rinfo->hash_order) {
>> > -                               fi->next_offset = req->r_readdir_offset;
>> > +                               dfi->next_offset = req->r_readdir_offset;
>> >                                 /* adjust ctx->pos to beginning of frag */
>> >                                 ctx->pos = ceph_make_fpos(frag,
>> > -                                                         fi->next_offset,
>> > +                                                         dfi->next_offset,
>> >                                                           false);
>> >                         }
>> >                 }
>> >
>> > -               fi->frag = frag;
>> > -               fi->last_readdir = req;
>> > +               dfi->frag = frag;
>> > +               dfi->last_readdir = req;
>> >
>> >                 if (test_bit(CEPH_MDS_R_DID_PREPOPULATE, &req->r_req_flags)) {
>> > -                       fi->readdir_cache_idx = req->r_readdir_cache_idx;
>> > -                       if (fi->readdir_cache_idx < 0) {
>> > +                       dfi->readdir_cache_idx = req->r_readdir_cache_idx;
>> > +                       if (dfi->readdir_cache_idx < 0) {
>> >                                 /* preclude from marking dir ordered */
>> > -                               fi->dir_ordered_count = 0;
>> > +                               dfi->dir_ordered_count = 0;
>> >                         } else if (ceph_frag_is_leftmost(frag) &&
>> > -                                  fi->next_offset == 2) {
>> > +                                  dfi->next_offset == 2) {
>> >                                 /* note dir version at start of readdir so
>> >                                  * we can tell if any dentries get dropped */
>> > -                               fi->dir_release_count = req->r_dir_release_cnt;
>> > -                               fi->dir_ordered_count = req->r_dir_ordered_cnt;
>> > +                               dfi->dir_release_count = req->r_dir_release_cnt;
>> > +                               dfi->dir_ordered_count = req->r_dir_ordered_cnt;
>> >                         }
>> >                 } else {
>> >                         dout("readdir !did_prepopulate");
>> >                         /* disable readdir cache */
>> > -                       fi->readdir_cache_idx = -1;
>> > +                       dfi->readdir_cache_idx = -1;
>> >                         /* preclude from marking dir complete */
>> > -                       fi->dir_release_count = 0;
>> > +                       dfi->dir_release_count = 0;
>> >                 }
>> >
>> >                 /* note next offset and last dentry name */
>> > @@ -464,19 +466,19 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>> >                                         rinfo->dir_entries + (rinfo->dir_nr-1);
>> >                         unsigned next_offset = req->r_reply_info.dir_end ?
>> >                                         2 : (fpos_off(rde->offset) + 1);
>> > -                       err = note_last_dentry(fi, rde->name, rde->name_len,
>> > +                       err = note_last_dentry(dfi, rde->name, rde->name_len,
>> >                                                next_offset);
>> >                         if (err)
>> >                                 return err;
>> >                 } else if (req->r_reply_info.dir_end) {
>> > -                       fi->next_offset = 2;
>> > +                       dfi->next_offset = 2;
>> >                         /* keep last name */
>> >                 }
>> >         }
>> >
>> > -       rinfo = &fi->last_readdir->r_reply_info;
>> > +       rinfo = &dfi->last_readdir->r_reply_info;
>> >         dout("readdir frag %x num %d pos %llx chunk first %llx\n",
>> > -            fi->frag, rinfo->dir_nr, ctx->pos,
>> > +            dfi->frag, rinfo->dir_nr, ctx->pos,
>> >              rinfo->dir_nr ? rinfo->dir_entries[0].offset : 0LL);
>> >
>> >         i = 0;
>> > @@ -520,27 +522,28 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>> >                 ctx->pos++;
>> >         }
>> >
>> > -       ceph_mdsc_put_request(fi->last_readdir);
>> > -       fi->last_readdir = NULL;
>> > +       ceph_mdsc_put_request(dfi->last_readdir);
>> > +       dfi->last_readdir = NULL;
>> >
>> > -       if (fi->next_offset > 2) {
>> > -               frag = fi->frag;
>> > +       if (dfi->next_offset > 2) {
>> > +               frag = dfi->frag;
>> >                 goto more;
>> >         }
>> >
>> >         /* more frags? */
>> > -       if (!ceph_frag_is_rightmost(fi->frag)) {
>> > -               frag = ceph_frag_next(fi->frag);
>> > +       if (!ceph_frag_is_rightmost(dfi->frag)) {
>> > +               frag = ceph_frag_next(dfi->frag);
>> >                 if (is_hash_order(ctx->pos)) {
>> >                         loff_t new_pos = ceph_make_fpos(ceph_frag_value(frag),
>> > -                                                       fi->next_offset, true);
>> > +                                                       dfi->next_offset, true);
>> >                         if (new_pos > ctx->pos)
>> >                                 ctx->pos = new_pos;
>> >                         /* keep last_name */
>> >                 } else {
>> > -                       ctx->pos = ceph_make_fpos(frag, fi->next_offset, false);
>> > -                       kfree(fi->last_name);
>> > -                       fi->last_name = NULL;
>> > +                       ctx->pos = ceph_make_fpos(frag, dfi->next_offset,
>> > +                                                       false);
>> > +                       kfree(dfi->last_name);
>> > +                       dfi->last_name = NULL;
>> >                 }
>> >                 dout("readdir next frag is %x\n", frag);
>> >                 goto more;
>> > @@ -552,20 +555,21 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>> >          * were released during the whole readdir, and we should have
>> >          * the complete dir contents in our cache.
>> >          */
>> > -       if (atomic64_read(&ci->i_release_count) == fi->dir_release_count) {
>> > +       if (atomic64_read(&ci->i_release_count) == dfi->dir_release_count) {
>> >                 spin_lock(&ci->i_ceph_lock);
>> > -               if (fi->dir_ordered_count == atomic64_read(&ci->i_ordered_count)) {
>> > +               if (dfi->dir_ordered_count ==
>> > +                               atomic64_read(&ci->i_ordered_count)) {
>> >                         dout(" marking %p complete and ordered\n", inode);
>> >                         /* use i_size to track number of entries in
>> >                          * readdir cache */
>> > -                       BUG_ON(fi->readdir_cache_idx < 0);
>> > -                       i_size_write(inode, fi->readdir_cache_idx *
>> > +                       BUG_ON(dfi->readdir_cache_idx < 0);
>> > +                       i_size_write(inode, dfi->readdir_cache_idx *
>> >                                      sizeof(struct dentry*));
>> >                 } else {
>> >                         dout(" marking %p complete\n", inode);
>> >                 }
>> > -               __ceph_dir_set_complete(ci, fi->dir_release_count,
>> > -                                       fi->dir_ordered_count);
>> > +               __ceph_dir_set_complete(ci, dfi->dir_release_count,
>> > +                                       dfi->dir_ordered_count);
>> >                 spin_unlock(&ci->i_ceph_lock);
>> >         }
>> >
>> > @@ -573,25 +577,25 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>> >         return 0;
>> >  }
>> >
>> > -static void reset_readdir(struct ceph_file_info *fi)
>> > +static void reset_readdir(struct ceph_dir_file_info *dfi)
>> >  {
>> > -       if (fi->last_readdir) {
>> > -               ceph_mdsc_put_request(fi->last_readdir);
>> > -               fi->last_readdir = NULL;
>> > +       if (dfi->last_readdir) {
>> > +               ceph_mdsc_put_request(dfi->last_readdir);
>> > +               dfi->last_readdir = NULL;
>> >         }
>> > -       kfree(fi->last_name);
>> > -       fi->last_name = NULL;
>> > -       fi->dir_release_count = 0;
>> > -       fi->readdir_cache_idx = -1;
>> > -       fi->next_offset = 2;  /* compensate for . and .. */
>> > -       fi->flags &= ~CEPH_F_ATEND;
>> > +       kfree(dfi->last_name);
>> > +       dfi->last_name = NULL;
>> > +       dfi->dir_release_count = 0;
>> > +       dfi->readdir_cache_idx = -1;
>> > +       dfi->next_offset = 2;  /* compensate for . and .. */
>> > +       dfi->file_info.flags &= ~CEPH_F_ATEND;
>> >  }
>> >
>> >  /*
>> >   * discard buffered readdir content on seekdir(0), or seek to new frag,
>> >   * or seek prior to current chunk
>> >   */
>> > -static bool need_reset_readdir(struct ceph_file_info *fi, loff_t new_pos)
>> > +static bool need_reset_readdir(struct ceph_dir_file_info *dfi, loff_t new_pos)
>> >  {
>> >         struct ceph_mds_reply_info_parsed *rinfo;
>> >         loff_t chunk_offset;
>> > @@ -600,10 +604,10 @@ static bool need_reset_readdir(struct ceph_file_info *fi, loff_t new_pos)
>> >         if (is_hash_order(new_pos)) {
>> >                 /* no need to reset last_name for a forward seek when
>> >                  * dentries are sotred in hash order */
>> > -       } else if (fi->frag != fpos_frag(new_pos)) {
>> > +       } else if (dfi->frag != fpos_frag(new_pos)) {
>> >                 return true;
>> >         }
>> > -       rinfo = fi->last_readdir ? &fi->last_readdir->r_reply_info : NULL;
>> > +       rinfo = dfi->last_readdir ? &dfi->last_readdir->r_reply_info : NULL;
>> >         if (!rinfo || !rinfo->dir_nr)
>> >                 return true;
>> >         chunk_offset = rinfo->dir_entries[0].offset;
>> > @@ -613,7 +617,7 @@ static bool need_reset_readdir(struct ceph_file_info *fi, loff_t new_pos)
>> >
>> >  static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
>> >  {
>> > -       struct ceph_file_info *fi = file->private_data;
>> > +       struct ceph_dir_file_info *dfi = file->private_data;
>> >         struct inode *inode = file->f_mapping->host;
>> >         loff_t retval;
>> >
>> > @@ -631,20 +635,20 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
>> >         }
>> >
>> >         if (offset >= 0) {
>> > -               if (need_reset_readdir(fi, offset)) {
>> > +               if (need_reset_readdir(dfi, offset)) {
>> >                         dout("dir_llseek dropping %p content\n", file);
>> > -                       reset_readdir(fi);
>> > +                       reset_readdir(dfi);
>> >                 } else if (is_hash_order(offset) && offset > file->f_pos) {
>> >                         /* for hash offset, we don't know if a forward seek
>> >                          * is within same frag */
>> > -                       fi->dir_release_count = 0;
>> > -                       fi->readdir_cache_idx = -1;
>> > +                       dfi->dir_release_count = 0;
>> > +                       dfi->readdir_cache_idx = -1;
>> >                 }
>> >
>> >                 if (offset != file->f_pos) {
>> >                         file->f_pos = offset;
>> >                         file->f_version = 0;
>> > -                       fi->flags &= ~CEPH_F_ATEND;
>> > +                       dfi->file_info.flags &= ~CEPH_F_ATEND;
>> >                 }
>> >                 retval = offset;
>> >         }
>> > @@ -1352,7 +1356,7 @@ static void ceph_d_prune(struct dentry *dentry)
>> >  static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
>> >                              loff_t *ppos)
>> >  {
>> > -       struct ceph_file_info *cf = file->private_data;
>> > +       struct ceph_dir_file_info *dfi = file->private_data;
>> >         struct inode *inode = file_inode(file);
>> >         struct ceph_inode_info *ci = ceph_inode(inode);
>> >         int left;
>> > @@ -1361,12 +1365,12 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
>> >         if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT))
>> >                 return -EISDIR;
>> >
>> > -       if (!cf->dir_info) {
>> > -               cf->dir_info = kmalloc(bufsize, GFP_KERNEL);
>> > -               if (!cf->dir_info)
>> > +       if (!dfi->dir_info) {
>> > +               dfi->dir_info = kmalloc(bufsize, GFP_KERNEL);
>> > +               if (!dfi->dir_info)
>> >                         return -ENOMEM;
>> > -               cf->dir_info_len =
>> > -                       snprintf(cf->dir_info, bufsize,
>> > +               dfi->dir_info_len =
>> > +                       snprintf(dfi->dir_info, bufsize,
>> >                                 "entries:   %20lld\n"
>> >                                 " files:    %20lld\n"
>> >                                 " subdirs:  %20lld\n"
>> > @@ -1386,10 +1390,10 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
>> >                                 (long)ci->i_rctime.tv_nsec);
>> >         }
>> >
>> > -       if (*ppos >= cf->dir_info_len)
>> > +       if (*ppos >= dfi->dir_info_len)
>> >                 return 0;
>> > -       size = min_t(unsigned, size, cf->dir_info_len-*ppos);
>> > -       left = copy_to_user(buf, cf->dir_info + *ppos, size);
>> > +       size = min_t(unsigned int, size, dfi->dir_info_len-*ppos);
>> > +       left = copy_to_user(buf, dfi->dir_info + *ppos, size);
>> >         if (left == size)
>> >                 return -EFAULT;
>> >         *ppos += (size - left);
>> > diff --git a/fs/ceph/file.c b/fs/ceph/file.c
>> > index 6639926..af3ab53 100644
>> > --- a/fs/ceph/file.c
>> > +++ b/fs/ceph/file.c
>> > @@ -159,36 +159,63 @@ static size_t dio_get_pagev_size(const struct iov_iter *it)
>> >         return req;
>> >  }
>> >
>> > +static int ceph_init_file_info(struct inode *inode, struct file *file,
>> > +                                       int fmode, bool isdir)
>> > +{
>> > +       struct ceph_file_info *fi;
>> > +       struct ceph_dir_file_info *dfi;
>> > +
>> > +       dout("%s %p %p 0%o (%s)\n", __func__, inode, file,
>> > +                               inode->i_mode, isdir ? "dir" : "regular");
>> > +       BUG_ON(inode->i_fop->release != ceph_release);
>> > +
>> > +       if (isdir) {
>> > +               dfi = kmem_cache_zalloc(ceph_dir_file_cachep, GFP_KERNEL);
>> > +               if (!dfi) {
>> > +                       ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
>> > +                       return -ENOMEM;
>> > +               }
>> > +
>> > +               fi = &dfi->file_info;
>> > +               dfi->next_offset = 2;
>> > +               dfi->readdir_cache_idx = -1;
>> > +               file->private_data = dfi;
>> > +       } else {
>> > +               fi = kmem_cache_zalloc(ceph_file_cachep, GFP_KERNEL);
>> > +               if (!fi) {
>> > +                       ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
>> > +                       return -ENOMEM;
>> > +               }
>> > +
>> > +               fi->fmode = fmode;
>> > +               file->private_data = fi;
>> > +
>> > +               ceph_fscache_register_inode_cookie(inode);
>> > +               ceph_fscache_file_set_cookie(inode, file);
>> > +       }
>> > +
>> > +       fi->fmode = fmode;
>> > +       spin_lock_init(&fi->rw_contexts_lock);
>> > +       INIT_LIST_HEAD(&fi->rw_contexts);
>> > +
>> > +       return 0;
>> > +}
>> > +
>> >  /*
>> >   * initialize private struct file data.
>> >   * if we fail, clean up by dropping fmode reference on the ceph_inode
>> >   */
>> >  static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
>> >  {
>> > -       struct ceph_file_info *cf;
>> >         int ret = 0;
>> >
>> >         switch (inode->i_mode & S_IFMT) {
>> >         case S_IFREG:
>> > -               ceph_fscache_register_inode_cookie(inode);
>> > -               ceph_fscache_file_set_cookie(inode, file);
>> >         case S_IFDIR:
>> > -               dout("init_file %p %p 0%o (regular)\n", inode, file,
>> > -                    inode->i_mode);
>> > -               cf = kmem_cache_zalloc(ceph_file_cachep, GFP_KERNEL);
>> > -               if (!cf) {
>> > -                       ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
>> > -                       return -ENOMEM;
>> > -               }
>> > -               cf->fmode = fmode;
>> > -
>> > -               spin_lock_init(&cf->rw_contexts_lock);
>> > -               INIT_LIST_HEAD(&cf->rw_contexts);
>> > -
>> > -               cf->next_offset = 2;
>> > -               cf->readdir_cache_idx = -1;
>> > -               file->private_data = cf;
>> > -               BUG_ON(inode->i_fop->release != ceph_release);
>> > +               ret = ceph_init_file_info(inode, file, fmode,
>> > +                                               S_ISDIR(inode->i_mode));
>> > +               if (ret)
>> > +                       return ret;
>> >                 break;
>> >
>> >         case S_IFLNK:
>> > @@ -460,16 +487,32 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
>> >  int ceph_release(struct inode *inode, struct file *file)
>> >  {
>> >         struct ceph_inode_info *ci = ceph_inode(inode);
>> > -       struct ceph_file_info *cf = file->private_data;
>> > +       struct ceph_dir_file_info *dfi;
>> > +       struct ceph_file_info *fi;
>> > +
>> > +       if (S_ISDIR(inode->i_mode)) {
>> > +               dfi = file->private_data;
>> > +               fi = &dfi->file_info;
>> > +
>> > +               dout("release inode %p dir file %p\n", inode, file);
>> > +               WARN_ON(!list_empty(&fi->rw_contexts));
>> > +
>> > +               ceph_put_fmode(ci, fi->fmode);
>> > +               if (dfi->last_readdir)
>> > +                       ceph_mdsc_put_request(dfi->last_readdir);
>> >
>> > -       dout("release inode %p file %p\n", inode, file);
>> > -       ceph_put_fmode(ci, cf->fmode);
>> > -       if (cf->last_readdir)
>> > -               ceph_mdsc_put_request(cf->last_readdir);
>> > -       kfree(cf->last_name);
>> > -       kfree(cf->dir_info);
>> > -       WARN_ON(!list_empty(&cf->rw_contexts));
>> > -       kmem_cache_free(ceph_file_cachep, cf);
>> > +               kfree(dfi->last_name);
>> > +               kfree(dfi->dir_info);
>> > +               kmem_cache_free(ceph_dir_file_cachep, dfi);
>> > +       } else {
>> > +               fi = file->private_data;
>> > +
>> > +               dout("release inode %p regular file %p\n", inode, file);
>> > +               WARN_ON(!list_empty(&fi->rw_contexts));
>> > +
>> > +               ceph_put_fmode(ci, fi->fmode);
>> > +               kmem_cache_free(ceph_file_cachep, fi);
>> > +       }
>> >
>> >         /* wake up anyone waiting for caps on this inode */
>> >         wake_up_all(&ci->i_cap_wq);
>> > diff --git a/fs/ceph/super.c b/fs/ceph/super.c
>> > index fb2bc9c..d884ba9 100644
>> > --- a/fs/ceph/super.c
>> > +++ b/fs/ceph/super.c
>> > @@ -679,6 +679,7 @@ static void destroy_fs_client(struct ceph_fs_client *fsc)
>> >  struct kmem_cache *ceph_cap_flush_cachep;
>> >  struct kmem_cache *ceph_dentry_cachep;
>> >  struct kmem_cache *ceph_file_cachep;
>> > +struct kmem_cache *ceph_dir_file_cachep;
>> >
>> >  static void ceph_inode_init_once(void *foo)
>> >  {
>> > @@ -716,6 +717,10 @@ static int __init init_caches(void)
>> >         if (!ceph_file_cachep)
>> >                 goto bad_file;
>> >
>> > +       ceph_dir_file_cachep = KMEM_CACHE(ceph_dir_file_info, SLAB_MEM_SPREAD);
>> > +       if (!ceph_dir_file_cachep)
>> > +               goto bad_dir_file;
>> > +
>> >         error = ceph_fscache_register();
>> >         if (error)
>> >                 goto bad_fscache;
>> > @@ -723,6 +728,8 @@ static int __init init_caches(void)
>> >         return 0;
>> >
>> >  bad_fscache:
>> > +       kmem_cache_destroy(ceph_dir_file_cachep);
>> > +bad_dir_file:
>> >         kmem_cache_destroy(ceph_file_cachep);
>> >  bad_file:
>> >         kmem_cache_destroy(ceph_dentry_cachep);
>> > @@ -748,6 +755,7 @@ static void destroy_caches(void)
>> >         kmem_cache_destroy(ceph_cap_flush_cachep);
>> >         kmem_cache_destroy(ceph_dentry_cachep);
>> >         kmem_cache_destroy(ceph_file_cachep);
>> > +       kmem_cache_destroy(ceph_dir_file_cachep);
>> >
>> >         ceph_fscache_unregister();
>> >  }
>> > diff --git a/fs/ceph/super.h b/fs/ceph/super.h
>> > index 1c2086e..c9f9474 100644
>> > --- a/fs/ceph/super.h
>> > +++ b/fs/ceph/super.h
>> > @@ -671,6 +671,10 @@ struct ceph_file_info {
>> >
>> >         spinlock_t rw_contexts_lock;
>> >         struct list_head rw_contexts;
>> > +};
>> > +
>> > +struct ceph_dir_file_info {
>> > +       struct ceph_file_info file_info;
>> >
>> >         /* readdir: position within the dir */
>> >         u32 frag;
>> > @@ -686,6 +690,7 @@ struct ceph_file_info {
>> >         /* used for -o dirstat read() on directory thing */
>> >         char *dir_info;
>> >         int dir_info_len;
>> > +
>> >  };
>> >
>> >  struct ceph_rw_context {
>> > diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
>> > index c2ec44c..49c93b9 100644
>> > --- a/include/linux/ceph/libceph.h
>> > +++ b/include/linux/ceph/libceph.h
>> > @@ -262,6 +262,7 @@ static inline int calc_pages_for(u64 off, u64 len)
>> >  extern struct kmem_cache *ceph_cap_flush_cachep;
>> >  extern struct kmem_cache *ceph_dentry_cachep;
>> >  extern struct kmem_cache *ceph_file_cachep;
>> > +extern struct kmem_cache *ceph_dir_file_cachep;
>> >
>> >  /* ceph_common.c */
>> >  extern bool libceph_compatible(void *data);
>> > --
>> > 1.8.3.1
>> >
>> > --
>> > To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
>> > the body of a message to majordomo@xxxxxxxxxxxxxxx
>> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html