> On 12 Mar 2018, at 16:56, Chengguang Xu <cgxu519@xxxxxxx> wrote:
> 
>> Sent: Monday, March 12, 2018 at 3:17 PM
>> From: "Yan, Zheng" <ukernel@xxxxxxxxx>
>> To: "Chengguang Xu" <cgxu519@xxxxxxx>
>> Cc: "Zheng Yan" <zyan@xxxxxxxxxx>, "Ilya Dryomov" <idryomov@xxxxxxxxx>, ceph-devel <ceph-devel@xxxxxxxxxxxxxxx>
>> Subject: Re: [PATCH v2] ceph: optimize memory usage
>>
>> On Fri, Mar 9, 2018 at 11:41 AM, Chengguang Xu <cgxu519@xxxxxxx> wrote:
>>> In the current code, regular files and directories use the same struct
>>> ceph_file_info to store fs-specific data, so the struct has to include
>>> some fields which are only used for directories (e.g., readdir related
>>> info). When there are plenty of regular files, this leads to memory
>>> waste.
>>>
>>> This patch introduces a dedicated ceph_dir_file_info cache for
>>> directories and deletes the readdir related things from ceph_file_info,
>>> so that regular files do not include those unused fields anymore.
>>> Also, changed to manipulate fscache only after a regular file acquires
>>> its ceph_file_info successfully.
>>>
>>> Signed-off-by: Chengguang Xu <cgxu519@xxxxxxx>
>>>
>> Can't apply this patch cleanly. Please rebase it against
>> https://github.com/ceph/ceph-client/commits/testing.
> 
> I'll resend a rebased version later. By the way, should I send patches based on this tree next time?
> 

Yes, please create the patch based on this tree.

Regards
Yan, Zheng

> 
> Thanks,
> Chengguang.
> 
>> 
>> Yan, Zheng
>> ---
>>> Changes since v1:
>>> - Modify ceph_dir_file_info to include ceph_file_info instead of pointing to it.
>>>
>>>  fs/ceph/addr.c               |  12 ++-
>>>  fs/ceph/dir.c                | 180 ++++++++++++++++++++++---------------------
>>>  fs/ceph/file.c               |  99 +++++++++++++++++-------
>>>  fs/ceph/super.c              |   8 ++
>>>  fs/ceph/super.h              |   5 ++
>>>  include/linux/ceph/libceph.h |   1 +
>>>  6 files changed, 187 insertions(+), 118 deletions(-)
>>>
>>> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
>>> index b4336b4..c14f91e 100644
>>> --- a/fs/ceph/addr.c
>>> +++ b/fs/ceph/addr.c
>>> @@ -438,7 +438,8 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
>>>  {
>>>          struct inode *inode = file_inode(file);
>>>          struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
>>> -        struct ceph_file_info *ci = file->private_data;
>>> +        struct ceph_file_info *fi;
>>> +        struct ceph_dir_file_info *dfi;
>>>          struct ceph_rw_context *rw_ctx;
>>>          int rc = 0;
>>>          int max = 0;
>>> @@ -452,7 +453,14 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
>>>          if (rc == 0)
>>>                  goto out;
>>>
>>> -        rw_ctx = ceph_find_rw_context(ci);
>>> +        if (S_ISDIR(inode->i_mode)) {
>>> +                dfi = file->private_data;
>>> +                fi = &dfi->file_info;
>>> +        } else {
>>> +                fi = file->private_data;
>>> +        }
>>> +
>>> +        rw_ctx = ceph_find_rw_context(fi);
>>>          max = fsc->mount_options->rsize >> PAGE_SHIFT;
>>>          dout("readpages %p file %p ctx %p nr_pages %d max %d\n",
>>>               inode, file, rw_ctx, nr_pages, max);
>>> diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
>>> index f1d9c6c..f422c7b 100644
>>> --- a/fs/ceph/dir.c
>>> +++ b/fs/ceph/dir.c
>>> @@ -102,18 +102,18 @@ static int fpos_cmp(loff_t l, loff_t r)
>>>   * regardless of what dir changes take place on the
>>>   * server.
>>>   */
>>> -static int note_last_dentry(struct ceph_file_info *fi, const char *name,
>>> +static int note_last_dentry(struct ceph_dir_file_info *dfi, const char *name,
>>>                              int len, unsigned next_offset)
>>>  {
>>>          char *buf = kmalloc(len+1, GFP_KERNEL);
>>>          if (!buf)
>>>                  return -ENOMEM;
>>> -        kfree(fi->last_name);
>>> -        fi->last_name = buf;
>>> -        memcpy(fi->last_name, name, len);
>>> -        fi->last_name[len] = 0;
>>> -        fi->next_offset = next_offset;
>>> -        dout("note_last_dentry '%s'\n", fi->last_name);
>>> +        kfree(dfi->last_name);
>>> +        dfi->last_name = buf;
>>> +        memcpy(dfi->last_name, name, len);
>>> +        dfi->last_name[len] = 0;
>>> +        dfi->next_offset = next_offset;
>>> +        dout("%s: '%s'\n", __func__, dfi->last_name);
>>>          return 0;
>>>  }
>>>
>>> @@ -175,7 +175,8 @@ static int note_last_dentry(struct ceph_file_info *fi, const char *name,
>>>  static int __dcache_readdir(struct file *file, struct dir_context *ctx,
>>>                              int shared_gen)
>>>  {
>>> -        struct ceph_file_info *fi = file->private_data;
>>> +        struct ceph_dir_file_info *dfi = file->private_data;
>>> +        struct ceph_file_info *fi = &dfi->file_info;
>>>          struct dentry *parent = file->f_path.dentry;
>>>          struct inode *dir = d_inode(parent);
>>>          struct dentry *dentry, *last = NULL;
>>> @@ -273,33 +274,34 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx,
>>>          if (last) {
>>>                  int ret;
>>>                  di = ceph_dentry(last);
>>> -                ret = note_last_dentry(fi, last->d_name.name, last->d_name.len,
>>> +                ret = note_last_dentry(dfi, last->d_name.name, last->d_name.len,
>>>                                         fpos_off(di->offset) + 1);
>>>                  if (ret < 0)
>>>                          err = ret;
>>>                  dput(last);
>>>                  /* last_name no longer match cache index */
>>> -                if (fi->readdir_cache_idx >= 0) {
>>> -                        fi->readdir_cache_idx = -1;
>>> -                        fi->dir_release_count = 0;
>>> +                if (dfi->readdir_cache_idx >= 0) {
>>> +                        dfi->readdir_cache_idx = -1;
>>> +                        dfi->dir_release_count = 0;
>>>                  }
>>>          }
>>>          return err;
>>>  }
>>>
>>> -static bool need_send_readdir(struct ceph_file_info *fi, loff_t pos)
>>> +static bool need_send_readdir(struct ceph_dir_file_info *dfi, loff_t pos)
>>>  {
>>> -        if (!fi->last_readdir)
>>> +        if (!dfi->last_readdir)
>>>                  return true;
>>>          if (is_hash_order(pos))
>>> -                return !ceph_frag_contains_value(fi->frag, fpos_hash(pos));
>>> +                return !ceph_frag_contains_value(dfi->frag, fpos_hash(pos));
>>>          else
>>> -                return fi->frag != fpos_frag(pos);
>>> +                return dfi->frag != fpos_frag(pos);
>>>  }
>>>
>>>  static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>  {
>>> -        struct ceph_file_info *fi = file->private_data;
>>> +        struct ceph_dir_file_info *dfi = file->private_data;
>>> +        struct ceph_file_info *fi = &dfi->file_info;
>>>          struct inode *inode = file_inode(file);
>>>          struct ceph_inode_info *ci = ceph_inode(inode);
>>>          struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
>>> @@ -351,15 +353,15 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>          /* proceed with a normal readdir */
>>>  more:
>>>          /* do we have the correct frag content buffered? */
>>> -        if (need_send_readdir(fi, ctx->pos)) {
>>> +        if (need_send_readdir(dfi, ctx->pos)) {
>>>                  struct ceph_mds_request *req;
>>>                  int op = ceph_snap(inode) == CEPH_SNAPDIR ?
>>>                          CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR;
>>>
>>>                  /* discard old result, if any */
>>> -                if (fi->last_readdir) {
>>> -                        ceph_mdsc_put_request(fi->last_readdir);
>>> -                        fi->last_readdir = NULL;
>>> +                if (dfi->last_readdir) {
>>> +                        ceph_mdsc_put_request(dfi->last_readdir);
>>> +                        dfi->last_readdir = NULL;
>>>                  }
>>>
>>>                  if (is_hash_order(ctx->pos)) {
>>> @@ -373,7 +375,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>                  }
>>>
>>>                  dout("readdir fetching %llx.%llx frag %x offset '%s'\n",
>>> -                     ceph_vinop(inode), frag, fi->last_name);
>>> +                     ceph_vinop(inode), frag, dfi->last_name);
>>>                  req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
>>>                  if (IS_ERR(req))
>>>                          return PTR_ERR(req);
>>> @@ -389,8 +391,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>                          __set_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags);
>>>                          req->r_inode_drop = CEPH_CAP_FILE_EXCL;
>>>                  }
>>> -                if (fi->last_name) {
>>> -                        req->r_path2 = kstrdup(fi->last_name, GFP_KERNEL);
>>> +                if (dfi->last_name) {
>>> +                        req->r_path2 = kstrdup(dfi->last_name, GFP_KERNEL);
>>>                          if (!req->r_path2) {
>>>                                  ceph_mdsc_put_request(req);
>>>                                  return -ENOMEM;
>>> @@ -400,10 +402,10 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>                                  cpu_to_le32(fpos_hash(ctx->pos));
>>>                  }
>>>
>>> -                req->r_dir_release_cnt = fi->dir_release_count;
>>> -                req->r_dir_ordered_cnt = fi->dir_ordered_count;
>>> -                req->r_readdir_cache_idx = fi->readdir_cache_idx;
>>> -                req->r_readdir_offset = fi->next_offset;
>>> +                req->r_dir_release_cnt = dfi->dir_release_count;
>>> +                req->r_dir_ordered_cnt = dfi->dir_ordered_count;
>>> +                req->r_readdir_cache_idx = dfi->readdir_cache_idx;
>>> +                req->r_readdir_offset = dfi->next_offset;
>>>                  req->r_args.readdir.frag = cpu_to_le32(frag);
>>>                  req->r_args.readdir.flags =
>>>                                  cpu_to_le16(CEPH_READDIR_REPLY_BITFLAGS);
>>> @@ -427,35 +429,35 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>                  if (le32_to_cpu(rinfo->dir_dir->frag) != frag) {
>>>                          frag = le32_to_cpu(rinfo->dir_dir->frag);
>>>                          if (!rinfo->hash_order) {
>>> -                                fi->next_offset = req->r_readdir_offset;
>>> +                                dfi->next_offset = req->r_readdir_offset;
>>>                                  /* adjust ctx->pos to beginning of frag */
>>>                                  ctx->pos = ceph_make_fpos(frag,
>>> -                                                          fi->next_offset,
>>> +                                                          dfi->next_offset,
>>>                                                            false);
>>>                          }
>>>                  }
>>>
>>> -                fi->frag = frag;
>>> -                fi->last_readdir = req;
>>> +                dfi->frag = frag;
>>> +                dfi->last_readdir = req;
>>>
>>>                  if (test_bit(CEPH_MDS_R_DID_PREPOPULATE, &req->r_req_flags)) {
>>> -                        fi->readdir_cache_idx = req->r_readdir_cache_idx;
>>> -                        if (fi->readdir_cache_idx < 0) {
>>> +                        dfi->readdir_cache_idx = req->r_readdir_cache_idx;
>>> +                        if (dfi->readdir_cache_idx < 0) {
>>>                                  /* preclude from marking dir ordered */
>>> -                                fi->dir_ordered_count = 0;
>>> +                                dfi->dir_ordered_count = 0;
>>>                          } else if (ceph_frag_is_leftmost(frag) &&
>>> -                                   fi->next_offset == 2) {
>>> +                                   dfi->next_offset == 2) {
>>>                                  /* note dir version at start of readdir so
>>>                                   * we can tell if any dentries get dropped */
>>> -                                fi->dir_release_count = req->r_dir_release_cnt;
>>> -                                fi->dir_ordered_count = req->r_dir_ordered_cnt;
>>> +                                dfi->dir_release_count = req->r_dir_release_cnt;
>>> +                                dfi->dir_ordered_count = req->r_dir_ordered_cnt;
>>>                          }
>>>                  } else {
>>>                          dout("readdir !did_prepopulate");
>>>                          /* disable readdir cache */
>>> -                        fi->readdir_cache_idx = -1;
>>> +                        dfi->readdir_cache_idx = -1;
>>>                          /* preclude from marking dir complete */
>>> -                        fi->dir_release_count = 0;
>>> +                        dfi->dir_release_count = 0;
>>>                  }
>>>
>>>                  /* note next offset and last dentry name */
>>> @@ -464,19 +466,19 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>                                  rinfo->dir_entries + (rinfo->dir_nr-1);
>>>                          unsigned next_offset = req->r_reply_info.dir_end ?
>>>                                          2 : (fpos_off(rde->offset) + 1);
>>> -                        err = note_last_dentry(fi, rde->name, rde->name_len,
>>> +                        err = note_last_dentry(dfi, rde->name, rde->name_len,
>>>                                                 next_offset);
>>>                          if (err)
>>>                                  return err;
>>>                  } else if (req->r_reply_info.dir_end) {
>>> -                        fi->next_offset = 2;
>>> +                        dfi->next_offset = 2;
>>>                          /* keep last name */
>>>                  }
>>>          }
>>>
>>> -        rinfo = &fi->last_readdir->r_reply_info;
>>> +        rinfo = &dfi->last_readdir->r_reply_info;
>>>          dout("readdir frag %x num %d pos %llx chunk first %llx\n",
>>> -             fi->frag, rinfo->dir_nr, ctx->pos,
>>> +             dfi->frag, rinfo->dir_nr, ctx->pos,
>>>               rinfo->dir_nr ? rinfo->dir_entries[0].offset : 0LL);
>>>
>>>          i = 0;
>>> @@ -520,27 +522,28 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>                  ctx->pos++;
>>>          }
>>>
>>> -        ceph_mdsc_put_request(fi->last_readdir);
>>> -        fi->last_readdir = NULL;
>>> +        ceph_mdsc_put_request(dfi->last_readdir);
>>> +        dfi->last_readdir = NULL;
>>>
>>> -        if (fi->next_offset > 2) {
>>> -                frag = fi->frag;
>>> +        if (dfi->next_offset > 2) {
>>> +                frag = dfi->frag;
>>>                  goto more;
>>>          }
>>>
>>>          /* more frags? */
>>> -        if (!ceph_frag_is_rightmost(fi->frag)) {
>>> -                frag = ceph_frag_next(fi->frag);
>>> +        if (!ceph_frag_is_rightmost(dfi->frag)) {
>>> +                frag = ceph_frag_next(dfi->frag);
>>>                  if (is_hash_order(ctx->pos)) {
>>>                          loff_t new_pos = ceph_make_fpos(ceph_frag_value(frag),
>>> -                                                        fi->next_offset, true);
>>> +                                                        dfi->next_offset, true);
>>>                          if (new_pos > ctx->pos)
>>>                                  ctx->pos = new_pos;
>>>                          /* keep last_name */
>>>                  } else {
>>> -                        ctx->pos = ceph_make_fpos(frag, fi->next_offset, false);
>>> -                        kfree(fi->last_name);
>>> -                        fi->last_name = NULL;
>>> +                        ctx->pos = ceph_make_fpos(frag, dfi->next_offset,
>>> +                                                  false);
>>> +                        kfree(dfi->last_name);
>>> +                        dfi->last_name = NULL;
>>>                  }
>>>                  dout("readdir next frag is %x\n", frag);
>>>                  goto more;
>>> @@ -552,20 +555,21 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>   * were released during the whole readdir, and we should have
>>>   * the complete dir contents in our cache.
>>>   */
>>> -        if (atomic64_read(&ci->i_release_count) == fi->dir_release_count) {
>>> +        if (atomic64_read(&ci->i_release_count) == dfi->dir_release_count) {
>>>                  spin_lock(&ci->i_ceph_lock);
>>> -                if (fi->dir_ordered_count == atomic64_read(&ci->i_ordered_count)) {
>>> +                if (dfi->dir_ordered_count ==
>>> +                                atomic64_read(&ci->i_ordered_count)) {
>>>                          dout(" marking %p complete and ordered\n", inode);
>>>                          /* use i_size to track number of entries in
>>>                           * readdir cache */
>>> -                        BUG_ON(fi->readdir_cache_idx < 0);
>>> -                        i_size_write(inode, fi->readdir_cache_idx *
>>> +                        BUG_ON(dfi->readdir_cache_idx < 0);
>>> +                        i_size_write(inode, dfi->readdir_cache_idx *
>>>                                       sizeof(struct dentry*));
>>>                  } else {
>>>                          dout(" marking %p complete\n", inode);
>>>                  }
>>> -                __ceph_dir_set_complete(ci, fi->dir_release_count,
>>> -                                        fi->dir_ordered_count);
>>> +                __ceph_dir_set_complete(ci, dfi->dir_release_count,
>>> +                                        dfi->dir_ordered_count);
>>>                  spin_unlock(&ci->i_ceph_lock);
>>>          }
>>>
>>> @@ -573,25 +577,25 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
>>>          return 0;
>>>  }
>>>
>>> -static void reset_readdir(struct ceph_file_info *fi)
>>> +static void reset_readdir(struct ceph_dir_file_info *dfi)
>>>  {
>>> -        if (fi->last_readdir) {
>>> -                ceph_mdsc_put_request(fi->last_readdir);
>>> -                fi->last_readdir = NULL;
>>> +        if (dfi->last_readdir) {
>>> +                ceph_mdsc_put_request(dfi->last_readdir);
>>> +                dfi->last_readdir = NULL;
>>>          }
>>> -        kfree(fi->last_name);
>>> -        fi->last_name = NULL;
>>> -        fi->dir_release_count = 0;
>>> -        fi->readdir_cache_idx = -1;
>>> -        fi->next_offset = 2;  /* compensate for . and .. */
>>> -        fi->flags &= ~CEPH_F_ATEND;
>>> +        kfree(dfi->last_name);
>>> +        dfi->last_name = NULL;
>>> +        dfi->dir_release_count = 0;
>>> +        dfi->readdir_cache_idx = -1;
>>> +        dfi->next_offset = 2;  /* compensate for . and .. */
>>> +        dfi->file_info.flags &= ~CEPH_F_ATEND;
>>>  }
>>>
>>>  /*
>>>   * discard buffered readdir content on seekdir(0), or seek to new frag,
>>>   * or seek prior to current chunk
>>>   */
>>> -static bool need_reset_readdir(struct ceph_file_info *fi, loff_t new_pos)
>>> +static bool need_reset_readdir(struct ceph_dir_file_info *dfi, loff_t new_pos)
>>>  {
>>>          struct ceph_mds_reply_info_parsed *rinfo;
>>>          loff_t chunk_offset;
>>> @@ -600,10 +604,10 @@ static bool need_reset_readdir(struct ceph_file_info *fi, loff_t new_pos)
>>>          if (is_hash_order(new_pos)) {
>>>                  /* no need to reset last_name for a forward seek when
>>>                   * dentries are sotred in hash order */
>>> -        } else if (fi->frag != fpos_frag(new_pos)) {
>>> +        } else if (dfi->frag != fpos_frag(new_pos)) {
>>>                  return true;
>>>          }
>>> -        rinfo = fi->last_readdir ? &fi->last_readdir->r_reply_info : NULL;
>>> +        rinfo = dfi->last_readdir ? &dfi->last_readdir->r_reply_info : NULL;
>>>          if (!rinfo || !rinfo->dir_nr)
>>>                  return true;
>>>          chunk_offset = rinfo->dir_entries[0].offset;
>>> @@ -613,7 +617,7 @@ static bool need_reset_readdir(struct ceph_file_info *fi, loff_t new_pos)
>>>
>>>  static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
>>>  {
>>> -        struct ceph_file_info *fi = file->private_data;
>>> +        struct ceph_dir_file_info *dfi = file->private_data;
>>>          struct inode *inode = file->f_mapping->host;
>>>          loff_t retval;
>>>
>>> @@ -631,20 +635,20 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
>>>          }
>>>
>>>          if (offset >= 0) {
>>> -                if (need_reset_readdir(fi, offset)) {
>>> +                if (need_reset_readdir(dfi, offset)) {
>>>                          dout("dir_llseek dropping %p content\n", file);
>>> -                        reset_readdir(fi);
>>> +                        reset_readdir(dfi);
>>>                  } else if (is_hash_order(offset) && offset > file->f_pos) {
>>>                          /* for hash offset, we don't know if a forward seek
>>>                           * is within same frag */
>>> -                        fi->dir_release_count = 0;
>>> -                        fi->readdir_cache_idx = -1;
>>> +                        dfi->dir_release_count = 0;
>>> +                        dfi->readdir_cache_idx = -1;
>>>                  }
>>>
>>>                  if (offset != file->f_pos) {
>>>                          file->f_pos = offset;
>>>                          file->f_version = 0;
>>> -                        fi->flags &= ~CEPH_F_ATEND;
>>> +                        dfi->file_info.flags &= ~CEPH_F_ATEND;
>>>                  }
>>>                  retval = offset;
>>>          }
>>> @@ -1352,7 +1356,7 @@ static void ceph_d_prune(struct dentry *dentry)
>>>  static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
>>>                               loff_t *ppos)
>>>  {
>>> -        struct ceph_file_info *cf = file->private_data;
>>> +        struct ceph_dir_file_info *dfi = file->private_data;
>>>          struct inode *inode = file_inode(file);
>>>          struct ceph_inode_info *ci = ceph_inode(inode);
>>>          int left;
>>> @@ -1361,12 +1365,12 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
>>>          if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT))
>>>                  return -EISDIR;
>>>
>>> -        if (!cf->dir_info) {
>>> -                cf->dir_info = kmalloc(bufsize, GFP_KERNEL);
>>> -                if (!cf->dir_info)
>>> +        if (!dfi->dir_info) {
>>> +                dfi->dir_info = kmalloc(bufsize, GFP_KERNEL);
>>> +                if (!dfi->dir_info)
>>>                          return -ENOMEM;
>>> -                cf->dir_info_len =
>>> -                        snprintf(cf->dir_info, bufsize,
>>> +                dfi->dir_info_len =
>>> +                        snprintf(dfi->dir_info, bufsize,
>>>                                  "entries: %20lld\n"
>>>                                  " files: %20lld\n"
>>>                                  " subdirs: %20lld\n"
>>> @@ -1386,10 +1390,10 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
>>>                                  (long)ci->i_rctime.tv_nsec);
>>>          }
>>>
>>> -        if (*ppos >= cf->dir_info_len)
>>> +        if (*ppos >= dfi->dir_info_len)
>>>                  return 0;
>>> -        size = min_t(unsigned, size, cf->dir_info_len-*ppos);
>>> -        left = copy_to_user(buf, cf->dir_info + *ppos, size);
>>> +        size = min_t(unsigned int, size, dfi->dir_info_len-*ppos);
>>> +        left = copy_to_user(buf, dfi->dir_info + *ppos, size);
>>>          if (left == size)
>>>                  return -EFAULT;
>>>          *ppos += (size - left);
>>> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
>>> index 6639926..af3ab53 100644
>>> --- a/fs/ceph/file.c
>>> +++ b/fs/ceph/file.c
>>> @@ -159,36 +159,63 @@ static size_t dio_get_pagev_size(const struct iov_iter *it)
>>>          return req;
>>>  }
>>>
>>> +static int ceph_init_file_info(struct inode *inode, struct file *file,
>>> +                               int fmode, bool isdir)
>>> +{
>>> +        struct ceph_file_info *fi;
>>> +        struct ceph_dir_file_info *dfi;
>>> +
>>> +        dout("%s %p %p 0%o (%s)\n", __func__, inode, file,
>>> +             inode->i_mode, isdir ? "dir" : "regular");
>>> +        BUG_ON(inode->i_fop->release != ceph_release);
>>> +
>>> +        if (isdir) {
>>> +                dfi = kmem_cache_zalloc(ceph_dir_file_cachep, GFP_KERNEL);
>>> +                if (!dfi) {
>>> +                        ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
>>> +                        return -ENOMEM;
>>> +                }
>>> +
>>> +                fi = &dfi->file_info;
>>> +                dfi->next_offset = 2;
>>> +                dfi->readdir_cache_idx = -1;
>>> +                file->private_data = dfi;
>>> +        } else {
>>> +                fi = kmem_cache_zalloc(ceph_file_cachep, GFP_KERNEL);
>>> +                if (!fi) {
>>> +                        ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
>>> +                        return -ENOMEM;
>>> +                }
>>> +
>>> +                fi->fmode = fmode;
>>> +                file->private_data = fi;
>>> +
>>> +                ceph_fscache_register_inode_cookie(inode);
>>> +                ceph_fscache_file_set_cookie(inode, file);
>>> +        }
>>> +
>>> +        fi->fmode = fmode;
>>> +        spin_lock_init(&fi->rw_contexts_lock);
>>> +        INIT_LIST_HEAD(&fi->rw_contexts);
>>> +
>>> +        return 0;
>>> +}
>>> +
>>>  /*
>>>   * initialize private struct file data.
>>>   * if we fail, clean up by dropping fmode reference on the ceph_inode
>>>   */
>>>  static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
>>>  {
>>> -        struct ceph_file_info *cf;
>>>          int ret = 0;
>>>
>>>          switch (inode->i_mode & S_IFMT) {
>>>          case S_IFREG:
>>> -                ceph_fscache_register_inode_cookie(inode);
>>> -                ceph_fscache_file_set_cookie(inode, file);
>>>          case S_IFDIR:
>>> -                dout("init_file %p %p 0%o (regular)\n", inode, file,
>>> -                     inode->i_mode);
>>> -                cf = kmem_cache_zalloc(ceph_file_cachep, GFP_KERNEL);
>>> -                if (!cf) {
>>> -                        ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
>>> -                        return -ENOMEM;
>>> -                }
>>> -                cf->fmode = fmode;
>>> -
>>> -                spin_lock_init(&cf->rw_contexts_lock);
>>> -                INIT_LIST_HEAD(&cf->rw_contexts);
>>> -
>>> -                cf->next_offset = 2;
>>> -                cf->readdir_cache_idx = -1;
>>> -                file->private_data = cf;
>>> -                BUG_ON(inode->i_fop->release != ceph_release);
>>> +                ret = ceph_init_file_info(inode, file, fmode,
>>> +                                          S_ISDIR(inode->i_mode));
>>> +                if (ret)
>>> +                        return ret;
>>>                  break;
>>>
>>>          case S_IFLNK:
>>> @@ -460,16 +487,32 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
>>>  int ceph_release(struct inode *inode, struct file *file)
>>>  {
>>>          struct ceph_inode_info *ci = ceph_inode(inode);
>>> -        struct ceph_file_info *cf = file->private_data;
>>> +        struct ceph_dir_file_info *dfi;
>>> +        struct ceph_file_info *fi;
>>> +
>>> +        if (S_ISDIR(inode->i_mode)) {
>>> +                dfi = file->private_data;
>>> +                fi = &dfi->file_info;
>>> +
>>> +                dout("release inode %p dir file %p\n", inode, file);
>>> +                WARN_ON(!list_empty(&fi->rw_contexts));
>>> +
>>> +                ceph_put_fmode(ci, fi->fmode);
>>> +                if (dfi->last_readdir)
>>> +                        ceph_mdsc_put_request(dfi->last_readdir);
>>>
>>> -        dout("release inode %p file %p\n", inode, file);
>>> -        ceph_put_fmode(ci, cf->fmode);
>>> -        if (cf->last_readdir)
>>> -                ceph_mdsc_put_request(cf->last_readdir);
>>> -        kfree(cf->last_name);
>>> -        kfree(cf->dir_info);
>>> -        WARN_ON(!list_empty(&cf->rw_contexts));
>>> -        kmem_cache_free(ceph_file_cachep, cf);
>>> +                kfree(dfi->last_name);
>>> +                kfree(dfi->dir_info);
>>> +                kmem_cache_free(ceph_dir_file_cachep, dfi);
>>> +        } else {
>>> +                fi = file->private_data;
>>> +
>>> +                dout("release inode %p regular file %p\n", inode, file);
>>> +                WARN_ON(!list_empty(&fi->rw_contexts));
>>> +
>>> +                ceph_put_fmode(ci, fi->fmode);
>>> +                kmem_cache_free(ceph_file_cachep, fi);
>>> +        }
>>>
>>>          /* wake up anyone waiting for caps on this inode */
>>>          wake_up_all(&ci->i_cap_wq);
>>> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
>>> index fb2bc9c..d884ba9 100644
>>> --- a/fs/ceph/super.c
>>> +++ b/fs/ceph/super.c
>>> @@ -679,6 +679,7 @@ static void destroy_fs_client(struct ceph_fs_client *fsc)
>>>  struct kmem_cache *ceph_cap_flush_cachep;
>>>  struct kmem_cache *ceph_dentry_cachep;
>>>  struct kmem_cache *ceph_file_cachep;
>>> +struct kmem_cache *ceph_dir_file_cachep;
>>>
>>>  static void ceph_inode_init_once(void *foo)
>>>  {
>>> @@ -716,6 +717,10 @@ static int __init init_caches(void)
>>>          if (!ceph_file_cachep)
>>>                  goto bad_file;
>>>
>>> +        ceph_dir_file_cachep = KMEM_CACHE(ceph_dir_file_info, SLAB_MEM_SPREAD);
>>> +        if (!ceph_dir_file_cachep)
>>> +                goto bad_dir_file;
>>> +
>>>          error = ceph_fscache_register();
>>>          if (error)
>>>                  goto bad_fscache;
>>> @@ -723,6 +728,8 @@ static int __init init_caches(void)
>>>          return 0;
>>>
>>>  bad_fscache:
>>> +        kmem_cache_destroy(ceph_dir_file_cachep);
>>> +bad_dir_file:
>>>          kmem_cache_destroy(ceph_file_cachep);
>>>  bad_file:
>>>          kmem_cache_destroy(ceph_dentry_cachep);
>>> @@ -748,6 +755,7 @@ static void destroy_caches(void)
>>>          kmem_cache_destroy(ceph_cap_flush_cachep);
>>>          kmem_cache_destroy(ceph_dentry_cachep);
>>>          kmem_cache_destroy(ceph_file_cachep);
>>> +        kmem_cache_destroy(ceph_dir_file_cachep);
>>>
>>>          ceph_fscache_unregister();
>>>  }
>>> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
>>> index 1c2086e..c9f9474 100644
>>> --- a/fs/ceph/super.h
>>> +++ b/fs/ceph/super.h
>>> @@ -671,6 +671,10 @@ struct ceph_file_info {
>>>
>>>          spinlock_t rw_contexts_lock;
>>>          struct list_head rw_contexts;
>>> +};
>>> +
>>> +struct ceph_dir_file_info {
>>> +        struct ceph_file_info file_info;
>>>
>>>          /* readdir: position within the dir */
>>>          u32 frag;
>>> @@ -686,6 +690,7 @@ struct ceph_file_info {
>>>          /* used for -o dirstat read() on directory thing */
>>>          char *dir_info;
>>>          int dir_info_len;
>>> +
>>>  };
>>>
>>>  struct ceph_rw_context {
>>> diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
>>> index c2ec44c..49c93b9 100644
>>> --- a/include/linux/ceph/libceph.h
>>> +++ b/include/linux/ceph/libceph.h
>>> @@ -262,6 +262,7 @@ static inline int calc_pages_for(u64 off, u64 len)
>>>  extern struct kmem_cache *ceph_cap_flush_cachep;
>>>  extern struct kmem_cache *ceph_dentry_cachep;
>>>  extern struct kmem_cache *ceph_file_cachep;
>>> +extern struct kmem_cache *ceph_dir_file_cachep;
>>>
>>>  /* ceph_common.c */
>>>  extern bool libceph_compatible(void *data);
>>> --
>>> 1.8.3.1
>>>

--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
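
The whole saving in the patch above comes from one layout trick: the readdir-only bookkeeping moves into a larger, directory-only struct that embeds the common per-file state as its first member, and regular files are allocated from the small cache only. What follows is a minimal userspace sketch of that pattern, not the kernel code: the names mirror the ceph_file_info/ceph_dir_file_info split, plain calloc()/free() stand in for the kmem_cache machinery, and get_file_info() plays the role of the S_ISDIR() branches that ceph_readpages() and ceph_release() use to recover the right type from file->private_data.

/* sketch.c -- illustrative only; mirrors the struct split from the patch */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct file_info {                      /* cf. ceph_file_info (common part) */
        int fmode;
        int flags;
};

struct dir_file_info {                  /* cf. ceph_dir_file_info */
        struct file_info file_info;     /* common part, kept as first member */
        unsigned int frag;              /* readdir-only state lives here ... */
        long next_offset;
        char *last_name;
        char *dir_info;
        int dir_info_len;
};

/* open: allocate only what this kind of object actually needs */
static void *init_private_data(bool is_dir, int fmode)
{
        if (is_dir) {
                struct dir_file_info *dfi = calloc(1, sizeof(*dfi));
                if (!dfi)
                        return NULL;
                dfi->file_info.fmode = fmode;
                dfi->next_offset = 2;   /* account for '.' and '..' */
                return dfi;
        } else {
                struct file_info *fi = calloc(1, sizeof(*fi));
                if (!fi)
                        return NULL;
                fi->fmode = fmode;
                return fi;
        }
}

/* code shared by both kinds only needs the embedded common part */
static struct file_info *get_file_info(void *private_data, bool is_dir)
{
        if (is_dir)
                return &((struct dir_file_info *)private_data)->file_info;
        return private_data;
}

int main(void)
{
        void *reg = init_private_data(false, 1);
        void *dir = init_private_data(true, 1);

        if (!reg || !dir)
                return 1;

        /* the per-object cost now differs between the two kinds */
        printf("per regular file: %zu bytes\n", sizeof(struct file_info));
        printf("per directory:    %zu bytes\n", sizeof(struct dir_file_info));
        printf("fmode through the common view: %d and %d\n",
               get_file_info(reg, false)->fmode,
               get_file_info(dir, true)->fmode);

        free(reg);
        free(dir);
        return 0;
}

With many regular files open, each one now pays only for the small common struct; the readdir bookkeeping is paid for only while a directory is actually open, which is where the memory saving described in the changelog comes from.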