>From 01d4fc410f9a9f011eec55055b341c3aa52bfb4e Mon Sep 17 00:00:00 2001 From: KB <kb@xxxxxxxxxxxxxxx> Date: Wed, 1 Jun 2016 09:17:13 +0200 Subject: [PATCH 2/3] inode cache mgmt Proper inode allocation policy requires to use super_operations.alloc_inode and .destroy_inode callbacks. This solution requires file system specific inode structure to be derived from generic struct inode. Because old approach of allocating generic inode private data is proper no longer the vxfs_blkiget()/_stiget() had to be redesigned and these functions return now the generic inode pointer. This change impacted vxfs_read_fshead() and it was also a time to fix bugs in read_fshead's error handling path like kfree(kmem_cache_alloc()) in the old code. Signed-off-by: Krzysztof Błaszkowski <kb@xxxxxxxxxxxxxxx> --- fs/freevxfs/vxfs.h | 8 --- fs/freevxfs/vxfs_bmap.c | 5 +- fs/freevxfs/vxfs_extern.h | 19 +++--- fs/freevxfs/vxfs_fshead.c | 107 ++++++++++++++------------------ fs/freevxfs/vxfs_inode.c | 153 +++++++++++++++++++++++++-------------------- fs/freevxfs/vxfs_inode.h | 16 +++++- fs/freevxfs/vxfs_super.c | 43 ++++++------- 7 files changed, 182 insertions(+), 169 deletions(-) diff --git a/fs/freevxfs/vxfs.h b/fs/freevxfs/vxfs.h index 4b561de..8b0d1a2 100644 --- a/fs/freevxfs/vxfs.h +++ b/fs/freevxfs/vxfs.h @@ -155,7 +155,6 @@ struct vxfs_sb { */ }; - /* * In core superblock filesystem private data for VxFS. */ @@ -272,13 +271,6 @@ enum { #define VXFS_ISIMMED(ip) VXFS_IS_ORG((ip), VXFS_ORG_IMMED) #define VXFS_ISTYPED(ip) VXFS_IS_ORG((ip), VXFS_ORG_TYPED) - -/* - * Get filesystem private data from VFS inode. - */ -#define VXFS_INO(ip) \ - ((struct vxfs_inode_info *)(ip)->i_private) - /* * Get filesystem private data from VFS superblock. */ diff --git a/fs/freevxfs/vxfs_bmap.c b/fs/freevxfs/vxfs_bmap.c index 1fd41cf..333093c 100644 --- a/fs/freevxfs/vxfs_bmap.c +++ b/fs/freevxfs/vxfs_bmap.c @@ -99,8 +99,9 @@ vxfs_bmap_ext4(struct inode *ip, long bn) brelse(buf); return bno; - } else - printk(KERN_WARNING "no matching indir?"); + } else { + printk(KERN_WARNING "%s:%d no matching indir?\n", __func__, __LINE__); + } return 0; diff --git a/fs/freevxfs/vxfs_extern.h b/fs/freevxfs/vxfs_extern.h index 881aa3d..9648735 100644 --- a/fs/freevxfs/vxfs_extern.h +++ b/fs/freevxfs/vxfs_extern.h @@ -55,15 +55,18 @@ extern const struct inode_operations vxfs_immed_symlink_iops; /* vxfs_inode.c */ extern const struct address_space_operations vxfs_immed_aops; -extern struct kmem_cache *vxfs_inode_cachep; extern void vxfs_dumpi(struct vxfs_inode_info *, ino_t); -extern struct inode * vxfs_get_fake_inode(struct super_block *, - struct vxfs_inode_info *); -extern void vxfs_put_fake_inode(struct inode *); -extern struct vxfs_inode_info * vxfs_blkiget(struct super_block *, u_long, ino_t); -extern struct vxfs_inode_info * vxfs_stiget(struct super_block *, ino_t); -extern struct inode * vxfs_iget(struct super_block *, ino_t); -extern void vxfs_evict_inode(struct inode *); + +extern struct inode *vxfs_blkiget(struct super_block *, u_long, ino_t); +extern struct inode *vxfs_stiget(struct super_block *, ino_t); +extern struct inode *vxfs_iget(struct super_block *, ino_t); +extern void vxfs_evict_inode(struct inode *); + +extern void vxfs_destroy_inode(struct inode *ip); +extern int vxfs_ii_cache_init(void); +extern void vxfs_ii_cache_destroy(void); +extern struct inode *vxfs_inode_alloc(struct super_block *sb); + /* vxfs_lookup.c */ extern const struct inode_operations vxfs_dir_inode_ops; diff --git a/fs/freevxfs/vxfs_fshead.c b/fs/freevxfs/vxfs_fshead.c index e7501cb..0f3a806 100644 --- a/fs/freevxfs/vxfs_fshead.c +++ b/fs/freevxfs/vxfs_fshead.c @@ -86,6 +86,9 @@ vxfs_getfsh(struct inode *ip, int which) memcpy(fhp, bp->b_data, sizeof(*fhp)); put_bh(bp); +#ifdef DIAGNOSTIC + vxfs_dumpfsh(fhp); +#endif return (fhp); } out: @@ -93,6 +96,15 @@ out: return NULL; } +static inline void inode_err_put(struct inode **i) +{ + struct inode *i2 = *i; + + if (i2 && !IS_ERR(i2)) + iput(i2); + *i = NULL; +} + /** * vxfs_read_fshead - read the fileset headers * @sbp: superblock to which the fileset belongs @@ -107,98 +119,73 @@ int vxfs_read_fshead(struct super_block *sbp) { struct vxfs_sb_info *infp = VXFS_SBI(sbp); - struct vxfs_fsh *pfp, *sfp; - struct vxfs_inode_info *vip, *tip; + struct vxfs_fsh *pfp = NULL; + struct vxfs_fsh *sfp = NULL; + struct inode *ip1; + struct inode *ip2; - vip = vxfs_blkiget(sbp, infp->vsi_iext, infp->vsi_fshino); - if (!vip) { + infp->vsi_fship = ip1 = vxfs_blkiget(sbp, infp->vsi_iext, infp->vsi_fshino); + if (!ip1 || IS_ERR(ip1)) { printk(KERN_ERR "vxfs: unable to read fsh inode\n"); return -EINVAL; } - if (!VXFS_ISFSH(vip)) { + + if (!VXFS_ISFSH(VXFS_INO(ip1))) { printk(KERN_ERR "vxfs: fsh list inode is of wrong type (%x)\n", - vip->vii_mode & VXFS_TYPE_MASK); - goto out_free_fship; + VXFS_INO(ip1)->vii_mode & VXFS_TYPE_MASK); + goto out_err; } - #ifdef DIAGNOSTIC printk("vxfs: fsh inode dump:\n"); - vxfs_dumpi(vip, infp->vsi_fshino); + vxfs_dumpi(VXFS_INO(ip1), infp->vsi_fshino); #endif - infp->vsi_fship = vxfs_get_fake_inode(sbp, vip); - if (!infp->vsi_fship) { - printk(KERN_ERR "vxfs: unable to get fsh inode\n"); - goto out_free_fship; - } - sfp = vxfs_getfsh(infp->vsi_fship, 0); if (!sfp) { printk(KERN_ERR "vxfs: unable to get structural fsh\n"); - goto out_iput_fship; - } - -#ifdef DIAGNOSTIC - vxfs_dumpfsh(sfp); -#endif + goto out_err; + } pfp = vxfs_getfsh(infp->vsi_fship, 1); if (!pfp) { printk(KERN_ERR "vxfs: unable to get primary fsh\n"); - goto out_free_sfp; + goto out_err; } -#ifdef DIAGNOSTIC - vxfs_dumpfsh(pfp); -#endif - - tip = vxfs_blkiget(sbp, infp->vsi_iext, - fs32_to_cpu(infp, sfp->fsh_ilistino[0])); - if (!tip) - goto out_free_pfp; - - infp->vsi_stilist = vxfs_get_fake_inode(sbp, tip); - if (!infp->vsi_stilist) { - printk(KERN_ERR "vxfs: unable to get structural list inode\n"); - kfree(tip); - goto out_free_pfp; + infp->vsi_stilist = + ip2 = vxfs_blkiget(sbp, infp->vsi_iext, + fs32_to_cpu(infp, sfp->fsh_ilistino[0])); + if (!ip2 || IS_ERR(ip2)) { + goto out_err; } + if (!VXFS_ISILT(VXFS_INO(infp->vsi_stilist))) { printk(KERN_ERR "vxfs: structural list inode is of wrong type (%x)\n", - VXFS_INO(infp->vsi_stilist)->vii_mode & VXFS_TYPE_MASK); - goto out_iput_stilist; + VXFS_INO(infp->vsi_stilist)->vii_mode & VXFS_TYPE_MASK); + goto out_err; } - tip = vxfs_stiget(sbp, fs32_to_cpu(infp, pfp->fsh_ilistino[0])); - if (!tip) - goto out_iput_stilist; - infp->vsi_ilist = vxfs_get_fake_inode(sbp, tip); - if (!infp->vsi_ilist) { + infp->vsi_ilist = ip2 = vxfs_stiget(sbp, + fs32_to_cpu(infp, pfp->fsh_ilistino[0])); + if (!ip2 || IS_ERR(ip2)) { printk(KERN_ERR "vxfs: unable to get inode list inode\n"); - kfree(tip); - goto out_iput_stilist; + goto out_err; } + if (!VXFS_ISILT(VXFS_INO(infp->vsi_ilist))) { printk(KERN_ERR "vxfs: inode list inode is of wrong type (%x)\n", VXFS_INO(infp->vsi_ilist)->vii_mode & VXFS_TYPE_MASK); - goto out_iput_ilist; + goto out_err; } - + kfree(pfp); + kfree(sfp); return 0; - - out_iput_ilist: - iput(infp->vsi_ilist); - out_iput_stilist: - iput(infp->vsi_stilist); - out_free_pfp: +out_err: + inode_err_put(&infp->vsi_fship); + inode_err_put(&infp->vsi_ilist); + inode_err_put(&infp->vsi_stilist); kfree(pfp); - out_free_sfp: - kfree(sfp); - out_iput_fship: - iput(infp->vsi_fship); - return -EINVAL; - out_free_fship: - kfree(vip); + kfree(sfp); return -EINVAL; } diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c index ab54ab3..582d652 100644 --- a/fs/freevxfs/vxfs_inode.c +++ b/fs/freevxfs/vxfs_inode.c @@ -41,7 +41,8 @@ #include "vxfs_extern.h" -struct kmem_cache *vxfs_inode_cachep; +static struct kmem_cache *vxfs_inode_cachep; +static void vxfs_iinit(struct inode *ip, struct vxfs_inode_info *vip); #ifdef DIAGNOSTIC @@ -111,35 +112,41 @@ static inline void dip2vip_cpy(struct vxfs_sb_info *sbi, * buffercache. This function should not be used outside the * read_super() method, otherwise the data may be incoherent. */ -struct vxfs_inode_info * +struct inode * vxfs_blkiget(struct super_block *sbp, u_long extent, ino_t ino) { struct buffer_head *bp; u_long block, offset; + struct inode *ip; block = extent + ((ino * VXFS_ISIZE) / sbp->s_blocksize); offset = ((ino % (sbp->s_blocksize / VXFS_ISIZE)) * VXFS_ISIZE); + ip = new_inode(sbp); + if (!ip) + return ERR_PTR(-ENOMEM); + bp = sb_bread(sbp, block); + ip->i_ino = get_next_ino(); + ip->i_mapping->a_ops = &vxfs_aops; if (bp && buffer_mapped(bp)) { - struct vxfs_inode_info *vip; + struct vxfs_inode_info *vip = VXFS_INO(ip); struct vxfs_dinode *dip; - if (!(vip = kmem_cache_alloc(vxfs_inode_cachep, GFP_KERNEL))) - goto fail; dip = (struct vxfs_dinode *)(bp->b_data + offset); dip2vip_cpy(VXFS_SBI(sbp), vip, dip); + vxfs_iinit(ip, vip); #ifdef DIAGNOSTIC vxfs_dumpi(vip, ino); #endif - brelse(bp); - return (vip); + } else { + printk(KERN_WARNING "vxfs: unable to read block %ld\n", block); + iput(ip); + ip = NULL; } -fail: - printk(KERN_WARNING "vxfs: unable to read block %ld\n", block); brelse(bp); - return NULL; + return ip; } /** @@ -154,21 +161,18 @@ fail: * Returns the matching VxFS inode on success, else an error code. */ static struct vxfs_inode_info * -__vxfs_iget(ino_t ino, struct inode *ilistp) +__vxfs_iget(struct inode *ip, struct inode *ilistp, ino_t ino) { - struct page *pp; - u_long offset; + struct page *pp; + u_long offset = (ino % (PAGE_SIZE / VXFS_ISIZE)) * VXFS_ISIZE; - offset = (ino % (PAGE_SIZE / VXFS_ISIZE)) * VXFS_ISIZE; pp = vxfs_get_page(ilistp->i_mapping, ino * VXFS_ISIZE / PAGE_SIZE); if (!IS_ERR(pp)) { - struct vxfs_inode_info *vip; + struct vxfs_inode_info *vip = VXFS_INO(ip); struct vxfs_dinode *dip; caddr_t kaddr = (char *)page_address(pp); - if (!(vip = kmem_cache_alloc(vxfs_inode_cachep, GFP_KERNEL))) - goto fail; dip = (struct vxfs_dinode *)(kaddr + offset); dip2vip_cpy(VXFS_SBI(ilistp->i_sb), vip, dip); #ifdef DIAGNOSTIC @@ -180,11 +184,6 @@ __vxfs_iget(ino_t ino, struct inode *ilistp) printk(KERN_WARNING "vxfs: error on page %p\n", pp); return ERR_CAST(pp); - -fail: - printk(KERN_WARNING "vxfs: unable to read inode %ld\n", (unsigned long)ino); - vxfs_put_page(pp); - return ERR_PTR(-ENOMEM); } /** @@ -197,13 +196,24 @@ fail: * the structural inode list. * Returns the matching VxFS inode on success, else a NULL pointer. */ -struct vxfs_inode_info * +struct inode * vxfs_stiget(struct super_block *sbp, ino_t ino) { - struct vxfs_inode_info *vip; + struct inode *ip = new_inode(sbp); + struct vxfs_inode_info *ii; - vip = __vxfs_iget(ino, VXFS_SBI(sbp)->vsi_stilist); - return IS_ERR(vip) ? NULL : vip; + if (!ip) + return NULL; + + ip->i_ino = get_next_ino(); + ip->i_mapping->a_ops = &vxfs_aops; + ii = __vxfs_iget(ip, VXFS_SBI(sbp)->vsi_stilist, ino); + if (IS_ERR(ii)) { + iput(ip); + } else { + vxfs_iinit(ip, ii); + } + return IS_ERR(ii) ? NULL : ip; } /** @@ -271,41 +281,6 @@ vxfs_iinit(struct inode *ip, struct vxfs_inode_info *vip) } -/** - * vxfs_get_fake_inode - get fake inode structure - * @sbp: filesystem superblock - * @vip: fspriv inode - * - * Description: - * vxfs_fake_inode gets a fake inode (not in the inode hash) for a - * superblock, vxfs_inode pair. - * Returns the filled VFS inode. - */ -struct inode * -vxfs_get_fake_inode(struct super_block *sbp, struct vxfs_inode_info *vip) -{ - struct inode *ip = NULL; - - if ((ip = new_inode(sbp))) { - ip->i_ino = get_next_ino(); - vxfs_iinit(ip, vip); - ip->i_mapping->a_ops = &vxfs_aops; - } - return (ip); -} - -/** - * vxfs_put_fake_inode - free faked inode - * *ip: VFS inode - * - * Description: - * vxfs_put_fake_inode frees all data associated with @ip. - */ -void -vxfs_put_fake_inode(struct inode *ip) -{ - iput(ip); -} /** * vxfs_iget - get an inode @@ -329,7 +304,7 @@ vxfs_iget(struct super_block *sbp, ino_t ino) if (!(ip->i_state & I_NEW)) return ip; - vip = __vxfs_iget(ino, VXFS_SBI(sbp)->vsi_ilist); + vip = __vxfs_iget(ip, VXFS_SBI(sbp)->vsi_ilist, ino); if (IS_ERR(vip)) { iget_failed(ip); return ERR_CAST(vip); @@ -367,21 +342,65 @@ vxfs_iget(struct super_block *sbp, ino_t ino) static void vxfs_i_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); - kmem_cache_free(vxfs_inode_cachep, inode->i_private); + struct vxfs_vfs_inode *i = container_of(inode, struct vxfs_vfs_inode, ino); + + kmem_cache_free(vxfs_inode_cachep, i); +} + +void vxfs_destroy_inode(struct inode *ip) +{ + call_rcu(&ip->i_rcu, vxfs_i_callback); } /** - * vxfs_evict_inode - remove inode from main memory + * vxfs_evict_inode - commit inode buffers if needed, may delete on-disk inode. * @ip: inode to discard. * * Description: - * vxfs_evict_inode() is called on the final iput and frees the private - * inode area. + * vxfs_evict_inode() may be called on the final iput. */ void vxfs_evict_inode(struct inode *ip) { truncate_inode_pages_final(&ip->i_data); + invalidate_inode_buffers(ip); clear_inode(ip); - call_rcu(&ip->i_rcu, vxfs_i_callback); } + + + + +struct inode *vxfs_inode_alloc(struct super_block *sb) +{ + struct vxfs_vfs_inode *i = kmem_cache_alloc(vxfs_inode_cachep, GFP_KERNEL); + + return i ? &i->ino : NULL; +} + +static void vxfs_inode_constructor(void *ptr) +{ + struct vxfs_vfs_inode *i = (struct vxfs_vfs_inode *) ptr; + + inode_init_once(&i->ino); +} + +int vxfs_ii_cache_init(void) +{ + vxfs_inode_cachep = kmem_cache_create("vxfs_inode", + sizeof(struct vxfs_vfs_inode), 0, + SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, vxfs_inode_constructor); + + return vxfs_inode_cachep ? 0 : -ENOMEM; +} + + +void vxfs_ii_cache_destroy(void) +{ + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); + kmem_cache_destroy(vxfs_inode_cachep); +} + diff --git a/fs/freevxfs/vxfs_inode.h b/fs/freevxfs/vxfs_inode.h index 3a29662..f372677 100644 --- a/fs/freevxfs/vxfs_inode.h +++ b/fs/freevxfs/vxfs_inode.h @@ -37,7 +37,6 @@ * inodes of the Veritas Filesystem. */ - #define VXFS_ISIZE 0x100 /* Inode size */ #define VXFS_NDADDR 10 /* Number of direct addrs in inode */ @@ -178,6 +177,12 @@ struct vxfs_inode_info { } vii_org; }; +struct vxfs_vfs_inode { + struct inode ino; + struct vxfs_inode_info cpufs_ino; +}; + + #define vii_rdev vii_ftarea.rdev #define vii_dotdot vii_ftarea.dotdot @@ -185,4 +190,13 @@ struct vxfs_inode_info { #define vii_ext4 vii_org.ext4 #define vii_typed vii_org.typed +/* + * Get filesystem private data from VFS inode. + */ +static inline struct vxfs_inode_info *VXFS_INO(struct inode *i) +{ + return &(container_of(i, struct vxfs_vfs_inode, ino))->cpufs_ino; +} + + #endif /* _VXFS_INODE_H_ */ diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index 6124091..efa3bc2 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -59,6 +59,8 @@ static int vxfs_statfs(struct dentry *, struct kstatfs *); static int vxfs_remount(struct super_block *, int *, char *); static const struct super_operations vxfs_super_ops = { + .alloc_inode = vxfs_inode_alloc, + .destroy_inode = vxfs_destroy_inode, .evict_inode = vxfs_evict_inode, .put_super = vxfs_put_super, .statfs = vxfs_statfs, @@ -79,9 +81,9 @@ vxfs_put_super(struct super_block *sbp) { struct vxfs_sb_info *infp = VXFS_SBI(sbp); - vxfs_put_fake_inode(infp->vsi_fship); - vxfs_put_fake_inode(infp->vsi_ilist); - vxfs_put_fake_inode(infp->vsi_stilist); + iput(infp->vsi_fship); + iput(infp->vsi_ilist); + iput(infp->vsi_stilist); brelse(infp->vsi_bp); kfree(infp); @@ -253,17 +255,18 @@ static int vxfs_fill_super(struct super_block *sbp, void *dp, int silent) goto out; } + if (vxfs_read_olt(sbp, bsize)) { printk(KERN_WARNING "vxfs: unable to read olt\n"); goto out; } + sbp->s_op = &vxfs_super_ops; if (vxfs_read_fshead(sbp)) { printk(KERN_WARNING "vxfs: unable to read fshead\n"); goto out; } - sbp->s_op = &vxfs_super_ops; root = vxfs_iget(sbp, VXFS_ROOT_INO); if (IS_ERR(root)) { ret = PTR_ERR(root); @@ -278,9 +281,9 @@ static int vxfs_fill_super(struct super_block *sbp, void *dp, int silent) return 0; out_free_ilist: - vxfs_put_fake_inode(infp->vsi_fship); - vxfs_put_fake_inode(infp->vsi_ilist); - vxfs_put_fake_inode(infp->vsi_stilist); + iput(infp->vsi_fship); + iput(infp->vsi_ilist); + iput(infp->vsi_stilist); out: brelse(infp->vsi_bp); kfree(infp); @@ -309,29 +312,23 @@ MODULE_ALIAS("vxfs"); static int __init vxfs_init(void) { - int rv; + int rc = vxfs_ii_cache_init(); - vxfs_inode_cachep = kmem_cache_create("vxfs_inode", - sizeof(struct vxfs_inode_info), 0, - SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); - if (!vxfs_inode_cachep) - return -ENOMEM; - rv = register_filesystem(&vxfs_fs_type); - if (rv < 0) - kmem_cache_destroy(vxfs_inode_cachep); - return rv; + if (!rc) { + rc = register_filesystem(&vxfs_fs_type); + if (rc < 0) + vxfs_ii_cache_destroy(); + } + printk(KERN_DEBUG "%s: **** %s %s rc %d\n", __func__, __DATE__, __TIME__, rc); + + return rc; } static void __exit vxfs_cleanup(void) { unregister_filesystem(&vxfs_fs_type); - /* - * Make sure all delayed rcu free inodes are flushed before we - * destroy cache. - */ - rcu_barrier(); - kmem_cache_destroy(vxfs_inode_cachep); + vxfs_ii_cache_destroy(); } module_init(vxfs_init); -- 1.7.3.4 > > > Please take a look at the branch above. The only major thing that > > should be missing is your directory code refactoring. > > Thanks. yes, the old readdir has a bug. this time my change logs are > more verbose. > > > -- > Krzysztof Blaszkowski > > -- > To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- Krzysztof Blaszkowski -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html