On Fri, Jan 7, 2011 at 4:36 AM, Theodore Ts'o <tytso@xxxxxxx> wrote: > Replace the jbd2_inode structure (which is 48 bytes) with a pointer > and only allocate the jbd2_inode when it is needed --- that is, when > the file system has a journal present and the inode has been opened > for writing. This allows us to further slim down the ext4_inode_info > structure. > > Signed-off-by: "Theodore Ts'o" <tytso@xxxxxxx> > --- > > I looked into release the jinode structure in ext4_release_file(), but > that doesn't work because we need to wait until the delayed allocation > is completed. At some point I'll look into adding something that checks > to see if we have finished doing the background writeout, but I'm going > to save that for another patch. > > fs/ext4/ext4.h | 2 +- > fs/ext4/ext4_jbd2.h | 2 +- > fs/ext4/file.c | 22 ++++++++++++++++++++++ > fs/ext4/inode.c | 17 ++++++++++++----- > fs/ext4/super.c | 16 +++++++--------- > fs/jbd2/journal.c | 20 +++++++++++++------- > include/linux/jbd2.h | 20 ++++++++++++++++++-- > 7 files changed, 74 insertions(+), 25 deletions(-) > > diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h > index 2569c23..2d20424 100644 > --- a/fs/ext4/ext4.h > +++ b/fs/ext4/ext4.h > @@ -811,7 +811,7 @@ struct ext4_inode_info { > */ > struct rw_semaphore i_data_sem; > struct inode vfs_inode; > - struct jbd2_inode jinode; > + struct jbd2_inode *jinode; > > struct ext4_ext_cache i_cached_extent; > /* > diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h > index b0bd792..d8b992e 100644 > --- a/fs/ext4/ext4_jbd2.h > +++ b/fs/ext4/ext4_jbd2.h > @@ -253,7 +253,7 @@ static inline int ext4_journal_force_commit(journal_t *journal) > static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode) > { > if (ext4_handle_valid(handle)) > - return jbd2_journal_file_inode(handle, &EXT4_I(inode)->jinode); > + return jbd2_journal_file_inode(handle, EXT4_I(inode)->jinode); > return 0; > } > > diff --git a/fs/ext4/file.c b/fs/ext4/file.c > index 5a5c55d..b1b4884 100644 > --- a/fs/ext4/file.c > +++ b/fs/ext4/file.c > @@ -104,6 +104,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp) > { > struct super_block *sb = inode->i_sb; > struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); > + struct ext4_inode_info *ei = EXT4_I(inode); > struct vfsmount *mnt = filp->f_path.mnt; > struct path path; > char buf[64], *cp; > @@ -127,6 +128,27 @@ static int ext4_file_open(struct inode * inode, struct file * filp) > ext4_mark_super_dirty(sb); > } > } > + /* > + * Set up the jbd2_inode if we are opening the inode for > + * writing and the journal is present > + */ > + if (sbi->s_journal && !ei->jinode && (filp->f_mode & FMODE_WRITE)) { > + struct jbd2_inode *jinode = jbd2_alloc_inode(GFP_KERNEL); > + > + spin_lock(&inode->i_lock); > + if (!ei->jinode) { > + if (!jinode) { > + spin_unlock(&inode->i_lock); > + return -ENOMEM; > + } > + ei->jinode = jinode; > + jbd2_journal_init_jbd_inode(ei->jinode, inode); > + jinode = NULL; > + } > + if (unlikely(jinode != NULL)) > + jbd2_free_inode(jinode); actually, I though, jbd2_free_inode would be better off outside the spinlock, but if you're going to do it here you might just as well change this to: + else + jbd2_free_inode(jinode); > + spin_unlock(&inode->i_lock); > + } > return dquot_file_open(inode, filp); > } > > diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c > index d34ddc6..fb5fe73 100644 > --- a/fs/ext4/inode.c > +++ b/fs/ext4/inode.c > @@ -55,10 +55,17 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode, > loff_t new_size) > { > trace_ext4_begin_ordered_truncate(inode, new_size); > - return jbd2_journal_begin_ordered_truncate( > - EXT4_SB(inode->i_sb)->s_journal, > - &EXT4_I(inode)->jinode, > - new_size); > + /* > + * If jinode is zero, then we never opened the file for > + * writing, so there's no need to call > + * jbd2_journal_begin_ordered_truncate() since there's no > + * outstanding writes we need to flush. > + */ > + if (!EXT4_I(inode)->jinode) > + return 0; > + return jbd2_journal_begin_ordered_truncate(EXT4_JOURNAL(inode), > + EXT4_I(inode)->jinode, > + new_size); > } > > static void ext4_invalidatepage(struct page *page, unsigned long offset); > @@ -4054,7 +4061,7 @@ int ext4_block_truncate_page(handle_t *handle, > if (ext4_should_journal_data(inode)) { > err = ext4_handle_dirty_metadata(handle, inode, bh); > } else { > - if (ext4_should_order_data(inode)) > + if (ext4_should_order_data(inode) && EXT4_I(inode)->jinode) > err = ext4_jbd2_file_inode(handle, inode); > mark_buffer_dirty(bh); > } > diff --git a/fs/ext4/super.c b/fs/ext4/super.c > index f5960d6..1cd4326 100644 > --- a/fs/ext4/super.c > +++ b/fs/ext4/super.c > @@ -818,12 +818,6 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) > memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); > INIT_LIST_HEAD(&ei->i_prealloc_list); > spin_lock_init(&ei->i_prealloc_lock); > - /* > - * Note: We can be called before EXT4_SB(sb)->s_journal is set, > - * therefore it can be null here. Don't check it, just initialize > - * jinode. > - */ > - jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode); > ei->i_reserved_data_blocks = 0; > ei->i_reserved_meta_blocks = 0; > ei->i_allocated_meta_blocks = 0; > @@ -832,6 +826,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) > #ifdef CONFIG_QUOTA > ei->i_reserved_quota = 0; > #endif > + ei->jinode = NULL; > INIT_LIST_HEAD(&ei->i_completed_io_list); > spin_lock_init(&ei->i_completed_io_lock); > ei->cur_aio_dio = NULL; > @@ -900,9 +895,12 @@ void ext4_clear_inode(struct inode *inode) > end_writeback(inode); > dquot_drop(inode); > ext4_discard_preallocations(inode); > - if (EXT4_JOURNAL(inode)) > - jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, > - &EXT4_I(inode)->jinode); > + if (EXT4_I(inode)->jinode) { > + jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode), > + EXT4_I(inode)->jinode); > + jbd2_free_inode(EXT4_I(inode)->jinode); > + EXT4_I(inode)->jinode = NULL; > + } > } > > static inline void ext4_show_quota_options(struct seq_file *seq, > diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c > index 2447bd8..9e46869 100644 > --- a/fs/jbd2/journal.c > +++ b/fs/jbd2/journal.c > @@ -94,6 +94,7 @@ EXPORT_SYMBOL(jbd2_journal_file_inode); > EXPORT_SYMBOL(jbd2_journal_init_jbd_inode); > EXPORT_SYMBOL(jbd2_journal_release_jbd_inode); > EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate); > +EXPORT_SYMBOL(jbd2_inode_cache); > > static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); > static void __journal_abort_soft (journal_t *journal, int errno); > @@ -2286,17 +2287,19 @@ static void __exit jbd2_remove_jbd_stats_proc_entry(void) > > #endif > > -struct kmem_cache *jbd2_handle_cache; > +struct kmem_cache *jbd2_handle_cache, *jbd2_inode_cache; > > static int __init journal_init_handle_cache(void) > { > - jbd2_handle_cache = kmem_cache_create("jbd2_journal_handle", > - sizeof(handle_t), > - 0, /* offset */ > - SLAB_TEMPORARY, /* flags */ > - NULL); /* ctor */ > + jbd2_handle_cache = KMEM_CACHE(jbd2_journal_handle, SLAB_TEMPORARY); > if (jbd2_handle_cache == NULL) { > - printk(KERN_EMERG "JBD: failed to create handle cache\n"); > + printk(KERN_EMERG "JBD2: failed to create handle cache\n"); > + return -ENOMEM; > + } > + jbd2_inode_cache = KMEM_CACHE(jbd2_inode, 0); > + if (jbd2_inode_cache == NULL) { > + printk(KERN_EMERG "JBD2: failed to create inode cache\n"); > + kmem_cache_destroy(jbd2_handle_cache); > return -ENOMEM; > } > return 0; > @@ -2306,6 +2309,9 @@ static void jbd2_journal_destroy_handle_cache(void) > { > if (jbd2_handle_cache) > kmem_cache_destroy(jbd2_handle_cache); > + if (jbd2_inode_cache) > + kmem_cache_destroy(jbd2_inode_cache); > + > } > > /* > diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h > index 2ae86aa..27e79c2 100644 > --- a/include/linux/jbd2.h > +++ b/include/linux/jbd2.h > @@ -94,7 +94,7 @@ extern void jbd2_free(void *ptr, size_t size); > * > * This is an opaque datatype. > **/ > -typedef struct handle_s handle_t; /* Atomic operation type */ > +typedef struct jbd2_journal_handle handle_t; /* Atomic operation type */ > > > /** > @@ -416,7 +416,7 @@ struct jbd2_revoke_table_s; > * in so it can be fixed later. > */ > > -struct handle_s > +struct jbd2_journal_handle > { > /* Which compound transaction is this update a part of? */ > transaction_t *h_transaction; > @@ -1158,6 +1158,22 @@ static inline void jbd2_free_handle(handle_t *handle) > kmem_cache_free(jbd2_handle_cache, handle); > } > > +/* > + * jbd2_inode management (optional, for those file systems that want to use > + * dynamically allocated jbd2_inode structures) > + */ > +extern struct kmem_cache *jbd2_inode_cache; > + > +static inline struct jbd2_inode *jbd2_alloc_inode(gfp_t gfp_flags) > +{ > + return kmem_cache_alloc(jbd2_inode_cache, gfp_flags); > +} > + > +static inline void jbd2_free_inode(struct jbd2_inode *jinode) > +{ > + kmem_cache_free(jbd2_inode_cache, jinode); > +} > + > /* Primary revoke support */ > #define JOURNAL_REVOKE_DEFAULT_HASH 256 > extern int jbd2_journal_init_revoke(journal_t *, int); > -- > 1.7.3.1 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-ext4" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html > -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html