new_inode() dirties a contended cache line to get inode numbers.

Solve this problem by giving each cpu a per_cpu variable that is fed from
the shared last_ino, but only once every 1024 allocations.

This reduces contention on the shared last_ino.

Note: the last_ino_get() method must be called with preemption disabled
on SMP.

(socket8 bench result: no difference, but this is because the inode_lock
cost is too heavy)

Signed-off-by: Eric Dumazet <dada1@xxxxxxxxxxxxx>
---
 fs/inode.c |   27 +++++++++++++++++++++++++--
 1 files changed, 25 insertions(+), 2 deletions(-)
diff --git a/fs/inode.c b/fs/inode.c
index 0487ddb..d850050 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -534,6 +534,53 @@ repeat:
 	return node ? inode : NULL;
 }
 
+/* Number of inode numbers a cpu reserves from the shared counter at once. */
+#define LAST_INO_BATCH 1024
+
+/*
+ * last_ino_get - return the next inode number for new_inode()
+ *
+ * The result is an unsigned int so that it always fits in 32 bits: a
+ * negative int would be sign-extended when stored in the unsigned long
+ * i_ino on 64bit kernels.  0 is never returned.
+ *
+ * Must be called with preemption disabled; new_inode() calls it with
+ * inode_lock held.
+ */
+#ifdef CONFIG_SMP
+/*
+ * Each cpu owns a block of LAST_INO_BATCH numbers, so the shared
+ * 'last_ino' is dirtied only once every LAST_INO_BATCH allocations.
+ */
+static DEFINE_PER_CPU(unsigned int, cpu_ino_alloc);
+
+static unsigned int last_ino_get(void)
+{
+	static atomic_t last_ino;
+	unsigned int *ptr = &__raw_get_cpu_var(cpu_ino_alloc);
+	unsigned int res = *ptr;
+
+	/* Block used up (or first call on this cpu): reserve a new block. */
+	if (unlikely((res & (LAST_INO_BATCH - 1)) == 0)) {
+		res = atomic_add_return(LAST_INO_BATCH, &last_ino);
+		res -= LAST_INO_BATCH;
+	}
+	if (unlikely(++res == 0))
+		res++;		/* skip 0 when the 32bit counter wraps */
+	*ptr = res;
+	return res;
+}
+#else
+static unsigned int last_ino_get(void)
+{
+	static unsigned int last_ino;
+
+	if (unlikely(++last_ino == 0))
+		last_ino++;	/* skip 0 when the 32bit counter wraps */
+	return last_ino;
+}
+#endif
+
 /**
  * new_inode - obtain an inode
  * @sb: superblock
@@ -553,7 +600,6 @@ struct inode *new_inode(struct super_block *sb)
 	 * error if st_ino won't fit in target struct field. Use 32bit counter
 	 * here to attempt to avoid that.
 	 */
-	static unsigned int last_ino;
 	struct inode * inode;
 
 	spin_lock_prefetch(&inode_lock);
@@ -564,7 +610,7 @@ struct inode *new_inode(struct super_block *sb)
 		inodes_stat.nr_inodes++;
 		list_add(&inode->i_list, &inode_in_use);
 		list_add(&inode->i_sb_list, &sb->s_inodes);
-		inode->i_ino = ++last_ino;
+		inode->i_ino = last_ino_get();
 		inode->i_state = 0;
 		spin_unlock(&inode_lock);
 	}