Improve scalability of files_lock by adding per-cpu, per-sb files lists, protected with per-cpu locking. This effectively turns files_lock into a big-writer lock. Signed-off-by: Nick Piggin <npiggin@xxxxxxx> --- fs/file_table.c | 161 +++++++++++++++++++++++++++++++++++++++-------------- fs/super.c | 16 +++++ include/linux/fs.h | 7 ++ 3 files changed, 143 insertions(+), 41 deletions(-) Index: linux-2.6/fs/file_table.c =================================================================== --- linux-2.6.orig/fs/file_table.c +++ linux-2.6/fs/file_table.c @@ -22,6 +22,7 @@ #include <linux/fsnotify.h> #include <linux/sysctl.h> #include <linux/percpu_counter.h> +#include <linux/percpu.h> #include <asm/atomic.h> @@ -30,7 +31,7 @@ struct files_stat_struct files_stat = { .max_files = NR_FILE }; -static __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock); +static DEFINE_PER_CPU(spinlock_t, files_cpulock); /* SLAB cache for file structures */ static struct kmem_cache *filp_cachep __read_mostly; @@ -124,6 +125,9 @@ struct file *get_empty_filp(void) goto fail_sec; INIT_LIST_HEAD(&f->f_u.fu_list); +#ifdef CONFIG_SMP + f->f_sb_list_cpu = -1; +#endif atomic_long_set(&f->f_count, 1); rwlock_init(&f->f_owner.lock); f->f_cred = get_cred(cred); @@ -357,42 +361,104 @@ void put_filp(struct file *file) void file_sb_list_add(struct file *file, struct super_block *sb) { - spin_lock(&files_lock); + spinlock_t *lock; + struct list_head *list; +#ifdef CONFIG_SMP + int cpu; +#endif + + lock = &get_cpu_var(files_cpulock); +#ifdef CONFIG_SMP + BUG_ON(file->f_sb_list_cpu != -1); + cpu = smp_processor_id(); + list = per_cpu_ptr(sb->s_files, cpu); + file->f_sb_list_cpu = cpu; +#else + list = &sb->s_files; +#endif + spin_lock(lock); BUG_ON(!list_empty(&file->f_u.fu_list)); - list_add(&file->f_u.fu_list, &sb->s_files); - spin_unlock(&files_lock); + list_add(&file->f_u.fu_list, list); + spin_unlock(lock); + put_cpu_var(files_cpulock); } void file_sb_list_del(struct file *file) { if
(!list_empty(&file->f_u.fu_list)) { - spin_lock(&files_lock); + spinlock_t *lock; + +#ifdef CONFIG_SMP + BUG_ON(file->f_sb_list_cpu == -1); + lock = &per_cpu(files_cpulock, file->f_sb_list_cpu); + file->f_sb_list_cpu = -1; +#else + lock = &__get_cpu_var(files_cpulock); +#endif + spin_lock(lock); list_del_init(&file->f_u.fu_list); - spin_unlock(&files_lock); + spin_unlock(lock); + } +} + +static void file_list_lock_all(void) +{ + int i; + int nr = 0; + + for_each_possible_cpu(i) { + spinlock_t *lock; + + lock = &per_cpu(files_cpulock, i); + spin_lock_nested(lock, nr); + nr++; + } +} + +static void file_list_unlock_all(void) +{ + int i; + + for_each_possible_cpu(i) { + spinlock_t *lock; + + lock = &per_cpu(files_cpulock, i); + spin_unlock(lock); } } int fs_may_remount_ro(struct super_block *sb) { - struct file *file; + int i; /* Check that no files are currently opened for writing. */ - spin_lock(&files_lock); - list_for_each_entry(file, &sb->s_files, f_u.fu_list) { - struct inode *inode = file->f_path.dentry->d_inode; - - /* File with pending delete? */ - if (inode->i_nlink == 0) - goto too_bad; - - /* Writeable file? */ - if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE)) - goto too_bad; + file_list_lock_all(); + for_each_possible_cpu(i) { + struct file *file; + struct list_head *list; + +#ifdef CONFIG_SMP + list = per_cpu_ptr(sb->s_files, i); +#else + list = &sb->s_files; +#endif + list_for_each_entry(file, list, f_u.fu_list) { + struct inode *inode = file->f_path.dentry->d_inode; + + /* File with pending delete? */ + if (inode->i_nlink == 0) + goto too_bad; + + /* Writeable file? */ + if (S_ISREG(inode->i_mode) && + (file->f_mode & FMODE_WRITE)) + goto too_bad; + } } - spin_unlock(&files_lock); + file_list_unlock_all(); return 1; /* Tis' cool bro. 
*/ too_bad: - spin_unlock(&files_lock); + file_list_unlock_all(); return 0; } @@ -405,35 +471,46 @@ too_bad: */ void mark_files_ro(struct super_block *sb) { - struct file *f; + int i; retry: - spin_lock(&files_lock); - list_for_each_entry(f, &sb->s_files, f_u.fu_list) { - struct vfsmount *mnt; - if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) - continue; - if (!file_count(f)) - continue; - if (!(f->f_mode & FMODE_WRITE)) - continue; - f->f_mode &= ~FMODE_WRITE; - if (file_check_writeable(f) != 0) - continue; - file_release_write(f); - mnt = mntget(f->f_path.mnt); - /* This can sleep, so we can't hold the spinlock. */ - spin_unlock(&files_lock); - mnt_drop_write(mnt); - mntput(mnt); - goto retry; + file_list_lock_all(); + for_each_possible_cpu(i) { + struct file *f; + struct list_head *list; + +#ifdef CONFIG_SMP + list = per_cpu_ptr(sb->s_files, i); +#else + list = &sb->s_files; +#endif + list_for_each_entry(f, list, f_u.fu_list) { + struct vfsmount *mnt; + if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) + continue; + if (!file_count(f)) + continue; + if (!(f->f_mode & FMODE_WRITE)) + continue; + f->f_mode &= ~FMODE_WRITE; + if (file_check_writeable(f) != 0) + continue; + file_release_write(f); + mnt = mntget(f->f_path.mnt); + /* This can sleep, so we can't hold the spinlock. 
*/ + file_list_unlock_all(); + mnt_drop_write(mnt); + mntput(mnt); + goto retry; + } } - spin_unlock(&files_lock); + file_list_unlock_all(); } void __init files_init(unsigned long mempages) { int n; + int i; filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); @@ -448,5 +525,7 @@ void __init files_init(unsigned long mem if (files_stat.max_files < NR_FILE) files_stat.max_files = NR_FILE; files_defer_init(); + for_each_possible_cpu(i) + spin_lock_init(&per_cpu(files_cpulock, i)); percpu_counter_init(&nr_files, 0); } Index: linux-2.6/fs/super.c =================================================================== --- linux-2.6.orig/fs/super.c +++ linux-2.6/fs/super.c @@ -65,7 +65,23 @@ static struct super_block *alloc_super(s INIT_LIST_HEAD(&s->s_dirty); INIT_LIST_HEAD(&s->s_io); INIT_LIST_HEAD(&s->s_more_io); +#ifdef CONFIG_SMP + s->s_files = alloc_percpu(struct list_head); + if (!s->s_files) { + security_sb_free(s); + kfree(s); + s = NULL; + goto out; + } else { + int i; + + for_each_possible_cpu(i) + INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i)); + } +#else INIT_LIST_HEAD(&s->s_files); +#endif + INIT_LIST_HEAD(&s->s_instances); INIT_HLIST_HEAD(&s->s_anon); INIT_LIST_HEAD(&s->s_inodes); Index: linux-2.6/include/linux/fs.h =================================================================== --- linux-2.6.orig/include/linux/fs.h +++ linux-2.6/include/linux/fs.h @@ -916,6 +916,9 @@ struct file { #define f_vfsmnt f_path.mnt const struct file_operations *f_op; spinlock_t f_lock; /* f_ep_links, f_flags, no IRQ */ +#ifdef CONFIG_SMP + int f_sb_list_cpu; +#endif atomic_long_t f_count; unsigned int f_flags; fmode_t f_mode; @@ -1337,7 +1340,11 @@ struct super_block { struct list_head s_io; /* parked for writeback */ struct list_head s_more_io; /* parked for more writeback */ struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ +#ifdef CONFIG_SMP + struct list_head *s_files; +#else struct list_head s_files; +#endif 
/* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */ struct list_head s_dentry_lru; /* unused dentry lru */ int s_nr_dentry_unused; /* # of dentry on lru */ -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html