Most mnt_want/drop_write() pairs are really close in the code; they aren't held for very long. So, in practice, it is hard to get bounced between cpus between the time you call mnt_want_write() and the time you call mnt_drop_write(). The exception to this is the pair in may_open() and __fput(). Between those two it is pretty common to move between cpus. During a kernel compile of around 900 files on a 4-way, I saw it happen ~400 times. This patch assumes that the cpu doing the allocating of the 'struct file' is also the one doing the mnt_want_write(). It is OK that it is wrong sometimes; it just means that we regress back to the spinlock-protected search of all of the cpus' counts. My kernel compile from before went from 400 misses during a compile to just 20 with this patch. It might also be helpful to do the writer count per-node, which would _greatly_ decrease the number of migrations that we see. --- lxc-dave/fs/file_table.c | 2 +- lxc-dave/fs/namespace.c | 17 +++++++++-------- lxc-dave/fs/open.c | 4 ++++ lxc-dave/include/linux/fs.h | 1 + lxc-dave/include/linux/mount.h | 1 + 5 files changed, 16 insertions(+), 9 deletions(-) diff -puN fs/file_table.c~fput-cpu fs/file_table.c --- lxc/fs/file_table.c~fput-cpu 2007-02-20 17:59:48.000000000 -0800 +++ lxc-dave/fs/file_table.c 2007-02-20 17:59:49.000000000 -0800 @@ -215,7 +215,7 @@ void fastcall __fput(struct file *file) if (file->f_mode & FMODE_WRITE) { put_write_access(inode); if(!special_file(inode->i_mode)) - mnt_drop_write(mnt); + __mnt_drop_write(mnt, file->f_write_cpu); } put_pid(file->f_owner.pid); file_kill(file); diff -puN fs/namespace.c~fput-cpu fs/namespace.c --- lxc/fs/namespace.c~fput-cpu 2007-02-20 17:59:48.000000000 -0800 +++ lxc-dave/fs/namespace.c 2007-02-20 18:00:27.000000000 -0800 @@ -89,8 +89,8 @@ struct vfsmount *alloc_vfsmnt(const char int mnt_want_write(struct vfsmount *mnt) { int ret = 0; - atomic_t *cpu_writecount; int cpu = get_cpu(); + atomic_t *cpu_writecount; retry: /* * Not strictly required, but quick and cheap @@ 
-122,22 +122,17 @@ out: put_cpu(); return ret; } -EXPORT_SYMBOL_GPL(mnt_want_write); -void mnt_drop_write(struct vfsmount *mnt) +void __mnt_drop_write(struct vfsmount *mnt, int cpu) { static int miss = 0; atomic_t *cpu_writecount; - int cpu; int borrowed = 0; int retries = 0; retry: - cpu = get_cpu(); cpu_writecount = per_cpu_ptr(mnt->writers, cpu); - if (atomic_add_unless(cpu_writecount, -1, 0)) { - put_cpu(); + if (atomic_add_unless(cpu_writecount, -1, 0)) return; - } spin_lock(&vfsmount_lock); /* * Holding the spinlock, and only checking cpus that @@ -167,6 +162,12 @@ retry: if (!borrowed) goto retry; } +void mnt_drop_write(struct vfsmount *mnt) +{ + int cpu = get_cpu(); + __mnt_drop_write(mnt, cpu); + put_cpu(); +} EXPORT_SYMBOL_GPL(mnt_drop_write); /* diff -puN fs/open.c~fput-cpu fs/open.c --- lxc/fs/open.c~fput-cpu 2007-02-20 17:59:48.000000000 -0800 +++ lxc-dave/fs/open.c 2007-02-20 17:59:49.000000000 -0800 @@ -715,6 +715,10 @@ static struct file *__dentry_open(struct f->f_path.mnt = mnt; f->f_pos = 0; f->f_op = fops_get(inode->i_fop); + /* + * This is OK to race because it is just a hint + */ + f->f_write_cpu = smp_processor_id(); file_move(f, &inode->i_sb->s_files); if (!open && f->f_op) diff -puN include/linux/fs.h~fput-cpu include/linux/fs.h --- lxc/include/linux/fs.h~fput-cpu 2007-02-20 17:59:48.000000000 -0800 +++ lxc-dave/include/linux/fs.h 2007-02-20 17:59:49.000000000 -0800 @@ -766,6 +766,7 @@ struct file { struct fown_struct f_owner; unsigned int f_uid, f_gid; struct file_ra_state f_ra; + int f_write_cpu; unsigned long f_version; #ifdef CONFIG_SECURITY diff -puN include/linux/mount.h~fput-cpu include/linux/mount.h --- lxc/include/linux/mount.h~fput-cpu 2007-02-20 17:59:49.000000000 -0800 +++ lxc-dave/include/linux/mount.h 2007-02-20 17:59:49.000000000 -0800 @@ -94,6 +94,7 @@ static inline int __mnt_is_readonly(stru extern int mnt_want_write(struct vfsmount *mnt); extern void mnt_drop_write(struct vfsmount *mnt); +extern void __mnt_drop_write(struct 
vfsmount *mnt, int cpu); extern void mntput_no_expire(struct vfsmount *mnt); extern void mnt_pin(struct vfsmount *mnt); extern void mnt_unpin(struct vfsmount *mnt); _ - To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html