In the syscall test of UnixBench, performance regression occurred due to false sharing. The lock and atomic members, including file::f_lock, file::f_count and file::f_pos_lock are highly contended and frequently updated in the high-concurrency test scenarios. perf c2c indentified one affected read access, file::f_op. To prevent false sharing, the layout of file struct is changed as following (A) f_lock, f_count and f_pos_lock are put together to share the same cache line. (B) The read mostly members, including f_path, f_inode, f_op are put into a separate cache line. (C) f_mode is put together with f_count, since they are used frequently at the same time. The optimization has been validated in the syscall test of UnixBench. performance gain is 30~50%, when the number of parallel jobs is 16. Signed-off-by: chenzhiyin <zhiyin.chen@xxxxxxxxx> --- include/linux/fs.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index 21a981680856..01c55e3a1b96 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -962,23 +962,23 @@ struct file { struct rcu_head f_rcuhead; unsigned int f_iocb_flags; }; - struct path f_path; - struct inode *f_inode; /* cached value */ - const struct file_operations *f_op; /* * Protects f_ep, f_flags. * Must not be taken from IRQ context. */ spinlock_t f_lock; - atomic_long_t f_count; - unsigned int f_flags; fmode_t f_mode; + atomic_long_t f_count; struct mutex f_pos_lock; + unsigned int f_flags; loff_t f_pos; struct fown_struct f_owner; const struct cred *f_cred; struct file_ra_state f_ra; + struct path f_path; + struct inode *f_inode; /* cached value */ + const struct file_operations *f_op; u64 f_version; #ifdef CONFIG_SECURITY -- 2.39.1