Re: [PATCH v3] fs: Fix data race in inode_set_ctime_to_ts

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, 2024-11-22 at 21:06 +0800, Hao-ran Zheng wrote:
> A data race may occur when the function `inode_set_ctime_to_ts()` and
> the function `inode_get_ctime_sec()` are executed concurrently. When
> two threads call `aio_read` and `aio_write` respectively, the calls
> are dispatched to the read and write functions of the corresponding
> file system. Taking the btrfs file system as an example,
> the `btrfs_file_read_iter` and `btrfs_file_write_iter` functions are
> finally called. These two functions create a data race when they
> eventually call `inode_get_ctime_sec()` and `inode_set_ctime_to_ts()`.
> The specific call stack that appears during testing is as follows:
> 
> ============DATA_RACE============
> btrfs_delayed_update_inode+0x1f61/0x7ce0 [btrfs]
> btrfs_update_inode+0x45e/0xbb0 [btrfs]
> btrfs_dirty_inode+0x2b8/0x530 [btrfs]
> btrfs_update_time+0x1ad/0x230 [btrfs]
> touch_atime+0x211/0x440
> filemap_read+0x90f/0xa20
> btrfs_file_read_iter+0xeb/0x580 [btrfs]
> aio_read+0x275/0x3a0
> io_submit_one+0xd22/0x1ce0
> __se_sys_io_submit+0xb3/0x250
> do_syscall_64+0xc1/0x190
> entry_SYSCALL_64_after_hwframe+0x77/0x7f
> ============OTHER_INFO============
> btrfs_write_check+0xa15/0x1390 [btrfs]
> btrfs_buffered_write+0x52f/0x29d0 [btrfs]
> btrfs_do_write_iter+0x53d/0x1590 [btrfs]
> btrfs_file_write_iter+0x41/0x60 [btrfs]
> aio_write+0x41e/0x5f0
> io_submit_one+0xd42/0x1ce0
> __se_sys_io_submit+0xb3/0x250
> do_syscall_64+0xc1/0x190
> entry_SYSCALL_64_after_hwframe+0x77/0x7f
> 
> To address this issue, it is recommended to add WRITE_ONCE
> when writing the `inode->i_ctime_sec` and `inode->i_ctime_nsec`
> variables, and to add READ_ONCE when reading them in the functions
> `inode_get_ctime_sec()` and `inode_get_ctime_nsec()`.
> 
> Signed-off-by: Hao-ran Zheng <zhenghaoran@xxxxxxxxxxx>
> ---
> V2 -> V3: Added READ_ONCE in inode_get_ctime_nsec() and addressed review comments
> V1 -> V2: Added READ_ONCE in inode_get_ctime_sec()
> ---
>  include/linux/fs.h | 8 ++++----
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 3559446279c1..c18f9a9ee5e7 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1655,12 +1655,12 @@ static inline struct timespec64 inode_set_mtime(struct inode *inode,
>  
>  static inline time64_t inode_get_ctime_sec(const struct inode *inode)
>  {
> -	return inode->i_ctime_sec;
> +	return READ_ONCE(inode->i_ctime_sec);
>  }
>  
>  static inline long inode_get_ctime_nsec(const struct inode *inode)
>  {
> -	return inode->i_ctime_nsec;
> +	return READ_ONCE(inode->i_ctime_nsec);
>  }
>  
>  static inline struct timespec64 inode_get_ctime(const struct inode *inode)
> @@ -1674,8 +1674,8 @@ static inline struct timespec64 inode_get_ctime(const struct inode *inode)
>  static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode,
>  						      struct timespec64 ts)
>  {
> -	inode->i_ctime_sec = ts.tv_sec;
> -	inode->i_ctime_nsec = ts.tv_nsec;
> +	WRITE_ONCE(inode->i_ctime_sec, ts.tv_sec);
> +	WRITE_ONCE(inode->i_ctime_nsec, ts.tv_nsec);
>  	return ts;
>  }
>  

Looks reasonable. There are also bare fetches and stores of the
i_ctime_sec field in inode_set_ctime_current(). Do we need something
like this in addition to the above?


diff --git a/fs/inode.c b/fs/inode.c
index b13b778257ae..d869ee6f1c6b 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -2788,7 +2788,7 @@ struct timespec64 inode_set_ctime_current(struct inode *inode)
 	 */
 	cns = smp_load_acquire(&inode->i_ctime_nsec);
 	if (cns & I_CTIME_QUERIED) {
-		struct timespec64 ctime = { .tv_sec = inode->i_ctime_sec,
+		struct timespec64 ctime = { .tv_sec = READ_ONCE(inode->i_ctime_sec),
 					    .tv_nsec = cns & ~I_CTIME_QUERIED };
 
 		if (timespec64_compare(&now, &ctime) <= 0) {
@@ -2809,7 +2809,7 @@ struct timespec64 inode_set_ctime_current(struct inode *inode)
 	/* Try to swap the nsec value into place. */
 	if (try_cmpxchg(&inode->i_ctime_nsec, &cur, now.tv_nsec)) {
 		/* If swap occurred, then we're (mostly) done */
-		inode->i_ctime_sec = now.tv_sec;
+		WRITE_ONCE(inode->i_ctime_sec, now.tv_sec);
 		trace_ctime_ns_xchg(inode, cns, now.tv_nsec, cur);
 		mgtime_counter_inc(mg_ctime_swaps);
 	} else {
@@ -2824,7 +2824,7 @@ struct timespec64 inode_set_ctime_current(struct inode *inode)
 			goto retry;
 		}
 		/* Otherwise, keep the existing ctime */
-		now.tv_sec = inode->i_ctime_sec;
+		now.tv_sec = READ_ONCE(inode->i_ctime_sec);
 		now.tv_nsec = cur & ~I_CTIME_QUERIED;
 	}
 out:






[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux