Re: [PATCH v2 1/2] iomap: fix zero padding data issue in concurrent append writes

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Nov 13, 2024 at 05:19:06PM +0800, Long Li wrote:
> During concurrent append writes to XFS filesystem, zero padding data
> may appear in the file after power failure. This happens due to imprecise
> disk size updates when handling write completion.
> 
> Consider this scenario with concurrent append writes same file:
> 
>   Thread 1:                  Thread 2:
>   ------------               -----------
>   write [A, A+B]
>   update inode size to A+B
>   submit I/O [A, A+BS]
>                              write [A+B, A+B+C]
>                              update inode size to A+B+C
>   <I/O completes, updates disk size to A+B+C>
>   <power failure>
> 
> After reboot, file has zero padding in range [A+B, A+B+C]:
> 
>   |<         Block Size (BS)      >|
>   |DDDDDDDDDDDDDDDD0000000000000000|
>   ^               ^        ^
>   A              A+B      A+B+C (EOF)
> 
>   D = Valid Data
>   0 = Zero Padding
> 
> The issue stems from disk size being set to min(io_offset + io_size,
> inode->i_size) at I/O completion. Since io_offset+io_size is block
> size granularity, it may exceed the actual valid file data size. In
> the case of concurrent append writes, inode->i_size may be larger
> than the actual range of valid file data written to disk, leading to
> inaccurate disk size updates.
> 
> This patch changes the meaning of io_size to represent the size of
> valid data in ioend, while the extent size of ioend can be obtained
> by rounding up based on block size. It ensures more precise disk
> size updates and avoids the zero padding issue.  Another benefit is
> that it makes the xfs_ioend_is_append() check more accurate, which
> can reduce unnecessary end bio callbacks of xfs_end_bio() in certain
> scenarios, such as repeated writes at the file tail without extending
> the file size.
> 
> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> Signed-off-by: Long Li <leo.lilong@xxxxxxxxxx>


How does this differs from V1? Please, if you are sending a new version, add the
changes you've made since the previous one, so nobody needs to keep comparing
both.

Carlos
 
> ---
>  fs/iomap/buffered-io.c | 21 +++++++++++++++------
>  include/linux/iomap.h  |  7 ++++++-
>  2 files changed, 21 insertions(+), 7 deletions(-)
> 
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index ce73d2a48c1e..a2a75876cda6 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -1599,6 +1599,8 @@ EXPORT_SYMBOL_GPL(iomap_finish_ioends);
>  static bool
>  iomap_ioend_can_merge(struct iomap_ioend *ioend, struct iomap_ioend *next)
>  {
> +	size_t size = iomap_ioend_extent_size(ioend);
> +
>  	if (ioend->io_bio.bi_status != next->io_bio.bi_status)
>  		return false;
>  	if ((ioend->io_flags & IOMAP_F_SHARED) ^
> @@ -1607,7 +1609,7 @@ iomap_ioend_can_merge(struct iomap_ioend *ioend, struct iomap_ioend *next)
>  	if ((ioend->io_type == IOMAP_UNWRITTEN) ^
>  	    (next->io_type == IOMAP_UNWRITTEN))
>  		return false;
> -	if (ioend->io_offset + ioend->io_size != next->io_offset)
> +	if (ioend->io_offset + size != next->io_offset)
>  		return false;
>  	/*
>  	 * Do not merge physically discontiguous ioends. The filesystem
> @@ -1619,7 +1621,7 @@ iomap_ioend_can_merge(struct iomap_ioend *ioend, struct iomap_ioend *next)
>  	 * submission so does not point to the start sector of the bio at
>  	 * completion.
>  	 */
> -	if (ioend->io_sector + (ioend->io_size >> 9) != next->io_sector)
> +	if (ioend->io_sector + (size >> 9) != next->io_sector)
>  		return false;
>  	return true;
>  }
> @@ -1636,7 +1638,7 @@ iomap_ioend_try_merge(struct iomap_ioend *ioend, struct list_head *more_ioends)
>  		if (!iomap_ioend_can_merge(ioend, next))
>  			break;
>  		list_move_tail(&next->io_list, &ioend->io_list);
> -		ioend->io_size += next->io_size;
> +		ioend->io_size = iomap_ioend_extent_size(ioend) + next->io_size;
>  	}
>  }
>  EXPORT_SYMBOL_GPL(iomap_ioend_try_merge);
> @@ -1736,7 +1738,7 @@ static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos)
>  		return false;
>  	if (wpc->iomap.type != wpc->ioend->io_type)
>  		return false;
> -	if (pos != wpc->ioend->io_offset + wpc->ioend->io_size)
> +	if (pos != wpc->ioend->io_offset + iomap_ioend_extent_size(wpc->ioend))
>  		return false;
>  	if (iomap_sector(&wpc->iomap, pos) !=
>  	    bio_end_sector(&wpc->ioend->io_bio))
> @@ -1768,6 +1770,8 @@ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
>  {
>  	struct iomap_folio_state *ifs = folio->private;
>  	size_t poff = offset_in_folio(folio, pos);
> +	loff_t isize = i_size_read(inode);
> +	struct iomap_ioend *ioend;
>  	int error;
>  
>  	if (!wpc->ioend || !iomap_can_add_to_ioend(wpc, pos)) {
> @@ -1778,12 +1782,17 @@ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
>  		wpc->ioend = iomap_alloc_ioend(wpc, wbc, inode, pos);
>  	}
>  
> -	if (!bio_add_folio(&wpc->ioend->io_bio, folio, len, poff))
> +	ioend = wpc->ioend;
> +	if (!bio_add_folio(&ioend->io_bio, folio, len, poff))
>  		goto new_ioend;
>  
>  	if (ifs)
>  		atomic_add(len, &ifs->write_bytes_pending);
> -	wpc->ioend->io_size += len;
> +
> +	ioend->io_size = iomap_ioend_extent_size(ioend) + len;
> +	if (ioend->io_offset + ioend->io_size > isize)
> +		ioend->io_size = isize - ioend->io_offset;
> +
>  	wbc_account_cgroup_owner(wbc, folio, len);
>  	return 0;
>  }
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index f61407e3b121..2984eccfa213 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -330,7 +330,7 @@ struct iomap_ioend {
>  	u16			io_type;
>  	u16			io_flags;	/* IOMAP_F_* */
>  	struct inode		*io_inode;	/* file being written to */
> -	size_t			io_size;	/* size of the extent */
> +	size_t			io_size;	/* size of valid data */
>  	loff_t			io_offset;	/* offset in the file */
>  	sector_t		io_sector;	/* start sector of ioend */
>  	struct bio		io_bio;		/* MUST BE LAST! */
> @@ -341,6 +341,11 @@ static inline struct iomap_ioend *iomap_ioend_from_bio(struct bio *bio)
>  	return container_of(bio, struct iomap_ioend, io_bio);
>  }
>  
> +static inline size_t iomap_ioend_extent_size(struct iomap_ioend *ioend)
> +{
> +	return round_up(ioend->io_size, i_blocksize(ioend->io_inode));
> +}
> +
>  struct iomap_writeback_ops {
>  	/*
>  	 * Required, maps the blocks so that writeback can be performed on
> -- 
> 2.39.2
> 




[Index of Archives]     [XFS Filesystem Development (older mail)]     [Linux Filesystem Development]     [Linux Audio Users]     [Yosemite Trails]     [Linux Kernel]     [Linux RAID]     [Linux SCSI]


  Powered by Linux