Re: [PATCH] zonefs: Fix O_APPEND async write handling

"Darrick J. Wong" <djwong@xxxxxxxxxx> · Wed, 10 Mar 2021 19:36:24 -0800

On Thu, Mar 11, 2021 at 12:22:30PM +0900, Damien Le Moal wrote:
> zonefs updates the size of a sequential zone file inode only on
> completion of direct writes. When executing asynchronous append writes
> (with a file opened with O_APPEND or using RWF_APPEND), the use of the
> current inode size in generic_write_checks() to set an iocb offset thus
> leads to an unaligned write if an application issues an append write
> operation while another write is still being executed.

Ah, I /had/ wondered if setting i_size to the zone size (instead of the
write pointer) would have side effects...

> Fix this problem by introducing zonefs_write_checks() as a modified
> version of generic_write_checks() using the file inode i_wpoffset for an
> append write iocb offset. Also introduce zonefs_write_check_limits() to
> replace the generic_write_check_limits() call. This zonefs-specific helper
> makes sure that the maximum file limit used is the maximum size of the
> file being accessed.
> 
> Since zonefs_write_checks() already truncates the iov_iter, the calls
> to iov_iter_truncate() in zonefs_file_dio_write() and
> zonefs_file_buffered_write() are removed.
> 
> Fixes: 8dcc1a9d90c1 ("fs: New zonefs file system")
> Cc: <stable@xxxxxxxxxxxxxxx>
> Signed-off-by: Damien Le Moal <damien.lemoal@xxxxxxx>
> ---
>  fs/zonefs/super.c | 76 ++++++++++++++++++++++++++++++++++++++++-------
>  1 file changed, 66 insertions(+), 10 deletions(-)
> 
> diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
> index b6ff4a21abac..11aa990b3a4c 100644
> --- a/fs/zonefs/super.c
> +++ b/fs/zonefs/super.c
> @@ -727,6 +727,68 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from)
>  	return ret;
>  }
>  
> +/*
> + * Do not exceed the LFS limits nor the file zone size. If pos is under the
> + * limit it becomes a short access. If it exceeds the limit, return -EFBIG.
> + */
> +static loff_t zonefs_write_check_limits(struct file *file, loff_t pos,
> +					loff_t count)
> +{
> +	struct inode *inode = file_inode(file);
> +	struct zonefs_inode_info *zi = ZONEFS_I(inode);
> +	loff_t limit = rlimit(RLIMIT_FSIZE);
> +	loff_t max_size = zi->i_max_size;
> +
> +	if (limit != RLIM_INFINITY) {
> +		if (pos >= limit) {
> +			send_sig(SIGXFSZ, current, 0);
> +			return -EFBIG;
> +		}
> +		count = min(count, limit - pos);
> +	}
> +
> +	if (!(file->f_flags & O_LARGEFILE))
> +		max_size = min_t(loff_t, MAX_NON_LFS, max_size);
> +
> +	if (unlikely(pos >= max_size))
> +		return -EFBIG;
> +
> +	return min(count, max_size - pos);
> +}
> +
> +static ssize_t zonefs_write_checks(struct kiocb *iocb, struct iov_iter *from)
> +{
> +	struct file *file = iocb->ki_filp;
> +	struct inode *inode = file_inode(file);
> +	struct zonefs_inode_info *zi = ZONEFS_I(inode);
> +	loff_t count;
> +
> +	if (IS_SWAPFILE(inode))
> +		return -ETXTBSY;

...but can zonefs really do swap files now?

--D

> +
> +	if (!iov_iter_count(from))
> +		return 0;
> +
> +	if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
> +		return -EINVAL;
> +
> +	if (iocb->ki_flags & IOCB_APPEND) {
> +		if (zi->i_ztype != ZONEFS_ZTYPE_SEQ)
> +			return -EINVAL;
> +		mutex_lock(&zi->i_truncate_mutex);
> +		iocb->ki_pos = zi->i_wpoffset;
> +		mutex_unlock(&zi->i_truncate_mutex);
> +	}
> +
> +	count = zonefs_write_check_limits(file, iocb->ki_pos,
> +					  iov_iter_count(from));
> +	if (count < 0)
> +		return count;
> +
> +	iov_iter_truncate(from, count);
> +	return iov_iter_count(from);
> +}
> +
>  /*
>   * Handle direct writes. For sequential zone files, this is the only possible
>   * write path. For these files, check that the user is issuing writes
> @@ -744,8 +806,7 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
>  	struct super_block *sb = inode->i_sb;
>  	bool sync = is_sync_kiocb(iocb);
>  	bool append = false;
> -	size_t count;
> -	ssize_t ret;
> +	ssize_t ret, count;
>  
>  	/*
>  	 * For async direct IOs to sequential zone files, refuse IOCB_NOWAIT
> @@ -763,13 +824,10 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
>  		inode_lock(inode);
>  	}
>  
> -	ret = generic_write_checks(iocb, from);
> -	if (ret <= 0)
> +	count = zonefs_write_checks(iocb, from);
> +	if (count <= 0)
>  		goto inode_unlock;
>  
> -	iov_iter_truncate(from, zi->i_max_size - iocb->ki_pos);
> -	count = iov_iter_count(from);
> -
>  	if ((iocb->ki_pos | count) & (sb->s_blocksize - 1)) {
>  		ret = -EINVAL;
>  		goto inode_unlock;
> @@ -828,12 +886,10 @@ static ssize_t zonefs_file_buffered_write(struct kiocb *iocb,
>  		inode_lock(inode);
>  	}
>  
> -	ret = generic_write_checks(iocb, from);
> +	ret = zonefs_write_checks(iocb, from);
>  	if (ret <= 0)
>  		goto inode_unlock;
>  
> -	iov_iter_truncate(from, zi->i_max_size - iocb->ki_pos);
> -
>  	ret = iomap_file_buffered_write(iocb, from, &zonefs_iomap_ops);
>  	if (ret > 0)
>  		iocb->ki_pos += ret;
> -- 
> 2.29.2
>