Re: [PATCH v5] Return bytes transferred for partial direct I/O

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Nov 22, 2017 at 06:29:01AM -0600, Goldwyn Rodrigues wrote:
> From: Goldwyn Rodrigues <rgoldwyn@xxxxxxxx>
> 
> In case direct I/O encounters an error midway, it returns the error.
> Instead it should be returning the number of bytes transferred so far.
> 
> Test case for filesystems (with ENOSPC):
> 1. Create an almost full filesystem
> 2. Create a file, say /mnt/lastfile, until the filesystem is full.
> 3. Direct write() with count > sizeof /mnt/lastfile.
> 
> Result: write() returns -ENOSPC. However, file content has data written
> in step 3.
> 
> Changes since v1:
>  - incorporated iomap and block devices
> 
> Changes since v2:
>  - realized that file size was not increasing when performing a (partial)
>    direct I/O because end_io function was receiving the error instead of
>    size. Fixed.
> 
> Changes since v3:
>  - [hch] initialize transferred with dio->size and use transferred instead
>    of dio->size.
> 
> Changes since v4:
>  - Refreshed to v4.14
> 
> Signed-off-by: Goldwyn Rodrigues <rgoldwyn@xxxxxxxx>
> Reviewed-by: Christoph Hellwig <hch@xxxxxx>

Hmmm, I shoved this into 4.15-rc6 and saw this horrible splat running
generic/472 (that is the test that goes with this patch, right?) on XFS:

[ 2545.534331] XFS: Assertion failed: ret < 0 || ret == count, file: /storage/home/djwong/cdev/work/linux-xfs/fs/xfs/xfs_file.c, line: 598
[ 2545.538177] WARNING: CPU: 2 PID: 16340 at /storage/home/djwong/cdev/work/linux-xfs/fs/xfs/xfs_message.c:116 assfail+0x58/0x90 [xfs]
[ 2545.539748] Modules linked in: xfs libcrc32c dax_pmem device_dax nd_pmem sch_fq_codel af_packet [last unloaded: xfs]
[ 2545.541191] CPU: 2 PID: 16340 Comm: xfs_io Not tainted 4.15.0-rc6-xfsx #5
[ 2545.542119] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.10.2-1ubuntu1djwong0 04/01/2014
[ 2545.543414] RIP: 0010:assfail+0x58/0x90 [xfs]
[ 2545.544032] RSP: 0018:ffff88002b56f990 EFLAGS: 00010246
[ 2545.544760] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
[ 2545.545721] RDX: 0000000000000004 RSI: 000000000000000a RDI: ffffffffa0a1eac4
[ 2545.546681] RBP: ffff88002b56fdd0 R08: ffff88002b56f6e0 R09: 0000000000000000
[ 2545.547646] R10: 1ffff1000fe14f6e R11: 0000000000000000 R12: 1ffff100056adf3a
[ 2545.548613] R13: 0000000000400000 R14: ffff88002b56fc38 R15: 0000000000000000
[ 2545.549579] FS:  00007fc1f601e700(0000) GS:ffff88007f000000(0000) knlGS:0000000000000000
[ 2545.550666] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 2545.551460] CR2: 00007fc1f4bac000 CR3: 0000000075165001 CR4: 00000000001606e0
[ 2545.552423] Call Trace:
[ 2545.552850]  xfs_file_dio_aio_write+0x6e3/0xe40 [xfs]
[ 2545.553581]  ? kvm_clock_read+0x1f/0x30
[ 2545.554191]  ? xfs_file_dax_write+0x6a0/0x6a0 [xfs]
[ 2545.554885]  ? kvm_clock_read+0x1f/0x30
[ 2545.555439]  ? kvm_sched_clock_read+0x5/0x10
[ 2545.556046]  ? sched_clock+0x5/0x10
[ 2545.556553]  ? sched_clock_cpu+0x18/0x1e0
[ 2545.557136]  ? __lock_acquire+0xbbf/0x40f0
[ 2545.557719]  ? kvm_clock_read+0x1f/0x30
[ 2545.558272]  ? sched_clock+0x5/0x10
[ 2545.558848]  xfs_file_write_iter+0x34a/0xb50 [xfs]
[ 2545.559544]  do_iter_readv_writev+0x44c/0x700
[ 2545.560170]  ? _copy_from_user+0x96/0xd0
[ 2545.560729]  ? vfs_dedupe_file_range+0x820/0x820
[ 2545.561398]  do_iter_write+0x12c/0x550
[ 2545.561939]  ? rcu_lockdep_current_cpu_online+0xd7/0x120
[ 2545.562682]  ? rcu_read_lock_sched_held+0xa3/0x120
[ 2545.563366]  vfs_writev+0x175/0x2d0
[ 2545.563874]  ? vfs_iter_write+0xc0/0xc0
[ 2545.564434]  ? get_lock_stats+0x16/0x90
[ 2545.564992]  ? lock_downgrade+0x580/0x580
[ 2545.565583]  ? __fget+0x1e7/0x350
[ 2545.566077]  ? entry_SYSCALL_64_fastpath+0x5/0x96
[ 2545.566744]  ? do_pwritev+0x125/0x160
[ 2545.567277]  do_pwritev+0x125/0x160
[ 2545.567787]  entry_SYSCALL_64_fastpath+0x1f/0x96
[ 2545.568437] RIP: 0033:0x7fc1f56d6110
[ 2545.568948] RSP: 002b:00007ffc07f11340 EFLAGS: 00000293 ORIG_RAX: 0000000000000128
[ 2545.569971] RAX: ffffffffffffffda RBX: 0000000001ef9170 RCX: 00007fc1f56d6110
[ 2545.570932] RDX: 0000000000000001 RSI: 0000000001ef9170 RDI: 0000000000000003
[ 2545.571897] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000400000
[ 2545.572857] R10: 0000000000000000 R11: 0000000000000293 R12: 0000000000000000
[ 2545.573818] R13: 0000000000000000 R14: 0000000000400000 R15: 0000000001ef9170
[ 2545.574802] Code: df 48 89 fa 48 c1 ea 03 0f b6 04 02 48 89 fa 83 e2 07 38 d0 7f 04 84 c0 75 15 0f b6 1d d6 51 65 00 80 fb 01 77 1e 83 e3 01 75 0b <0f> ff 5b c3 e8 0f 59 1b e1 eb e4 0f 0b 48 c7 c7 20 40 8c a0 e8 
[ 2545.577451] ---[ end trace 7bcb2da267d05648 ]---

I suspect that all we need to do is remove the ASSERT(ret...) at the bottom of
xfs_file_dio_aio_write?

--D

> ---
>  fs/block_dev.c |  2 +-
>  fs/direct-io.c |  4 +---
>  fs/iomap.c     | 20 ++++++++++----------
>  3 files changed, 12 insertions(+), 14 deletions(-)
> 
> diff --git a/fs/block_dev.c b/fs/block_dev.c
> index 789f55e851ae..4d3e4603f687 100644
> --- a/fs/block_dev.c
> +++ b/fs/block_dev.c
> @@ -421,7 +421,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
>  
>  	if (!ret)
>  		ret = blk_status_to_errno(dio->bio.bi_status);
> -	if (likely(!ret))
> +	if (likely(dio->size))
>  		ret = dio->size;
>  
>  	bio_put(&dio->bio);
> diff --git a/fs/direct-io.c b/fs/direct-io.c
> index b53e66d9abd7..a8d2710f4ee9 100644
> --- a/fs/direct-io.c
> +++ b/fs/direct-io.c
> @@ -262,8 +262,6 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags)
>  		ret = dio->page_errors;
>  	if (ret == 0)
>  		ret = dio->io_error;
> -	if (ret == 0)
> -		ret = transferred;
>  
>  	if (dio->end_io) {
>  		// XXX: ki_pos??
> @@ -310,7 +308,7 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags)
>  	}
>  
>  	kmem_cache_free(dio_cache, dio);
> -	return ret;
> +	return transferred ? transferred : ret;
>  }
>  
>  static void dio_aio_complete_work(struct work_struct *work)
> diff --git a/fs/iomap.c b/fs/iomap.c
> index d4801f8dd4fd..6e37acba578d 100644
> --- a/fs/iomap.c
> +++ b/fs/iomap.c
> @@ -715,23 +715,23 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
>  	struct kiocb *iocb = dio->iocb;
>  	struct inode *inode = file_inode(iocb->ki_filp);
>  	loff_t offset = iocb->ki_pos;
> -	ssize_t ret;
> +	ssize_t err;
> +	ssize_t transferred = dio->size;
>  
>  	if (dio->end_io) {
> -		ret = dio->end_io(iocb,
> -				dio->error ? dio->error : dio->size,
> +		err = dio->end_io(iocb,
> +				transferred ? transferred : dio->error,
>  				dio->flags);
>  	} else {
> -		ret = dio->error;
> +		err = dio->error;
>  	}
>  
> -	if (likely(!ret)) {
> -		ret = dio->size;
> +	if (likely(transferred)) {
>  		/* check for short read */
> -		if (offset + ret > dio->i_size &&
> +		if (offset + transferred > dio->i_size &&
>  		    !(dio->flags & IOMAP_DIO_WRITE))
> -			ret = dio->i_size - offset;
> -		iocb->ki_pos += ret;
> +			transferred = dio->i_size - offset;
> +		iocb->ki_pos += transferred;
>  	}
>  
>  	/*
> @@ -758,7 +758,7 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
>  	inode_dio_end(file_inode(iocb->ki_filp));
>  	kfree(dio);
>  
> -	return ret;
> +	return transferred ? transferred : err;
>  }
>  
>  static void iomap_dio_complete_work(struct work_struct *work)
> -- 
> 2.14.2
> 



[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux