Re: [PATCH 07/19] iomap: use a srcmap for a read-modify-write I/O

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, 2019-09-09 at 20:27 +0200, Christoph Hellwig wrote:
> From: Goldwyn Rodrigues <rgoldwyn@xxxxxxxx>
> 
> The srcmap is used to identify where the read is to be performed
> from.
> It is passed to ->iomap_begin, which can fill it in if we need to
> read
> data for partially written blocks from a different location than the
> write target.  The srcmap is only supported for buffered writes so
> far.
> 
> Signed-off-by: Goldwyn Rodrigues <rgoldwyn@xxxxxxxx>
> [hch: merged two patches, removed the IOMAP_F_COW flag, use iomap as
>       srcmap if not set, adjust length down to srcmap end as well]
> Signed-off-by: Christoph Hellwig <hch@xxxxxx>
> ---
>  fs/dax.c               |  9 ++++--
>  fs/ext2/inode.c        |  2 +-
>  fs/ext4/inode.c        |  2 +-
>  fs/gfs2/bmap.c         |  3 +-
>  fs/iomap/apply.c       | 23 +++++++++++----
>  fs/iomap/buffered-io.c | 65 +++++++++++++++++++++++-----------------
> --
>  fs/iomap/direct-io.c   |  2 +-
>  fs/iomap/fiemap.c      |  4 +--
>  fs/iomap/seek.c        |  4 +--
>  fs/iomap/swapfile.c    |  3 +-
>  fs/xfs/xfs_iomap.c     |  9 ++++--
>  include/linux/iomap.h  |  5 ++--
>  12 files changed, 79 insertions(+), 52 deletions(-)
> 
> diff --git a/fs/dax.c b/fs/dax.c
> index a237141d8787..8016be24bbd9 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -1090,7 +1090,7 @@ EXPORT_SYMBOL_GPL(__dax_zero_page_range);
>  
>  static loff_t
>  dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void
> *data,
> -		struct iomap *iomap)
> +		struct iomap *iomap, struct iomap *srcmap)
>  {
>  	struct block_device *bdev = iomap->bdev;
>  	struct dax_device *dax_dev = iomap->dax_dev;
> @@ -1248,6 +1248,7 @@ static vm_fault_t dax_iomap_pte_fault(struct
> vm_fault *vmf, pfn_t *pfnp,
>  	unsigned long vaddr = vmf->address;
>  	loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
>  	struct iomap iomap = { 0 };
> +	struct iomap srcmap = { 0 };
>  	unsigned flags = IOMAP_FAULT;
>  	int error, major = 0;
>  	bool write = vmf->flags & FAULT_FLAG_WRITE;
> @@ -1292,7 +1293,7 @@ static vm_fault_t dax_iomap_pte_fault(struct
> vm_fault *vmf, pfn_t *pfnp,
>  	 * the file system block size to be equal the page size,
> which means
>  	 * that we never have to deal with more than a single extent
> here.
>  	 */
> -	error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags,
> &iomap);
> +	error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags,
> &iomap, &srcmap);
>  	if (iomap_errp)
>  		*iomap_errp = error;
>  	if (error) {
> @@ -1472,6 +1473,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct
> vm_fault *vmf, pfn_t *pfnp,
>  	struct inode *inode = mapping->host;
>  	vm_fault_t result = VM_FAULT_FALLBACK;
>  	struct iomap iomap = { 0 };
> +	struct iomap srcmap = { 0 };
>  	pgoff_t max_pgoff;
>  	void *entry;
>  	loff_t pos;
> @@ -1546,7 +1548,8 @@ static vm_fault_t dax_iomap_pmd_fault(struct
> vm_fault *vmf, pfn_t *pfnp,
>  	 * to look up our filesystem block.
>  	 */
>  	pos = (loff_t)xas.xa_index << PAGE_SHIFT;
> -	error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags,
> &iomap);
> +	error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags,
> &iomap,
> +			&srcmap);
>  	if (error)
>  		goto unlock_entry;
>  
> diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
> index 7004ce581a32..467c13ff6b40 100644
> --- a/fs/ext2/inode.c
> +++ b/fs/ext2/inode.c
> @@ -801,7 +801,7 @@ int ext2_get_block(struct inode *inode, sector_t
> iblock,
>  
>  #ifdef CONFIG_FS_DAX
>  static int ext2_iomap_begin(struct inode *inode, loff_t offset,
> loff_t length,
> -		unsigned flags, struct iomap *iomap)
> +		unsigned flags, struct iomap *iomap, struct iomap
> *srcmap)
>  {
>  	unsigned int blkbits = inode->i_blkbits;
>  	unsigned long first_block = offset >> blkbits;
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 420fe3deed39..e2116e8228d2 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -3453,7 +3453,7 @@ static bool ext4_inode_datasync_dirty(struct
> inode *inode)
>  }
>  
>  static int ext4_iomap_begin(struct inode *inode, loff_t offset,
> loff_t length,
> -			    unsigned flags, struct iomap *iomap)
> +		unsigned flags, struct iomap *iomap, struct iomap
> *srcmap)
>  {
>  	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
>  	unsigned int blkbits = inode->i_blkbits;
> diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
> index 79581b9bdebb..0bf8e8fa82bd 100644
> --- a/fs/gfs2/bmap.c
> +++ b/fs/gfs2/bmap.c
> @@ -1123,7 +1123,8 @@ static int gfs2_iomap_begin_write(struct inode
> *inode, loff_t pos,
>  }
>  
>  static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t
> length,
> -			    unsigned flags, struct iomap *iomap)
> +			    unsigned flags, struct iomap *iomap,
> +			    struct iomap *srcmap)
>  {
>  	struct gfs2_inode *ip = GFS2_I(inode);
>  	struct metapath mp = { .mp_aheight = 1, };
> diff --git a/fs/iomap/apply.c b/fs/iomap/apply.c
> index 54c02aecf3cd..67efd86675e6 100644
> --- a/fs/iomap/apply.c
> +++ b/fs/iomap/apply.c
> @@ -24,7 +24,9 @@ iomap_apply(struct inode *inode, loff_t pos, loff_t
> length, unsigned flags,
>  		const struct iomap_ops *ops, void *data,
> iomap_actor_t actor)
>  {
>  	struct iomap iomap = { 0 };
> +	struct iomap srcmap = { 0 };
>  	loff_t written = 0, ret;
> +	u64 end;
>  
>  	/*
>  	 * Need to map a range from start position for length bytes.
> This can
> @@ -38,7 +40,7 @@ iomap_apply(struct inode *inode, loff_t pos, loff_t
> length, unsigned flags,
>  	 * expose transient stale data. If the reserve fails, we can
> safely
>  	 * back out at this point as there is nothing to undo.
>  	 */
> -	ret = ops->iomap_begin(inode, pos, length, flags, &iomap);
> +	ret = ops->iomap_begin(inode, pos, length, flags, &iomap,
> &srcmap);
>  	if (ret)
>  		return ret;
>  	if (WARN_ON(iomap.offset > pos))
> @@ -50,15 +52,26 @@ iomap_apply(struct inode *inode, loff_t pos,
> loff_t length, unsigned flags,
>  	 * Cut down the length to the one actually provided by the
> filesystem,
>  	 * as it might not be able to give us the whole size that we
> requested.
>  	 */
> -	if (iomap.offset + iomap.length < pos + length)
> -		length = iomap.offset + iomap.length - pos;
> +	end = iomap.offset + iomap.length;
> +	if (srcmap.type)
> +		end = min(end, srcmap.offset + srcmap.length);
> +	if (pos + length > end)
> +		length = end - pos;
>  


Yes, that looks more correct. However, can we be smart and not bother
setting the minimum of end and srcmap.offset + srcmap.length if it is
not required? ie in situations where end coincides with block boundary.
 Or if srcmap.length falls short, until the last block boundary of
iomap? 

I did think about this scenario. This case is specific to CoW and
thought this is best handled by filesystem's iomap_begin(). If this
goes in, the filesystems would have to "falsify" srcmap length
information to maximize the amount of I/O that goes in one iteration.

-- 
Goldwyn




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux