Re: [PATCH v3] UDF: Add support for O_DIRECT

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



  Hello,

  first, you have my address wrong (you had suze instead of suse) which is
why I wasn't getting your email and not replying (missed the patch in LKML
traffic). Second, it's good to CC also linux-fsdevel for UDF related
matters (I tend to use that for UDF announcements etc. so people caring
about UDF can watch there and don't have to read high-volume LKML).

On Tue 04-09-12 10:49:39, Ian Abbott wrote:
> Add support for the O_DIRECT flag.  There are two cases to deal with:
  Out of curiosity, do you have a use for this feature or is it mostly
academic interest?

> 1. Small files stored in the ICB (inode control block?): just return 0
> from the new udf_adinicb_direct_IO() handler to fall back to buffered
> I/O.  For direct writes, there is a "gotcha" to deal with when
> generic_file_direct_write() in mm/filemap.c invalidates the pages.  In
> the udf_adinicb_writepage() handler, only part of the page data will be
> valid and the rest will be zeroed out, so only copy the valid part into
> the ICB.  (This is actually a bit inefficient as udf_adinicb_write_end()
> will have already copied the data into the ICB once, but it's pretty
> likely that the file will grow to the point where its data can no longer
> be stored in the ICB and will be moved to a different area of the file
> system.  At that point, a different direct_IO handler will be used - see
> below.)
  Sorry, I didn't quite get this. What is the problem with copying all the
data to inode in udf_adinicb_writepage() as it is now?

								Honza

> 2. Larger files, not stored in the ICB: nothing special here.  Just call
> blockdev_direct_IO() from our new udf_direct_IO() handler and tidy up
> any blocks instantiated outside i_size on error.  This is pretty
> standard.  Factor error handling code out of udf_write_begin() into new
> function udf_write_failed() so it can also be called by udf_direct_IO().
> 
> Also change the whitespace in udf_aops and udf_adinicb_aops to make them
> a bit neater.
> 
> Signed-off-by: Ian Abbott <abbotti@xxxxxxxxx>
> ---
> v2: Rework error handling in udf_direct_IO to avoid calling deprecated
>     vmtruncate().
> v3: Rebased to next-20120904.
> ---
>  fs/udf/file.c  | 29 +++++++++++++++++++++++++----
>  fs/udf/inode.c | 52 ++++++++++++++++++++++++++++++++++++----------------
>  2 files changed, 61 insertions(+), 20 deletions(-)
> 
> diff --git a/fs/udf/file.c b/fs/udf/file.c
> index 7f3f7ba..f5f9ddd 100644
> --- a/fs/udf/file.c
> +++ b/fs/udf/file.c
> @@ -34,6 +34,7 @@
>  #include <linux/errno.h>
>  #include <linux/pagemap.h>
>  #include <linux/buffer_head.h>
> +#include <linux/writeback.h>
>  #include <linux/aio.h>
>  
>  #include "udf_i.h"
> @@ -64,12 +65,23 @@ static int udf_adinicb_writepage(struct page *page,
>  	struct inode *inode = page->mapping->host;
>  	char *kaddr;
>  	struct udf_inode_info *iinfo = UDF_I(inode);
> +	loff_t start, end, page_start, page_offset;
>  
>  	BUG_ON(!PageLocked(page));
>  
>  	kaddr = kmap(page);
> -	memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr, kaddr, inode->i_size);
> -	mark_inode_dirty(inode);
> +	/* The beginning and/or end of the page data is likely to be invalid
> +	 * for O_DIRECT, so only copy the valid part. */
> +	page_start = (loff_t)page->index << PAGE_CACHE_SHIFT;
> +	start = max(page_start, wbc->range_start);
> +	end = min3(page_start + (loff_t)PAGE_CACHE_SIZE - 1,
> +		   wbc->range_end, inode->i_size - 1);
> +	if (likely(start <= end)) {
> +		page_offset = start - page_start;
> +		memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr + start,
> +		       kaddr + page_offset, (end + 1 - start));
> +		mark_inode_dirty(inode);
> +	}
>  	SetPageUptodate(page);
>  	kunmap(page);
>  	unlock_page(page);
> @@ -95,11 +107,20 @@ static int udf_adinicb_write_end(struct file *file,
>  	return simple_write_end(file, mapping, pos, len, copied, page, fsdata);
>  }
>  
> +static ssize_t udf_adinicb_direct_IO(int rw, struct kiocb *iocb,
> +				     const struct iovec *iov,
> +				     loff_t offset, unsigned long nr_segs)
> +{
> +	/* Fallback to buffered I/O. */
> +	return 0;
> +}
> +
>  const struct address_space_operations udf_adinicb_aops = {
>  	.readpage	= udf_adinicb_readpage,
>  	.writepage	= udf_adinicb_writepage,
> -	.write_begin = simple_write_begin,
> -	.write_end = udf_adinicb_write_end,
> +	.write_begin	= simple_write_begin,
> +	.write_end	= udf_adinicb_write_end,
> +	.direct_IO	= udf_adinicb_direct_IO,
>  };
>  
>  static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
> diff --git a/fs/udf/inode.c b/fs/udf/inode.c
> index 1a0588e..b905448 100644
> --- a/fs/udf/inode.c
> +++ b/fs/udf/inode.c
> @@ -95,6 +95,22 @@ void udf_evict_inode(struct inode *inode)
>  	}
>  }
>  
> +static void udf_write_failed(struct address_space *mapping, loff_t to)
> +{
> +	struct inode *inode = mapping->host;
> +	struct udf_inode_info *iinfo = UDF_I(inode);
> +	loff_t isize = inode->i_size;
> +
> +	if (to > isize) {
> +		truncate_pagecache(inode, to, isize);
> +		if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
> +			down_write(&iinfo->i_data_sem);
> +			udf_truncate_extents(inode);
> +			up_write(&iinfo->i_data_sem);
> +		}
> +	}
> +}
> +
>  static int udf_writepage(struct page *page, struct writeback_control *wbc)
>  {
>  	return block_write_full_page(page, udf_get_block, wbc);
> @@ -124,21 +140,24 @@ static int udf_write_begin(struct file *file, struct address_space *mapping,
>  	int ret;
>  
>  	ret = block_write_begin(mapping, pos, len, flags, pagep, udf_get_block);
> -	if (unlikely(ret)) {
> -		struct inode *inode = mapping->host;
> -		struct udf_inode_info *iinfo = UDF_I(inode);
> -		loff_t isize = inode->i_size;
> -
> -		if (pos + len > isize) {
> -			truncate_pagecache(inode, pos + len, isize);
> -			if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
> -				down_write(&iinfo->i_data_sem);
> -				udf_truncate_extents(inode);
> -				up_write(&iinfo->i_data_sem);
> -			}
> -		}
> -	}
> +	if (unlikely(ret))
> +		udf_write_failed(mapping, pos + len);
> +	return ret;
> +}
>  
> +static ssize_t udf_direct_IO(int rw, struct kiocb *iocb,
> +			     const struct iovec *iov,
> +			     loff_t offset, unsigned long nr_segs)
> +{
> +	struct file *file = iocb->ki_filp;
> +	struct address_space *mapping = file->f_mapping;
> +	struct inode *inode = mapping->host;
> +	ssize_t ret;
> +
> +	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
> +				  udf_get_block);
> +	if (unlikely(ret < 0 && (rw && WRITE)))
> +		udf_write_failed(mapping, offset + iov_length(iov, nr_segs));
>  	return ret;
>  }
>  
> @@ -152,8 +171,9 @@ const struct address_space_operations udf_aops = {
>  	.readpages	= udf_readpages,
>  	.writepage	= udf_writepage,
>  	.writepages	= udf_writepages,
> -	.write_begin		= udf_write_begin,
> -	.write_end		= generic_write_end,
> +	.write_begin	= udf_write_begin,
> +	.write_end	= generic_write_end,
> +	.direct_IO	= udf_direct_IO,
>  	.bmap		= udf_bmap,
>  };
>  
> -- 
> 1.7.12
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
-- 
Jan Kara <jack@xxxxxxx>
SUSE Labs, CR
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux