Re: [RFC][PATCH v2 0/5] Experiments with overlayfs filemap

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Jan 24, 2019 at 11:35 PM Amir Goldstein <amir73il@xxxxxxxxx> wrote:
>
> On Thu, Jan 24, 2019 at 7:18 PM Amir Goldstein <amir73il@xxxxxxxxx> wrote:
> >
> > >
> > > Can you please point me in the direction of how to implement proper
> > > aops? Or better yet, send me a demo patch?
> > > I figure we need to call vfs_iter_read/vfs_iter_write of real file
> > > with IOCB_DIRECT for page io? But I am very uncertain about locking
> > > order requirements and whether there is already some infrastructure
> > > that we can use (some splice variant)?
> > >
> > > Any other thoughts?
> > >
> >
> > Miklos,
> >
> > I've made a small progress.
> >
> > Pushed two more commits to ovl-lazy-copyup-wip:
> > ea9a0083d1ed - ovl: readahead upper inode page cache on overlay page fault
> > d1ff6e6366d5 - ovl: implement overlay stacked readpage
>
> Pushed two more:
> 838c119934a2 - ovl: implement write_begin/write_end aops
> 44b9e89154ba - ovl: return overlay inode i_size for stat()
>
> >
> > That implements readpage by reading and copying from upper page cache.
> >
> > With that change, the basic unionmount test on tmpfs passes.
>
> That is not surprising. overlay filemap ops are disabled for upper
> tmpfs in current
> implementation, because tmpfs has no readpage().
>
> > With unionmount over xfs, tests fail because partial page write
> > is not working correctly, although mmap partial write works well.
> >
>
> But that is fixed now, so ./run --ov --samefs also passes over xfs.
> Which says something bad about our test coverage...
>
> > I will try to follow similar pattern to implement writepage by copy to
> > upper page cache.
> >
>
> ...Luckily, I have implemented ./run --ov --recycle way back, otherwise we
> would have had very poor test coverage for persistency of overlayfs writes.
>
> So we currently have 6 xfstests failing on write persistency:
> overlay/018 overlay/032 overlay/044 overlay/047 overlay/050 overlay/051
>
> and 2 unionmount tests (over /base xfs):
> ./run --ov --samefs --recycle rename-new-pop-dir
> ./run --ov --samefs --recycle rename-mass-dir

Great progress.  Thanks!

I don't think you need to mess with dcache flushing, since this is all
kernel (page cache) pages without any possible dcache aliasing issues
that plague user pages.  So a simple copy_page() should do.

It would be even better to do it with O_DIRECT to remove cache
duplication.  Something like the attached...

Thanks,
Miklos
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 8e4056b262db..6f1e50202779 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -35,9 +35,19 @@ static struct file *ovl_open_realfile(const struct file *file,
 	struct inode *inode = file_inode(file);
 	struct file *realfile;
 	const struct cred *old_cred;
+	int flags = file->f_flags | O_NOATIME;
+
+	if (realinode == ovl_inode_upper(inode)) {
+		if (flags & O_WRONLY)
+			flags = (flags & ~O_ACCMODE) | O_RDWR;
+
+		if (realinode->i_mapping->a_ops &&
+		    realinode->i_mapping->a_ops->direct_IO)
+			flags |= O_DIRECT;
+	}
 
 	old_cred = ovl_override_creds(inode->i_sb);
-	realfile = open_with_fake_path(&file->f_path, file->f_flags | O_NOATIME,
+	realfile = open_with_fake_path(&file->f_path, flags,
 				       realinode, current_cred());
 	revert_creds(old_cred);
 
@@ -122,6 +132,9 @@ static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
 		return PTR_ERR_OR_ZERO(real->file);
 	}
 
+	if (realinode == ovl_inode_upper(inode))
+		return 0;
+
 	/* Did the flags change since open? */
 	if (unlikely((file->f_flags ^ real->file->f_flags) & ~O_NOATIME))
 		return ovl_change_flags(real->file, file->f_flags);
@@ -145,7 +158,6 @@ static bool ovl_filemap_support(struct file *file)
 static bool ovl_should_use_filemap_meta(struct file *file, bool meta_only)
 {
 	struct inode *inode = file_inode(file);
-	struct inode *upper;
 	int err;
 
 	if (!ovl_filemap_support(file))
@@ -168,12 +180,6 @@ static bool ovl_should_use_filemap_meta(struct file *file, bool meta_only)
 		}
 	}
 
-	/* Does upper inode support page cache operations? */
-	upper = ovl_inode_upper(inode);
-	if (!upper || unlikely(!upper->i_mapping ||
-			       !upper->i_mapping->a_ops->readpage))
-		return false;
-
 	/*
 	 * If file was opened O_RDWR and @meta_only is true, we use overlay
 	 * inode filemap operations, but defer data copy up further.
@@ -186,7 +192,7 @@ static bool ovl_should_use_filemap_meta(struct file *file, bool meta_only)
 	 * including pure upper inodes, so ovl_sync_fs() can sync all dirty
 	 * overlay inodes without having to sync all upper fs dirty inodes.
 	 */
-	return upper && ovl_has_upperdata(inode);
+	return ovl_inode_upper(inode) && ovl_has_upperdata(inode);
 }
 
 static bool ovl_should_use_filemap(struct file *file)
@@ -404,52 +410,43 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 	return ovl_real_fsync(file, start, end, datasync);
 }
 
-static struct page *ovl_real_get_page(struct file *file, pgoff_t index)
-{
-	struct file *realfile = file->private_data;
-	struct page *page;
 
-	page = read_mapping_page(file_inode(realfile)->i_mapping, index, NULL);
-	if (IS_ERR(page))
-		return page;
-	if (!PageUptodate(page)) {
-		put_page(page);
-		return ERR_PTR(-EIO);
-	}
-	lock_page(page);
-
-	return page;
-}
-
-static void ovl_copy_page(struct page *dst_page, struct page *src_page)
+static int ovl_real_readpage(struct file *realfile, struct page *page)
 {
-	void *src_addr = kmap_atomic(src_page);
-	void *dst_addr = kmap_atomic(dst_page);
+	struct bio_vec bvec = {
+		.bv_page = page,
+		.bv_len = PAGE_SIZE,
+		.bv_offset = 0,
+	};
+	loff_t pos = page->index << PAGE_SHIFT;
+	struct iov_iter iter;
+	ssize_t ret;
 
-	flush_dcache_page(src_page);
-	memcpy(dst_addr, src_addr, PAGE_SIZE);
+	iov_iter_bvec(&iter, READ, &bvec, 1, PAGE_SIZE);
 
-	kunmap_atomic(dst_addr);
-	kunmap_atomic(src_addr);
+	ret = vfs_iter_read(realfile, &iter, &pos, 0);
+
+	return ret < 0 ? ret : 0;
 }
 
 static int ovl_do_readpage(struct file *file, struct page *page)
 {
-	struct page *realpage;
-	int ret = 0;
+	int ret;
+	struct fd real;
+	const struct cred *old_cred;
 
-	realpage = ovl_real_get_page(file, page->index);
-	if (!IS_ERR(realpage)) {
-		ovl_copy_page(page, realpage);
-		unlock_page(realpage);
-		put_page(realpage);
-	} else {
-		ret = PTR_ERR(realpage);
-		clear_highpage(page);
-	}
+	ret = ovl_real_fdget(file, &real);
+	if (ret)
+		return ret;
+
+	old_cred = ovl_override_creds(file_inode(file)->i_sb);
+	ret = ovl_real_readpage(real.file, page);
+	revert_creds(old_cred);
+
+	fdput(real);
 
-	flush_dcache_page(page);
-	SetPageUptodate(page);
+	if (!ret)
+		SetPageUptodate(page);
 
 	return ret;
 }
@@ -470,6 +467,7 @@ static int ovl_write_begin(struct file *file, struct address_space *mapping,
 {
 	struct page *page;
 	pgoff_t index;
+	int err;
 
 	index = pos >> PAGE_SHIFT;
 
@@ -477,11 +475,20 @@ static int ovl_write_begin(struct file *file, struct address_space *mapping,
 	if (!page)
 		return -ENOMEM;
 
-	*pagep = page;
 
-	if (!PageUptodate(page) && (len != PAGE_SIZE))
-		return ovl_do_readpage(file, page);
+	if (!PageUptodate(page) && len != PAGE_SIZE) {
+		err = ovl_do_readpage(file, page);
+		if (err) {
+			pr_warn("ovl_do_readpage: %i", err);
+
+			unlock_page(page);
+			put_page(page);
 
+			return -EIO;
+		}
+	}
+
+	*pagep = page;
 	return 0;
 }
 

[Index of Archives]     [Linux Filesystems Devel]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux