Re: [RFC][PATCH v2 0/5] Experiments with overlayfs filemap

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Jan 28, 2019 at 10:17 PM Miklos Szeredi <miklos@xxxxxxxxxx> wrote:

> Trying to put some code together...

Very rudimentary patch attached.  It doens't do direct IO yet, but
demonstrates what I meant about achoring the upper file in the inode.

Also found the trick to actually make writeback work:
super_setup_bdi() call in fill_super...

Thanks,
Miklos
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index c922682ff1b2..7a8a6cd55e1f 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -37,14 +37,6 @@ static struct file *ovl_open_realfile(const struct file *file,
 	const struct cred *old_cred;
 	int flags = file->f_flags | O_NOATIME;
 
-	if (realinode == ovl_inode_upper(inode)) {
-		/* tmpfs has no readpage a_op, so need to read realfile */
-		if ((flags & O_WRONLY) &&
-		    (!realinode->i_mapping ||
-		     !realinode->i_mapping->a_ops->readpage))
-			flags = (flags & ~O_ACCMODE) | O_RDWR;
-	}
-
 	old_cred = ovl_override_creds(inode->i_sb);
 	realfile = open_with_fake_path(&file->f_path, flags,
 				       realinode, current_cred());
@@ -57,6 +49,43 @@ static struct file *ovl_open_realfile(const struct file *file,
 	return realfile;
 }
 
+static struct file *ovl_open_upper(const struct file *file,
+				   struct inode *realinode)
+{
+	struct inode *inode = file_inode(file);
+	struct ovl_inode *oi = OVL_I(inode);
+	struct file *realfile;
+	const struct cred *old_cred;
+	struct path realpath;
+
+	realfile = READ_ONCE(oi->upper_file);
+	if (realfile)
+		return realfile;
+
+	ovl_path_upper(file_dentry(file), &realpath);
+
+	old_cred = ovl_override_creds(inode->i_sb);
+	realfile = open_with_fake_path(&realpath, O_RDWR | O_NOATIME,
+				       realinode, current_cred());
+	revert_creds(old_cred);
+
+	pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
+		 file, file, ovl_whatisit(inode, realinode), file->f_flags,
+		 realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
+
+	if (!IS_ERR(realfile)) {
+		struct file *old = cmpxchg(&oi->upper_file, NULL, realfile);
+
+		if (old) {
+			fput(realfile);
+			realfile = old;
+		}
+	}
+
+	return realfile;
+}
+
+#if 0
 #define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
 
 static int ovl_change_flags(struct file *file, unsigned int flags)
@@ -94,13 +123,14 @@ static int ovl_change_flags(struct file *file, unsigned int flags)
 
 	return 0;
 }
+#endif
 
 static bool ovl_filemap_support(const struct file *file)
 {
-	struct ovl_fs *ofs = file_inode(file)->i_sb->s_fs_info;
+	//struct ovl_fs *ofs = file_inode(file)->i_sb->s_fs_info;
 
 	/* TODO: implement aops to upper inode data */
-	return ofs->upper_mnt && ovl_aops.writepage;
+	return true;
 }
 
 static int ovl_file_maybe_copy_up(const struct file *file, bool allow_meta)
@@ -119,15 +149,17 @@ static int ovl_file_maybe_copy_up(const struct file *file, bool allow_meta)
 	return ovl_maybe_copy_up(file_dentry(file), copy_up_flags);
 }
 
-static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
-			       bool allow_meta)
+static int ovl_real_fdget(const struct file *file, struct fd *real)
 {
 	struct inode *inode = file_inode(file);
+	struct ovl_inode *oi = OVL_I(inode);
 	struct inode *realinode;
 	int err;
 
 	real->flags = 0;
-	real->file = file->private_data;
+	real->file = READ_ONCE(oi->upper_file);
+	if (!real->file)
+		real->file = file->private_data;
 
 	/*
 	 * Lazy copy up caches the meta copy upper file on open O_RDWR.
@@ -136,35 +168,28 @@ static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
 	 * we may try to open a lower file O_RDWR or perform data operations
 	 * (e.g. fallocate) on the metacopy inode.
 	 */
-	err = ovl_file_maybe_copy_up(file, allow_meta);
+	err = ovl_file_maybe_copy_up(file, false);
 	if (err)
 		return err;
 
-	if (allow_meta)
-		realinode = ovl_inode_real(inode);
-	else
-		realinode = ovl_inode_realdata(inode);
+	realinode = ovl_inode_realdata(inode);
 
 	/* Has it been copied up since we'd opened it? */
 	if (unlikely(file_inode(real->file) != realinode)) {
-		real->flags = FDPUT_FPUT;
-		real->file = ovl_open_realfile(file, realinode);
+		real->file = ovl_open_upper(file, realinode);
 
 		return PTR_ERR_OR_ZERO(real->file);
 	}
 
+#if 0
 	/* Did the flags change since open? */
 	if (unlikely((file->f_flags ^ real->file->f_flags) & ~O_NOATIME))
 		return ovl_change_flags(real->file, file->f_flags);
+#endif
 
 	return 0;
 }
 
-static int ovl_real_fdget(const struct file *file, struct fd *real)
-{
-	return ovl_real_fdget_meta(file, real, false);
-}
-
 static bool ovl_should_use_filemap_meta(struct file *file, bool allow_meta)
 {
 	struct inode *inode = file_inode(file);
@@ -220,6 +245,7 @@ static int ovl_flush_filemap(struct file *file, loff_t offset, loff_t len)
 static int ovl_open(struct inode *inode, struct file *file)
 {
 	struct file *realfile;
+	struct inode *realinode;
 	int err;
 	bool allow_meta = (file->f_mode & FMODE_WRITE) &&
 			ovl_filemap_support(file);
@@ -228,22 +254,28 @@ static int ovl_open(struct inode *inode, struct file *file)
 	if (err)
 		return err;
 
-	/* No longer need these flags, so don't pass them on to underlying fs */
-	file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
+	realinode = allow_meta ?
+		ovl_inode_real(inode) : ovl_inode_realdata(inode);
 
-	realfile = ovl_open_realfile(file, allow_meta ? ovl_inode_real(inode) :
-				     ovl_inode_realdata(inode));
-	if (IS_ERR(realfile))
-		return PTR_ERR(realfile);
+	if (realinode == ovl_inode_upper(inode)) {
+		realfile = ovl_open_upper(file, realinode);
+		if (IS_ERR(realfile))
+			return PTR_ERR(realfile);
+	} else {
+		realfile = ovl_open_realfile(file, realinode);
+		if (IS_ERR(realfile))
+			return PTR_ERR(realfile);
 
-	file->private_data = realfile;
+		file->private_data = realfile;
+	}
 
 	return 0;
 }
 
 static int ovl_release(struct inode *inode, struct file *file)
 {
-	fput(file->private_data);
+	if (file->private_data)
+		fput(file->private_data);
 
 	return 0;
 }
@@ -284,6 +316,8 @@ static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb)
 	int ifl = iocb->ki_flags;
 	rwf_t flags = 0;
 
+	WARN_ON(ifl & IOCB_DIRECT);
+
 	if (ifl & IOCB_NOWAIT)
 		flags |= RWF_NOWAIT;
 	if (ifl & IOCB_HIPRI)
@@ -292,6 +326,8 @@ static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb)
 		flags |= RWF_DSYNC;
 	if (ifl & IOCB_SYNC)
 		flags |= RWF_SYNC;
+	if (ifl & IOCB_APPEND)
+		flags |= RWF_APPEND;
 
 	return flags;
 }
@@ -386,6 +422,7 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
 static int ovl_real_fsync(struct file *file, loff_t start, loff_t end,
 			  int datasync)
 {
+#if 0
 	struct fd real;
 	const struct cred *old_cred;
 	int ret;
@@ -404,6 +441,9 @@ static int ovl_real_fsync(struct file *file, loff_t start, loff_t end,
 	fdput(real);
 
 	return ret;
+#endif
+
+	return 0;
 }
 
 static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
@@ -770,39 +810,6 @@ const struct file_operations ovl_file_operations = {
 	.remap_file_range	= ovl_remap_file_range,
 };
 
-static struct page *ovl_real_get_page(struct file *realfile, pgoff_t index)
-{
-	struct page *page;
-
-	page = read_mapping_page(file_inode(realfile)->i_mapping, index, NULL);
-	if (IS_ERR(page))
-		return page;
-
-	if (!PageUptodate(page)) {
-		put_page(page);
-		return ERR_PTR(-EIO);
-	}
-
-	lock_page(page);
-
-	return page;
-}
-
-static int ovl_real_copy_page(struct file *realfile, struct page *page)
-{
-	struct page *realpage;
-
-	realpage = ovl_real_get_page(realfile, page->index);
-	if (IS_ERR(realpage))
-		return PTR_ERR(realpage);
-
-	copy_highpage(page, realpage);
-	unlock_page(realpage);
-	put_page(realpage);
-
-	return 0;
-}
-
 static int ovl_real_readpage(struct file *realfile, struct page *page)
 {
 	struct bio_vec bvec = {
@@ -823,16 +830,17 @@ static int ovl_real_readpage(struct file *realfile, struct page *page)
 
 static int ovl_do_readpage(struct file *file, struct page *page)
 {
-	struct file *realfile = file->private_data;
+	struct inode *inode = file_inode(file);
+	struct ovl_inode *oi = OVL_I(inode);
+	struct file *realfile = READ_ONCE(oi->upper_file);
 	const struct cred *old_cred;
 	int ret;
 
-	/* tmpfs has no readpage a_op, so need to read with f_op */
+	if (!realfile)
+		realfile = file->private_data;
+
 	old_cred = ovl_override_creds(file_inode(file)->i_sb);
-	if (!realfile->f_mapping || !realfile->f_mapping->a_ops->readpage)
-		ret = ovl_real_readpage(realfile, page);
-	else
-		ret = ovl_real_copy_page(realfile, page);
+	ret = ovl_real_readpage(realfile, page);
 	revert_creds(old_cred);
 
 	if (!ret)
@@ -881,76 +889,57 @@ static int ovl_write_begin(struct file *file, struct address_space *mapping,
 	return 0;
 }
 
-static int ovl_real_write_end(struct file *file, loff_t pos,
-			      unsigned int copied, struct page *page)
-{
-	struct file *realfile = file->private_data;
-	unsigned int offset = (pos & (PAGE_SIZE - 1));
-	struct bio_vec bvec = {
-		.bv_page = page,
-		.bv_len = copied,
-		.bv_offset = offset,
-	};
-	struct iov_iter iter;
-	const struct cred *old_cred;
-	ssize_t ret;
-
-	iov_iter_bvec(&iter, WRITE, &bvec, 1, copied);
-
-	old_cred = ovl_override_creds(file_inode(file)->i_sb);
-	ret = vfs_iter_write(realfile, &iter, &pos, 0);
-	revert_creds(old_cred);
-
-	return ret < 0 ? ret : 0;
-}
-
-extern int __generic_write_end(struct inode *inode, loff_t pos, unsigned copied,
-			       struct page *page);
-
 static int ovl_write_end(struct file *file, struct address_space *mapping,
 			 loff_t pos, unsigned len, unsigned copied,
 			 struct page *page, void *fsdata)
 {
-	int err;
+	int res;
 
-	err = ovl_real_write_end(file, pos, copied, page);
-	if (err) {
-		pr_warn("ovl_write_end: %i", err);
-		unlock_page(page);
-		put_page(page);
+	res = simple_write_end(file, mapping, pos, len, copied, page, fsdata);
 
-		return -EIO;
-	}
+	pr_info("ovl_write_end: page->flags: %lx\n", page->flags);
 
-	return __generic_write_end(file_inode(file), pos, copied, page);
+	return res;
 }
 
 static int ovl_real_writepage(struct page *page, struct writeback_control *wbc)
 {
-	struct inode *realinode = ovl_inode_real(page->mapping->host);
-	struct page *realpage;
-	int ret;
+	struct ovl_inode *oi = OVL_I(page->mapping->host);
+	struct file *realfile = oi->upper_file;
+	loff_t pos = page->index << PAGE_SHIFT;
+	loff_t len = i_size_read(page->mapping->host) - pos;
+	struct bio_vec bvec = {
+		.bv_page = page,
+		.bv_len = PAGE_SIZE,
+		.bv_offset = 0,
+	};
+	struct iov_iter iter;
+	ssize_t ret;
 
-	if (!realinode->i_mapping || !realinode->i_mapping->a_ops->writepage)
-		return -EIO;
+	WARN_ON(len <= 0);
+	if (len < PAGE_SIZE)
+		bvec.bv_len = len;
 
-	realpage = grab_cache_page(realinode->i_mapping, page->index);
-	copy_highpage(realpage, page);
-	set_page_dirty(realpage);
+	pr_info("ovl_real_writepage(%lli)\n", pos);
+	iov_iter_bvec(&iter, WRITE, &bvec, 1, bvec.bv_len);
 
-	/* Start writeback on and unlock real page */
-	ret = realinode->i_mapping->a_ops->writepage(realpage, wbc);
-	put_page(realpage);
+	ret = vfs_iter_write(realfile, &iter, &pos, 0);
+	pr_info("ovl_real_writepage(%lli) = %li\n", pos, ret);
 
-	return ret;
+	return ret < 0 ? ret : 0;
 }
 
 static int ovl_writepage(struct page *page, struct writeback_control *wbc)
 {
 	int ret;
+	const struct cred *old_cred;
 
 	set_page_writeback(page);
+
+	old_cred = ovl_override_creds(page->mapping->host->i_sb);
 	ret = ovl_real_writepage(page, wbc);
+	revert_creds(old_cred);
+
 	unlock_page(page);
 
 	/*
@@ -969,5 +958,5 @@ const struct address_space_operations ovl_aops = {
 	.set_page_dirty	= __set_page_dirty_nobuffers,
 	.writepage	= ovl_writepage,
 	/* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */
-	.direct_IO	= noop_direct_IO,
+//	.direct_IO	= noop_direct_IO,
 };
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 766505598ed1..d1f04f5526f8 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -148,7 +148,8 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
 	enum ovl_path_type type;
 	struct path realpath;
 	const struct cred *old_cred;
-	bool is_dir = S_ISDIR(dentry->d_inode->i_mode);
+	struct inode *inode = d_inode(dentry);
+	bool is_dir = S_ISDIR(inode->i_mode);
 	bool samefs = ovl_same_sb(dentry->d_sb);
 	struct ovl_layer *lower_layer = NULL;
 	int err;
@@ -162,6 +163,13 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
 	if (err)
 		goto out;
 
+	/*
+	 * With overlay page cache, overlay inode i_size is more uptodate than
+	 * real inode i_size. Perhaps we should generic_fillattr() and only
+	 * update individual stats from real inode?
+	 */
+	stat->size = i_size_read(inode);
+
 	/*
 	 * For non-dir or same fs, we use st_ino of the copy up origin.
 	 * This guaranties constant st_dev/st_ino across copy up.
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index ec237035333a..ab10c75c4b98 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -99,6 +99,7 @@ struct ovl_inode {
 	struct inode vfs_inode;
 	struct dentry *__upperdentry;
 	struct inode *lower;
+	struct file *upper_file;
 
 	/* synchronize copy up and more */
 	struct mutex lock;
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 0116735cc321..0973a7b8abfc 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -9,6 +9,7 @@
 
 #include <uapi/linux/magic.h>
 #include <linux/fs.h>
+#include <linux/file.h>
 #include <linux/namei.h>
 #include <linux/xattr.h>
 #include <linux/mount.h>
@@ -185,6 +186,7 @@ static struct inode *ovl_alloc_inode(struct super_block *sb)
 	oi->__upperdentry = NULL;
 	oi->lower = NULL;
 	oi->lowerdata = NULL;
+	oi->upper_file = NULL;
 	mutex_init(&oi->lock);
 
 	return &oi->vfs_inode;
@@ -201,6 +203,8 @@ static void ovl_destroy_inode(struct inode *inode)
 {
 	struct ovl_inode *oi = OVL_I(inode);
 
+	if (oi->upper_file)
+		fput(oi->upper_file);
 	dput(oi->__upperdentry);
 	iput(oi->lower);
 	if (S_ISDIR(inode->i_mode))
@@ -1427,6 +1431,10 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	struct cred *cred;
 	int err;
 
+	err = super_setup_bdi(sb);
+	if (err)
+		goto out;
+
 	err = -ENOMEM;
 	ofs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL);
 	if (!ofs)

[Index of Archives]     [Linux Filesystems Devel]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux