Re: [RFC][PATCH v2 0/5] Experiments with overlayfs filemap

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Feb 1, 2019 at 2:25 PM Amir Goldstein <amir73il@xxxxxxxxx> wrote:

> Wrong st_mtime/st_ctime:
> generic/003 generic/080 generic/215

Fixed.  Albeit, I feel it's pretty hackish with various forms of
ovl_copyattr sprinkled all over the place.  Need to think about proper
management of attributes between upper and overlay inode.

>
> EIO on clone/dedupe:
> generic/303 generic/304
>
> Wrong st_blocks/fiemap:
> generic/353 generic/392 generic/422

Hmm, generic/353 passes for me...

>
> New lazy copy up semantics:
> overlay/060

Need to fix the testcase?

> BTW, your patch includes another change, but:
> RWF_DIRECT flag for vfs_read_iter() should depend on upper
> direct_IO() support.

Right, fixed.

Thanks,
Miklos
diff --git a/fs/attr.c b/fs/attr.c
index d22e8187477f..2832024b5665 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -262,7 +262,8 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de
 
 	now = current_time(inode);
 
-	attr->ia_ctime = now;
+	if (!(ia_valid & ATTR_CTIME_SET))
+		attr->ia_ctime = now;
 	if (!(ia_valid & ATTR_ATIME_SET))
 		attr->ia_atime = now;
 	if (!(ia_valid & ATTR_MTIME_SET))
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 82c129bfe58d..e7a8ad995ae4 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -699,8 +699,11 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
 			ovl_type_origin(old));
 	if (err)
 		iput(inode);
+	else
+		ovl_copy_ctime(ovl_inode_upper(inode), inode);
 
 	ovl_nlink_end(old);
+
 out_drop_write:
 	ovl_drop_write(old);
 out:
@@ -871,7 +874,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
 	 */
 	upperdentry = ovl_dentry_upper(dentry);
 	if (upperdentry)
-		ovl_copyattr(d_inode(upperdentry), d_inode(dentry));
+		ovl_copy_ctime(d_inode(upperdentry), d_inode(dentry));
 
 out_drop_write:
 	ovl_drop_write(dentry);
@@ -1213,9 +1216,9 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
 			 (d_inode(new) && ovl_type_origin(new)));
 
 	/* copy ctime: */
-	ovl_copyattr(d_inode(olddentry), d_inode(old));
+	ovl_copy_ctime(d_inode(olddentry), d_inode(old));
 	if (d_inode(new) && ovl_dentry_upper(new))
-		ovl_copyattr(d_inode(newdentry), d_inode(new));
+		ovl_copy_ctime(d_inode(newdentry), d_inode(new));
 
 out_dput:
 	dput(newdentry);
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index f954f1425186..9427fbb93cef 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -414,7 +414,7 @@ static ssize_t ovl_real_write_iter(struct kiocb *iocb, struct iov_iter *iter)
 	file_end_write(real.file);
 	revert_creds(old_cred);
 
-	/* Update size */
+	/* Update size/c/mtime */
 	ovl_copyattr_size(ovl_inode_real(inode), inode);
 
 	fdput(real);
@@ -597,7 +597,7 @@ static long ovl_fallocate(struct file *file, int mode, loff_t offset,
 	ret = vfs_fallocate(real.file, mode, offset, len);
 	revert_creds(old_cred);
 
-	/* Update size */
+	/* Update size/c/mtime */
 	ovl_copyattr_size(ovl_inode_real(inode), inode);
 
 	/* and invalidate mappings and page cache */
@@ -681,14 +681,21 @@ static long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 			return ret;
 
 		ret = ovl_maybe_copy_up(file_dentry(file), OVL_COPY_UP_DATA);
-		if (!ret) {
-			ret = ovl_real_ioctl(file, cmd, arg);
+		if (ret)
+			goto drop_write;
 
-			inode_lock(inode);
-			ovl_copyflags(ovl_inode_real(inode), inode);
-			inode_unlock(inode);
-		}
+		ret = ovl_flush_filemap(file, 0, LLONG_MAX);
+		if (ret)
+			goto drop_write;
 
+		ret = ovl_real_ioctl(file, cmd, arg);
+
+		inode_lock(inode);
+		ovl_copyflags(ovl_inode_real(inode), inode);
+		if (!ret)
+			ovl_copy_ctime(ovl_inode_real(inode), inode);
+		inode_unlock(inode);
+drop_write:
 		mnt_drop_write_file(file);
 		break;
 
@@ -771,7 +778,7 @@ static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
 	}
 	revert_creds(old_cred);
 
-	/* Update size */
+	/* Update size/c/mtime */
 	ovl_copyattr_size(ovl_inode_real(inode_out), inode_out);
 
 	if (op != OVL_DEDUPE)
@@ -850,10 +857,16 @@ static int ovl_real_readpage(struct file *realfile, struct page *page)
 	loff_t pos = page->index << PAGE_SHIFT;
 	struct iov_iter iter;
 	ssize_t ret;
+	rwf_t flags = 0;
+
+	if (realfile->f_mapping->a_ops &&
+	    realfile->f_mapping->a_ops->direct_IO) {
+		flags |= RWF_DIRECT;
+	}
 
 	iov_iter_bvec(&iter, READ, &bvec, 1, PAGE_SIZE);
 
-	ret = vfs_iter_read(realfile, &iter, &pos, RWF_DIRECT);
+	ret = vfs_iter_read(realfile, &iter, &pos, flags);
 	if (ret >= 0 && ret < PAGE_SIZE)
 		zero_user_segment(page, ret, PAGE_SIZE);
 
@@ -960,10 +973,11 @@ static int ovl_write_end(struct file *file, struct address_space *mapping,
 
 static int ovl_real_writepage(struct page *page, struct writeback_control *wbc)
 {
-	struct ovl_inode *oi = OVL_I(page->mapping->host);
+	struct inode *inode = page->mapping->host;
+	struct ovl_inode *oi = OVL_I(inode);
 	struct file *realfile = oi->upper_file;
 	loff_t pos = page->index << PAGE_SHIFT;
-	loff_t size = i_size_read(page->mapping->host);
+	loff_t size = i_size_read(inode);
 	size_t len = PAGE_SIZE;
 	struct bio_vec bvec = {
 		.bv_page = page,
@@ -978,7 +992,7 @@ static int ovl_real_writepage(struct page *page, struct writeback_control *wbc)
 		pr_info("ovl_writepage: size = %lli pos = %lli\n", size, pos);
 		return 0;
 	}
-	
+
 	if (size < pos + PAGE_SIZE) {
 		/* Can't do direct I/O for tail pages */
 		len = size - pos;
@@ -987,7 +1001,7 @@ static int ovl_real_writepage(struct page *page, struct writeback_control *wbc)
 		flags |= RWF_DIRECT;
 	}
 
-	//pr_info("ovl_real_writepage(%lli)\n", pos);
+	pr_info("ovl_real_writepage(%p/%lli)...\n", inode, pos);
 	iov_iter_bvec(&iter, WRITE, &bvec, 1, len);
 
 	ret = vfs_iter_write(realfile, &iter, &pos, flags);
@@ -997,7 +1011,7 @@ static int ovl_real_writepage(struct page *page, struct writeback_control *wbc)
 
 	/* FADV_DONTNEED for tail pages*/
 
-	//pr_info("ovl_real_writepage(%lli) = %li\n", pos, ret);
+	pr_info("ovl_real_writepage(%p/%lli) = %li\n", inode, pos, ret);
 
 	return ret < 0 ? ret : 0;
 }
@@ -1021,6 +1035,9 @@ static int ovl_writepage(struct page *page, struct writeback_control *wbc)
 	 */
 	end_page_writeback(page);
 
+	/* Redirty ovl inode due to just having modified upper c/mtime */
+	__mark_inode_dirty(page->mapping->host, I_DIRTY_TIME | I_DIRTY_SYNC);
+
 	return ret;
 }
 
@@ -1045,7 +1062,7 @@ static ssize_t ovl_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 		ret = vfs_iter_write(real.file, iter, &iocb->ki_pos, flags);
 		file_end_write(real.file);
 
-		/* Update size */
+		/* Update size/c/mtime */
 		ovl_copyattr_size(ovl_inode_real(inode), inode);
 	}
 	revert_creds(old_cred);
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index dabcc92dd178..0d959c26c414 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -77,7 +77,7 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr)
 	err = notify_change(upperdentry, attr, NULL);
 	revert_creds(old_cred);
 	if (!err)
-		ovl_copyattr(upperinode, inode);
+		ovl_copyattr_time(upperinode, inode);
 	inode_unlock(upperinode);
 
 	if (is_truncate) {
@@ -181,7 +181,11 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
 	 * real inode i_size. Perhaps we should generic_fillattr() and only
 	 * update individual stats from real inode?
 	 */
-	stat->size = i_size_read(inode);
+	if (d_inode(realpath.dentry) == ovl_inode_upper(inode)) {
+		stat->size = i_size_read(inode);
+		stat->ctime = inode->i_ctime;
+		stat->mtime = inode->i_mtime;
+	}
 
 	/*
 	 * For non-dir or same fs, we use st_ino of the copy up origin.
@@ -378,7 +382,7 @@ int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
 	revert_creds(old_cred);
 
 	/* copy c/mtime */
-	ovl_copyattr(d_inode(realdentry), inode);
+	ovl_copyattr_time(d_inode(realdentry), inode);
 
 out_drop_write:
 	ovl_drop_write(dentry);
@@ -473,7 +477,10 @@ int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags)
 			touch_atime(&upperpath);
 			inode->i_atime = d_inode(upperpath.dentry)->i_atime;
 		}
+		return 0;
 	}
+	pr_info("ovl_update_time(%p/%lli.%09lu/%i)\n",
+		inode, ts->tv_sec, ts->tv_nsec, flags);
 	return generic_update_time(inode, ts, flags);
 }
 
@@ -585,7 +592,8 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev,
 		inode->i_ino = get_next_ino();
 	}
 	inode->i_mode = mode;
-	inode->i_flags |= S_NOCMTIME;
+	if (!S_ISREG(mode))
+		inode->i_flags |= S_NOCMTIME;
 #ifdef CONFIG_FS_POSIX_ACL
 	inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE;
 #endif
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 576488162257..e52385700aa3 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -377,16 +377,26 @@ static inline void ovl_copyattr(struct inode *from, struct inode *to)
 	to->i_gid = from->i_gid;
 	to->i_mode = from->i_mode;
 	to->i_atime = from->i_atime;
+}
+
+static inline void ovl_copyattr_time(struct inode *from, struct inode *to)
+{
+	ovl_copyattr(from, to);
 	to->i_mtime = from->i_mtime;
 	to->i_ctime = from->i_ctime;
 }
 
 static inline void ovl_copyattr_size(struct inode *from, struct inode *to)
 {
-	ovl_copyattr(from, to);
+	ovl_copyattr_time(from, to);
 	i_size_write(to, i_size_read(from));
 }
 
+static inline void ovl_copy_ctime(struct inode *from, struct inode *to)
+{
+	to->i_ctime = from->i_ctime;
+}
+
 static inline void ovl_copyflags(struct inode *from, struct inode *to)
 {
 	unsigned int mask = S_SYNC | S_IMMUTABLE | S_APPEND | S_NOATIME;
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index cfbf1d8994c0..fe01fcc667c2 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -220,6 +220,32 @@ static void ovl_destroy_inode(struct inode *inode)
 	call_rcu(&inode->i_rcu, ovl_i_callback);
 }
 
+static int ovl_write_inode(struct inode *inode, struct writeback_control *wbc)
+{
+	int err;
+	const struct cred *old_cred;
+	struct dentry *upperdentry = ovl_i_dentry_upper(inode);
+	struct inode *upperinode = d_inode(upperdentry);
+	struct iattr attr = {
+		.ia_valid = ATTR_CTIME | ATTR_CTIME_SET |
+			    ATTR_MTIME | ATTR_MTIME_SET,
+		.ia_ctime = inode->i_ctime,
+		.ia_mtime = inode->i_mtime,
+	};
+
+	pr_info("ovl_write_inode(%p)...\n", inode);
+
+	inode_lock(upperinode);
+	old_cred = ovl_override_creds(inode->i_sb);
+	err = notify_change(upperdentry, &attr, NULL);
+	revert_creds(old_cred);
+	inode_unlock(upperinode);
+
+	pr_info("ovl_write_inode(%p): %i\n", inode, err);
+
+	return err;
+}
+
 static void ovl_free_fs(struct ovl_fs *ofs)
 {
 	unsigned i;
@@ -385,6 +411,7 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data)
 static const struct super_operations ovl_super_operations = {
 	.alloc_inode	= ovl_alloc_inode,
 	.destroy_inode	= ovl_destroy_inode,
+	.write_inode	= ovl_write_inode,
 	.put_super	= ovl_put_super,
 	.sync_fs	= ovl_sync_fs,
 	.statfs		= ovl_statfs,
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index b1989c479947..2aa9efdc869e 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -456,7 +456,7 @@ static void ovl_dentry_version_inc(struct dentry *dentry, bool impurity)
 void ovl_dir_modified(struct dentry *dentry, bool impurity)
 {
 	/* Copy mtime/ctime */
-	ovl_copyattr(d_inode(ovl_dentry_upper(dentry)), d_inode(dentry));
+	ovl_copyattr_time(d_inode(ovl_dentry_upper(dentry)), d_inode(dentry));
 
 	ovl_dentry_version_inc(dentry, impurity);
 }
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2901ffc43c11..902af18f52ec 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -183,7 +183,8 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 #define ATTR_CTIME	(1 << 6)
 #define ATTR_ATIME_SET	(1 << 7)
 #define ATTR_MTIME_SET	(1 << 8)
-#define ATTR_FORCE	(1 << 9) /* Not a change, but a change it */
+#define ATTR_CTIME_SET  (1 << 9) /* kernel internal, for now... */
+#define ATTR_FORCE	(1 << 10) /* Not a change, but a change it */
 #define ATTR_KILL_SUID	(1 << 11)
 #define ATTR_KILL_SGID	(1 << 12)
 #define ATTR_FILE	(1 << 13)

[Index of Archives]     [Linux Filesystems Devel]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux