On Mon, Jan 28, 2019 at 10:17 PM Miklos Szeredi <miklos@xxxxxxxxxx> wrote: > Trying to put some code together... Very rudimentary patch attached. It doens't do direct IO yet, but demonstrates what I meant about achoring the upper file in the inode. Also found the trick to actually make writeback work: super_setup_bdi() call in fill_super... Thanks, Miklos
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index c922682ff1b2..7a8a6cd55e1f 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -37,14 +37,6 @@ static struct file *ovl_open_realfile(const struct file *file, const struct cred *old_cred; int flags = file->f_flags | O_NOATIME; - if (realinode == ovl_inode_upper(inode)) { - /* tmpfs has no readpage a_op, so need to read realfile */ - if ((flags & O_WRONLY) && - (!realinode->i_mapping || - !realinode->i_mapping->a_ops->readpage)) - flags = (flags & ~O_ACCMODE) | O_RDWR; - } - old_cred = ovl_override_creds(inode->i_sb); realfile = open_with_fake_path(&file->f_path, flags, realinode, current_cred()); @@ -57,6 +49,43 @@ static struct file *ovl_open_realfile(const struct file *file, return realfile; } +static struct file *ovl_open_upper(const struct file *file, + struct inode *realinode) +{ + struct inode *inode = file_inode(file); + struct ovl_inode *oi = OVL_I(inode); + struct file *realfile; + const struct cred *old_cred; + struct path realpath; + + realfile = READ_ONCE(oi->upper_file); + if (realfile) + return realfile; + + ovl_path_upper(file_dentry(file), &realpath); + + old_cred = ovl_override_creds(inode->i_sb); + realfile = open_with_fake_path(&realpath, O_RDWR | O_NOATIME, + realinode, current_cred()); + revert_creds(old_cred); + + pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n", + file, file, ovl_whatisit(inode, realinode), file->f_flags, + realfile, IS_ERR(realfile) ? 0 : realfile->f_flags); + + if (!IS_ERR(realfile)) { + struct file *old = cmpxchg(&oi->upper_file, NULL, realfile); + + if (old) { + fput(realfile); + realfile = old; + } + } + + return realfile; +} + +#if 0 #define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT) static int ovl_change_flags(struct file *file, unsigned int flags) @@ -94,13 +123,14 @@ static int ovl_change_flags(struct file *file, unsigned int flags) return 0; } +#endif static bool ovl_filemap_support(const struct file *file) { - struct ovl_fs *ofs = file_inode(file)->i_sb->s_fs_info; + //struct ovl_fs *ofs = file_inode(file)->i_sb->s_fs_info; /* TODO: implement aops to upper inode data */ - return ofs->upper_mnt && ovl_aops.writepage; + return true; } static int ovl_file_maybe_copy_up(const struct file *file, bool allow_meta) @@ -119,15 +149,17 @@ static int ovl_file_maybe_copy_up(const struct file *file, bool allow_meta) return ovl_maybe_copy_up(file_dentry(file), copy_up_flags); } -static int ovl_real_fdget_meta(const struct file *file, struct fd *real, - bool allow_meta) +static int ovl_real_fdget(const struct file *file, struct fd *real) { struct inode *inode = file_inode(file); + struct ovl_inode *oi = OVL_I(inode); struct inode *realinode; int err; real->flags = 0; - real->file = file->private_data; + real->file = READ_ONCE(oi->upper_file); + if (!real->file) + real->file = file->private_data; /* * Lazy copy up caches the meta copy upper file on open O_RDWR. @@ -136,35 +168,28 @@ static int ovl_real_fdget_meta(const struct file *file, struct fd *real, * we may try to open a lower file O_RDWR or perform data operations * (e.g. fallocate) on the metacopy inode. */ - err = ovl_file_maybe_copy_up(file, allow_meta); + err = ovl_file_maybe_copy_up(file, false); if (err) return err; - if (allow_meta) - realinode = ovl_inode_real(inode); - else - realinode = ovl_inode_realdata(inode); + realinode = ovl_inode_realdata(inode); /* Has it been copied up since we'd opened it? */ if (unlikely(file_inode(real->file) != realinode)) { - real->flags = FDPUT_FPUT; - real->file = ovl_open_realfile(file, realinode); + real->file = ovl_open_upper(file, realinode); return PTR_ERR_OR_ZERO(real->file); } +#if 0 /* Did the flags change since open? */ if (unlikely((file->f_flags ^ real->file->f_flags) & ~O_NOATIME)) return ovl_change_flags(real->file, file->f_flags); +#endif return 0; } -static int ovl_real_fdget(const struct file *file, struct fd *real) -{ - return ovl_real_fdget_meta(file, real, false); -} - static bool ovl_should_use_filemap_meta(struct file *file, bool allow_meta) { struct inode *inode = file_inode(file); @@ -220,6 +245,7 @@ static int ovl_flush_filemap(struct file *file, loff_t offset, loff_t len) static int ovl_open(struct inode *inode, struct file *file) { struct file *realfile; + struct inode *realinode; int err; bool allow_meta = (file->f_mode & FMODE_WRITE) && ovl_filemap_support(file); @@ -228,22 +254,28 @@ static int ovl_open(struct inode *inode, struct file *file) if (err) return err; - /* No longer need these flags, so don't pass them on to underlying fs */ - file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); + realinode = allow_meta ? + ovl_inode_real(inode) : ovl_inode_realdata(inode); - realfile = ovl_open_realfile(file, allow_meta ? ovl_inode_real(inode) : - ovl_inode_realdata(inode)); - if (IS_ERR(realfile)) - return PTR_ERR(realfile); + if (realinode == ovl_inode_upper(inode)) { + realfile = ovl_open_upper(file, realinode); + if (IS_ERR(realfile)) + return PTR_ERR(realfile); + } else { + realfile = ovl_open_realfile(file, realinode); + if (IS_ERR(realfile)) + return PTR_ERR(realfile); - file->private_data = realfile; + file->private_data = realfile; + } return 0; } static int ovl_release(struct inode *inode, struct file *file) { - fput(file->private_data); + if (file->private_data) + fput(file->private_data); return 0; } @@ -284,6 +316,8 @@ static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb) int ifl = iocb->ki_flags; rwf_t flags = 0; + WARN_ON(ifl & IOCB_DIRECT); + if (ifl & IOCB_NOWAIT) flags |= RWF_NOWAIT; if (ifl & IOCB_HIPRI) @@ -292,6 +326,8 @@ static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb) flags |= RWF_DSYNC; if (ifl & IOCB_SYNC) flags |= RWF_SYNC; + if (ifl & IOCB_APPEND) + flags |= RWF_APPEND; return flags; } @@ -386,6 +422,7 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) static int ovl_real_fsync(struct file *file, loff_t start, loff_t end, int datasync) { +#if 0 struct fd real; const struct cred *old_cred; int ret; @@ -404,6 +441,9 @@ static int ovl_real_fsync(struct file *file, loff_t start, loff_t end, fdput(real); return ret; +#endif + + return 0; } static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync) @@ -770,39 +810,6 @@ const struct file_operations ovl_file_operations = { .remap_file_range = ovl_remap_file_range, }; -static struct page *ovl_real_get_page(struct file *realfile, pgoff_t index) -{ - struct page *page; - - page = read_mapping_page(file_inode(realfile)->i_mapping, index, NULL); - if (IS_ERR(page)) - return page; - - if (!PageUptodate(page)) { - put_page(page); - return ERR_PTR(-EIO); - } - - lock_page(page); - - return page; -} - -static int ovl_real_copy_page(struct file *realfile, struct page *page) -{ - struct page *realpage; - - realpage = ovl_real_get_page(realfile, page->index); - if (IS_ERR(realpage)) - return PTR_ERR(realpage); - - copy_highpage(page, realpage); - unlock_page(realpage); - put_page(realpage); - - return 0; -} - static int ovl_real_readpage(struct file *realfile, struct page *page) { struct bio_vec bvec = { @@ -823,16 +830,17 @@ static int ovl_real_readpage(struct file *realfile, struct page *page) static int ovl_do_readpage(struct file *file, struct page *page) { - struct file *realfile = file->private_data; + struct inode *inode = file_inode(file); + struct ovl_inode *oi = OVL_I(inode); + struct file *realfile = READ_ONCE(oi->upper_file); const struct cred *old_cred; int ret; - /* tmpfs has no readpage a_op, so need to read with f_op */ + if (!realfile) + realfile = file->private_data; + old_cred = ovl_override_creds(file_inode(file)->i_sb); - if (!realfile->f_mapping || !realfile->f_mapping->a_ops->readpage) - ret = ovl_real_readpage(realfile, page); - else - ret = ovl_real_copy_page(realfile, page); + ret = ovl_real_readpage(realfile, page); revert_creds(old_cred); if (!ret) @@ -881,76 +889,57 @@ static int ovl_write_begin(struct file *file, struct address_space *mapping, return 0; } -static int ovl_real_write_end(struct file *file, loff_t pos, - unsigned int copied, struct page *page) -{ - struct file *realfile = file->private_data; - unsigned int offset = (pos & (PAGE_SIZE - 1)); - struct bio_vec bvec = { - .bv_page = page, - .bv_len = copied, - .bv_offset = offset, - }; - struct iov_iter iter; - const struct cred *old_cred; - ssize_t ret; - - iov_iter_bvec(&iter, WRITE, &bvec, 1, copied); - - old_cred = ovl_override_creds(file_inode(file)->i_sb); - ret = vfs_iter_write(realfile, &iter, &pos, 0); - revert_creds(old_cred); - - return ret < 0 ? ret : 0; -} - -extern int __generic_write_end(struct inode *inode, loff_t pos, unsigned copied, - struct page *page); - static int ovl_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) { - int err; + int res; - err = ovl_real_write_end(file, pos, copied, page); - if (err) { - pr_warn("ovl_write_end: %i", err); - unlock_page(page); - put_page(page); + res = simple_write_end(file, mapping, pos, len, copied, page, fsdata); - return -EIO; - } + pr_info("ovl_write_end: page->flags: %lx\n", page->flags); - return __generic_write_end(file_inode(file), pos, copied, page); + return res; } static int ovl_real_writepage(struct page *page, struct writeback_control *wbc) { - struct inode *realinode = ovl_inode_real(page->mapping->host); - struct page *realpage; - int ret; + struct ovl_inode *oi = OVL_I(page->mapping->host); + struct file *realfile = oi->upper_file; + loff_t pos = page->index << PAGE_SHIFT; + loff_t len = i_size_read(page->mapping->host) - pos; + struct bio_vec bvec = { + .bv_page = page, + .bv_len = PAGE_SIZE, + .bv_offset = 0, + }; + struct iov_iter iter; + ssize_t ret; - if (!realinode->i_mapping || !realinode->i_mapping->a_ops->writepage) - return -EIO; + WARN_ON(len <= 0); + if (len < PAGE_SIZE) + bvec.bv_len = len; - realpage = grab_cache_page(realinode->i_mapping, page->index); - copy_highpage(realpage, page); - set_page_dirty(realpage); + pr_info("ovl_real_writepage(%lli)\n", pos); + iov_iter_bvec(&iter, WRITE, &bvec, 1, bvec.bv_len); - /* Start writeback on and unlock real page */ - ret = realinode->i_mapping->a_ops->writepage(realpage, wbc); - put_page(realpage); + ret = vfs_iter_write(realfile, &iter, &pos, 0); + pr_info("ovl_real_writepage(%lli) = %li\n", pos, ret); - return ret; + return ret < 0 ? ret : 0; } static int ovl_writepage(struct page *page, struct writeback_control *wbc) { int ret; + const struct cred *old_cred; set_page_writeback(page); + + old_cred = ovl_override_creds(page->mapping->host->i_sb); ret = ovl_real_writepage(page, wbc); + revert_creds(old_cred); + unlock_page(page); /* @@ -969,5 +958,5 @@ const struct address_space_operations ovl_aops = { .set_page_dirty = __set_page_dirty_nobuffers, .writepage = ovl_writepage, /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */ - .direct_IO = noop_direct_IO, +// .direct_IO = noop_direct_IO, }; diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 766505598ed1..d1f04f5526f8 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -148,7 +148,8 @@ int ovl_getattr(const struct path *path, struct kstat *stat, enum ovl_path_type type; struct path realpath; const struct cred *old_cred; - bool is_dir = S_ISDIR(dentry->d_inode->i_mode); + struct inode *inode = d_inode(dentry); + bool is_dir = S_ISDIR(inode->i_mode); bool samefs = ovl_same_sb(dentry->d_sb); struct ovl_layer *lower_layer = NULL; int err; @@ -162,6 +163,13 @@ int ovl_getattr(const struct path *path, struct kstat *stat, if (err) goto out; + /* + * With overlay page cache, overlay inode i_size is more uptodate than + * real inode i_size. Perhaps we should generic_fillattr() and only + * update individual stats from real inode? + */ + stat->size = i_size_read(inode); + /* * For non-dir or same fs, we use st_ino of the copy up origin. * This guaranties constant st_dev/st_ino across copy up. diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index ec237035333a..ab10c75c4b98 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -99,6 +99,7 @@ struct ovl_inode { struct inode vfs_inode; struct dentry *__upperdentry; struct inode *lower; + struct file *upper_file; /* synchronize copy up and more */ struct mutex lock; diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 0116735cc321..0973a7b8abfc 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -9,6 +9,7 @@ #include <uapi/linux/magic.h> #include <linux/fs.h> +#include <linux/file.h> #include <linux/namei.h> #include <linux/xattr.h> #include <linux/mount.h> @@ -185,6 +186,7 @@ static struct inode *ovl_alloc_inode(struct super_block *sb) oi->__upperdentry = NULL; oi->lower = NULL; oi->lowerdata = NULL; + oi->upper_file = NULL; mutex_init(&oi->lock); return &oi->vfs_inode; @@ -201,6 +203,8 @@ static void ovl_destroy_inode(struct inode *inode) { struct ovl_inode *oi = OVL_I(inode); + if (oi->upper_file) + fput(oi->upper_file); dput(oi->__upperdentry); iput(oi->lower); if (S_ISDIR(inode->i_mode)) @@ -1427,6 +1431,10 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) struct cred *cred; int err; + err = super_setup_bdi(sb); + if (err) + goto out; + err = -ENOMEM; ofs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL); if (!ofs)