On Tue, Jul 19, 2016 at 02:27:44PM +0200, Miklos Szeredi wrote: > This patch allows flock, posix locks, ofd locks and leases to work > correctly on overlayfs. > > Instead of using the underlying inode for storing lock context use the > overlay inode. This allows locks to be persistent across copy-up. Remind me when the overlay inode is created--is it immediately on any (even read-only) open? --b. > > This is done by introducing locks_inode() helper and using it instead of > file_inode() to get the inode in locking code. For non-overlayfs the two > are equivalent, except for an extra pointer dereference in locks_inode(). > > Since lock operations are in "struct file_operations" we must also make > sure not to call underlying filesystem's lock operations. Introcude a > super block flag MS_NOREMOTELOCK to this effect. > > Finally for correct operation of leases i_writecount too needs to be > modified on the overlay inode. > > Signed-off-by: Miklos Szeredi <mszeredi@xxxxxxxxxx> > --- > fs/file_table.c | 2 +- > fs/locks.c | 50 +++++++++++++++++++++++++++---------------------- > fs/namespace.c | 2 +- > fs/open.c | 9 +++++---- > fs/overlayfs/super.c | 2 +- > include/linux/fs.h | 20 ++++++++++++++++---- > include/uapi/linux/fs.h | 1 + > kernel/fork.c | 2 +- > mm/mmap.c | 4 ++-- > 9 files changed, 56 insertions(+), 36 deletions(-) > > diff --git a/fs/file_table.c b/fs/file_table.c > index ad17e05ebf95..bb59284c220c 100644 > --- a/fs/file_table.c > +++ b/fs/file_table.c > @@ -216,7 +216,7 @@ static void __fput(struct file *file) > if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) > i_readcount_dec(inode); > if (file->f_mode & FMODE_WRITER) { > - put_write_access(inode); > + put_write_access(locks_inode(file)); > __mnt_drop_write(mnt); > } > file->f_path.dentry = NULL; > diff --git a/fs/locks.c b/fs/locks.c > index ee1b15f6fc13..c1656cff53ee 100644 > --- a/fs/locks.c > +++ b/fs/locks.c > @@ -139,6 +139,11 @@ > #define IS_LEASE(fl) (fl->fl_flags & (FL_LEASE|FL_DELEG|FL_LAYOUT)) > #define IS_OFDLCK(fl) (fl->fl_flags & FL_OFDLCK) > > +static inline bool is_remote_lock(struct file *filp) > +{ > + return likely(!(filp->f_path.dentry->d_sb->s_flags & MS_NOREMOTELOCK)); > +} > + > static bool lease_breaking(struct file_lock *fl) > { > return fl->fl_flags & (FL_UNLOCK_PENDING | FL_DOWNGRADE_PENDING); > @@ -791,7 +796,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl) > { > struct file_lock *cfl; > struct file_lock_context *ctx; > - struct inode *inode = file_inode(filp); > + struct inode *inode = locks_inode(filp); > > ctx = smp_load_acquire(&inode->i_flctx); > if (!ctx || list_empty_careful(&ctx->flc_posix)) { > @@ -1192,7 +1197,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request, > int posix_lock_file(struct file *filp, struct file_lock *fl, > struct file_lock *conflock) > { > - return posix_lock_inode(file_inode(filp), fl, conflock); > + return posix_lock_inode(locks_inode(filp), fl, conflock); > } > EXPORT_SYMBOL(posix_lock_file); > > @@ -1232,7 +1237,7 @@ static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl) > int locks_mandatory_locked(struct file *file) > { > int ret; > - struct inode *inode = file_inode(file); > + struct inode *inode = locks_inode(file); > struct file_lock_context *ctx; > struct file_lock *fl; > > @@ -1572,7 +1577,7 @@ EXPORT_SYMBOL(lease_get_mtime); > int fcntl_getlease(struct file *filp) > { > struct file_lock *fl; > - struct inode *inode = file_inode(filp); > + struct inode *inode = locks_inode(filp); > struct file_lock_context *ctx; > int type = F_UNLCK; > LIST_HEAD(dispose); > @@ -1580,7 +1585,7 @@ int fcntl_getlease(struct file *filp) > ctx = smp_load_acquire(&inode->i_flctx); > if (ctx && !list_empty_careful(&ctx->flc_lease)) { > spin_lock(&ctx->flc_lock); > - time_out_leases(file_inode(filp), &dispose); > + time_out_leases(inode, &dispose); > list_for_each_entry(fl, &ctx->flc_lease, fl_list) { > if (fl->fl_file != filp) > continue; > @@ -1628,7 +1633,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr > { > struct file_lock *fl, *my_fl = NULL, *lease; > struct dentry *dentry = filp->f_path.dentry; > - struct inode *inode = file_inode(filp); > + struct inode *inode = dentry->d_inode; > struct file_lock_context *ctx; > bool is_deleg = (*flp)->fl_flags & FL_DELEG; > int error; > @@ -1742,7 +1747,7 @@ static int generic_delete_lease(struct file *filp, void *owner) > { > int error = -EAGAIN; > struct file_lock *fl, *victim = NULL; > - struct inode *inode = file_inode(filp); > + struct inode *inode = locks_inode(filp); > struct file_lock_context *ctx; > LIST_HEAD(dispose); > > @@ -1782,7 +1787,7 @@ static int generic_delete_lease(struct file *filp, void *owner) > int generic_setlease(struct file *filp, long arg, struct file_lock **flp, > void **priv) > { > - struct inode *inode = file_inode(filp); > + struct inode *inode = locks_inode(filp); > int error; > > if ((!uid_eq(current_fsuid(), inode->i_uid)) && !capable(CAP_LEASE)) > @@ -1830,7 +1835,7 @@ EXPORT_SYMBOL(generic_setlease); > int > vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv) > { > - if (filp->f_op->setlease) > + if (filp->f_op->setlease && is_remote_lock(filp)) > return filp->f_op->setlease(filp, arg, lease, priv); > else > return generic_setlease(filp, arg, lease, priv); > @@ -1979,7 +1984,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) > if (error) > goto out_free; > > - if (f.file->f_op->flock) > + if (f.file->f_op->flock && is_remote_lock(f.file)) > error = f.file->f_op->flock(f.file, > (can_sleep) ? F_SETLKW : F_SETLK, > lock); > @@ -2005,7 +2010,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) > */ > int vfs_test_lock(struct file *filp, struct file_lock *fl) > { > - if (filp->f_op->lock) > + if (filp->f_op->lock && is_remote_lock(filp)) > return filp->f_op->lock(filp, F_GETLK, fl); > posix_test_lock(filp, fl); > return 0; > @@ -2129,7 +2134,7 @@ out: > */ > int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf) > { > - if (filp->f_op->lock) > + if (filp->f_op->lock && is_remote_lock(filp)) > return filp->f_op->lock(filp, cmd, fl); > else > return posix_lock_file(filp, fl, conf); > @@ -2191,7 +2196,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, > if (file_lock == NULL) > return -ENOLCK; > > - inode = file_inode(filp); > + inode = locks_inode(filp); > > /* > * This might block, so we do it before checking the inode. > @@ -2343,7 +2348,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd, > if (copy_from_user(&flock, l, sizeof(flock))) > goto out; > > - inode = file_inode(filp); > + inode = locks_inode(filp); > > /* Don't allow mandatory locks on files that may be memory mapped > * and shared. > @@ -2426,6 +2431,7 @@ out: > void locks_remove_posix(struct file *filp, fl_owner_t owner) > { > int error; > + struct inode *inode = locks_inode(filp); > struct file_lock lock; > struct file_lock_context *ctx; > > @@ -2434,7 +2440,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner) > * posix_lock_file(). Another process could be setting a lock on this > * file at the same time, but we wouldn't remove that lock anyway. > */ > - ctx = smp_load_acquire(&file_inode(filp)->i_flctx); > + ctx = smp_load_acquire(&inode->i_flctx); > if (!ctx || list_empty(&ctx->flc_posix)) > return; > > @@ -2452,7 +2458,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner) > > if (lock.fl_ops && lock.fl_ops->fl_release_private) > lock.fl_ops->fl_release_private(&lock); > - trace_locks_remove_posix(file_inode(filp), &lock, error); > + trace_locks_remove_posix(inode, &lock, error); > } > > EXPORT_SYMBOL(locks_remove_posix); > @@ -2469,12 +2475,12 @@ locks_remove_flock(struct file *filp, struct file_lock_context *flctx) > .fl_type = F_UNLCK, > .fl_end = OFFSET_MAX, > }; > - struct inode *inode = file_inode(filp); > + struct inode *inode = locks_inode(filp); > > if (list_empty(&flctx->flc_flock)) > return; > > - if (filp->f_op->flock) > + if (filp->f_op->flock && is_remote_lock(filp)) > filp->f_op->flock(filp, F_SETLKW, &fl); > else > flock_lock_inode(inode, &fl); > @@ -2508,7 +2514,7 @@ void locks_remove_file(struct file *filp) > { > struct file_lock_context *ctx; > > - ctx = smp_load_acquire(&file_inode(filp)->i_flctx); > + ctx = smp_load_acquire(&locks_inode(filp)->i_flctx); > if (!ctx) > return; > > @@ -2552,7 +2558,7 @@ EXPORT_SYMBOL(posix_unblock_lock); > */ > int vfs_cancel_lock(struct file *filp, struct file_lock *fl) > { > - if (filp->f_op->lock) > + if (filp->f_op->lock && is_remote_lock(filp)) > return filp->f_op->lock(filp, F_CANCELLK, fl); > return 0; > } > @@ -2580,7 +2586,7 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl, > fl_pid = fl->fl_pid; > > if (fl->fl_file != NULL) > - inode = file_inode(fl->fl_file); > + inode = locks_inode(fl->fl_file); > > seq_printf(f, "%lld:%s ", id, pfx); > if (IS_POSIX(fl)) { > @@ -2682,7 +2688,7 @@ static void __show_fd_locks(struct seq_file *f, > void show_fd_locks(struct seq_file *f, > struct file *filp, struct files_struct *files) > { > - struct inode *inode = file_inode(filp); > + struct inode *inode = locks_inode(filp); > struct file_lock_context *ctx; > int id = 0; > > diff --git a/fs/namespace.c b/fs/namespace.c > index 419f746d851d..05daf3f98d86 100644 > --- a/fs/namespace.c > +++ b/fs/namespace.c > @@ -2722,7 +2722,7 @@ long do_mount(const char *dev_name, const char __user *dir_name, > > flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN | > MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT | > - MS_STRICTATIME); > + MS_STRICTATIME | MS_NOREMOTELOCK); > > if (flags & MS_REMOUNT) > retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags, > diff --git a/fs/open.c b/fs/open.c > index bf66cf1a9f5c..48f38e33e95b 100644 > --- a/fs/open.c > +++ b/fs/open.c > @@ -685,6 +685,7 @@ static int do_dentry_open(struct file *f, > const struct cred *cred) > { > static const struct file_operations empty_fops = {}; > + struct inode *lkinode = locks_inode(f); > int error; > > f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK | > @@ -701,12 +702,12 @@ static int do_dentry_open(struct file *f, > } > > if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) { > - error = get_write_access(inode); > + error = get_write_access(lkinode); > if (unlikely(error)) > goto cleanup_file; > error = __mnt_want_write(f->f_path.mnt); > if (unlikely(error)) { > - put_write_access(inode); > + put_write_access(lkinode); > goto cleanup_file; > } > f->f_mode |= FMODE_WRITER; > @@ -726,7 +727,7 @@ static int do_dentry_open(struct file *f, > if (error) > goto cleanup_all; > > - error = break_lease(inode, f->f_flags); > + error = break_lease(lkinode, f->f_flags); > if (error) > goto cleanup_all; > > @@ -755,7 +756,7 @@ static int do_dentry_open(struct file *f, > cleanup_all: > fops_put(f->f_op); > if (f->f_mode & FMODE_WRITER) { > - put_write_access(inode); > + put_write_access(lkinode); > __mnt_drop_write(f->f_path.mnt); > } > cleanup_file: > diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c > index ed4aa34211a6..4c91d9ed4689 100644 > --- a/fs/overlayfs/super.c > +++ b/fs/overlayfs/super.c > @@ -1280,7 +1280,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) > sb->s_xattr = ovl_xattr_noacl_handlers; > sb->s_root = root_dentry; > sb->s_fs_info = ufs; > - sb->s_flags |= MS_POSIXACL; > + sb->s_flags |= MS_POSIXACL | MS_NOREMOTELOCK; > > return 0; > > diff --git a/include/linux/fs.h b/include/linux/fs.h > index bacc0733663c..be919d18fa6a 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -1090,6 +1090,18 @@ struct file_lock_context { > > extern void send_sigio(struct fown_struct *fown, int fd, int band); > > +/* > + * Return the inode to use for locking > + * > + * For overlayfs this should be the overlay inode, not the real inode returned > + * by file_inode(). For any other fs file_inode(filp) and locks_inode(filp) are > + * equal. > + */ > +static inline struct inode *locks_inode(const struct file *f) > +{ > + return f->f_path.dentry->d_inode; > +} > + > #ifdef CONFIG_FILE_LOCKING > extern int fcntl_getlk(struct file *, unsigned int, struct flock __user *); > extern int fcntl_setlk(unsigned int, struct file *, unsigned int, > @@ -1277,7 +1289,7 @@ static inline struct dentry *file_dentry(const struct file *file) > > static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl) > { > - return locks_lock_inode_wait(file_inode(filp), fl); > + return locks_lock_inode_wait(locks_inode(filp), fl); > } > > struct fasync_struct { > @@ -2132,7 +2144,7 @@ static inline int mandatory_lock(struct inode *ino) > > static inline int locks_verify_locked(struct file *file) > { > - if (mandatory_lock(file_inode(file))) > + if (mandatory_lock(locks_inode(file))) > return locks_mandatory_locked(file); > return 0; > } > @@ -2584,7 +2596,7 @@ static inline int get_write_access(struct inode *inode) > } > static inline int deny_write_access(struct file *file) > { > - struct inode *inode = file_inode(file); > + struct inode *inode = locks_inode(file); > return atomic_dec_unless_positive(&inode->i_writecount) ? 0 : -ETXTBSY; > } > static inline void put_write_access(struct inode * inode) > @@ -2594,7 +2606,7 @@ static inline void put_write_access(struct inode * inode) > static inline void allow_write_access(struct file *file) > { > if (file) > - atomic_inc(&file_inode(file)->i_writecount); > + atomic_inc(&locks_inode(file)->i_writecount); > } > static inline bool inode_is_open_for_write(const struct inode *inode) > { > diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h > index 3b00f7c8943f..2473272169f2 100644 > --- a/include/uapi/linux/fs.h > +++ b/include/uapi/linux/fs.h > @@ -132,6 +132,7 @@ struct inodes_stat_t { > #define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */ > > /* These sb flags are internal to the kernel */ > +#define MS_NOREMOTELOCK (1<<27) > #define MS_NOSEC (1<<28) > #define MS_BORN (1<<29) > #define MS_ACTIVE (1<<30) > diff --git a/kernel/fork.c b/kernel/fork.c > index 4a7ec0c6c88c..104d687f63f1 100644 > --- a/kernel/fork.c > +++ b/kernel/fork.c > @@ -476,7 +476,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) > tmp->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; > file = tmp->vm_file; > if (file) { > - struct inode *inode = file_inode(file); > + struct inode *inode = locks_inode(file); > struct address_space *mapping = file->f_mapping; > > get_file(file); > diff --git a/mm/mmap.c b/mm/mmap.c > index de2c1769cc68..a023caff19d5 100644 > --- a/mm/mmap.c > +++ b/mm/mmap.c > @@ -126,7 +126,7 @@ static void __remove_shared_vm_struct(struct vm_area_struct *vma, > struct file *file, struct address_space *mapping) > { > if (vma->vm_flags & VM_DENYWRITE) > - atomic_inc(&file_inode(file)->i_writecount); > + atomic_inc(&locks_inode(file)->i_writecount); > if (vma->vm_flags & VM_SHARED) > mapping_unmap_writable(mapping); > > @@ -537,7 +537,7 @@ static void __vma_link_file(struct vm_area_struct *vma) > struct address_space *mapping = file->f_mapping; > > if (vma->vm_flags & VM_DENYWRITE) > - atomic_dec(&file_inode(file)->i_writecount); > + atomic_dec(&locks_inode(file)->i_writecount); > if (vma->vm_flags & VM_SHARED) > atomic_inc(&mapping->i_mmap_writable); > > -- > 2.5.5 -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html