On Thu, 28 Feb 2013 19:25:28 +0400 Pavel Shilovsky <piastry@xxxxxxxxxxx> wrote: > If O_DENYMAND flag is specified, O_DENYREAD/WRITE/MAND flags are > translated to flock's flags: > > !O_DENYREAD -> LOCK_READ > !O_DENYWRITE -> LOCK_WRITE > O_DENYMAND -> LOCK_MAND > > and set through flock_lock_file on a file. > > This change affects opens that use O_DENYMAND flag - all other > native Linux opens don't care about these flags. It allow us to > enable this feature for applications that need it (e.g. NFS and > Samba servers that export the same directory for Windows clients, > or Wine applications that access the same files simultaneously). > > Create codepath is slightly changed to prevent data races on > newely created files: when open with O_CREAT can return with -ETXTBSY > error for successfully created files due to a deny lock set by > another task. > > Signed-off-by: Pavel Shilovsky <piastry@xxxxxxxxxxx> > --- > fs/locks.c | 116 +++++++++++++++++++++++++++++++++++++++++++++++------ > fs/namei.c | 44 ++++++++++++++++++-- > include/linux/fs.h | 6 +++ > 3 files changed, 151 insertions(+), 15 deletions(-) > > diff --git a/fs/locks.c b/fs/locks.c > index a94e331..0cc7d1b 100644 > --- a/fs/locks.c > +++ b/fs/locks.c > @@ -605,20 +605,81 @@ static int posix_locks_conflict(struct file_lock *caller_fl, struct file_lock *s > return (locks_conflict(caller_fl, sys_fl)); > } > > -/* Determine if lock sys_fl blocks lock caller_fl. FLOCK specific > - * checking before calling the locks_conflict(). > +static unsigned int > +deny_flags_to_cmd(unsigned int flags) > +{ > + unsigned int cmd = LOCK_MAND; > + > + if (!(flags & O_DENYREAD)) > + cmd |= LOCK_READ; > + if (!(flags & O_DENYWRITE)) > + cmd |= LOCK_WRITE; > + > + return cmd; > +} > + > +/* > + * locks_mand_conflict - Determine if there's a share reservation conflict > + * @caller_fl: lock we're attempting to acquire > + * @sys_fl: lock already present on system that we're checking against > + * > + * Check to see if there's a share_reservation conflict. LOCK_READ/LOCK_WRITE > + * tell us whether the reservation allows other readers and writers. > + * > + * We only check against other LOCK_MAND locks, so applications that want to > + * use share mode locking will only conflict against one another. "normal" > + * applications that open files won't be affected by and won't themselves > + * affect the share reservations. > */ > -static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) > +static int > +locks_mand_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) > { > - /* FLOCK locks referring to the same filp do not conflict with > + unsigned char caller_type = caller_fl->fl_type; > + unsigned char sys_type = sys_fl->fl_type; > + fmode_t caller_fmode = caller_fl->fl_file->f_mode; > + fmode_t sys_fmode = sys_fl->fl_file->f_mode; > + > + /* they can only conflict if they're both LOCK_MAND */ > + if (!(caller_type & LOCK_MAND) || !(sys_type & LOCK_MAND)) > + return 0; > + > + if (!(caller_type & LOCK_READ) && (sys_fmode & FMODE_READ)) > + return 1; > + if (!(caller_type & LOCK_WRITE) && (sys_fmode & FMODE_WRITE)) > + return 1; > + if (!(sys_type & LOCK_READ) && (caller_fmode & FMODE_READ)) > + return 1; > + if (!(sys_type & LOCK_WRITE) && (caller_fmode & FMODE_WRITE)) > + return 1; > + > + return 0; > +} > + > +/* > + * Determine if lock sys_fl blocks lock caller_fl. FLOCK specific checking > + * before calling the locks_conflict(). Boolean is_mand indicates whether > + * we should use a share reservation scheme or not. > + */ > +static int > +flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl, > + bool is_mand) I'm not sure you really need to add this new is_mand bool. Won't that be equivalent to (caller_fl->fl_type & LOCK_MAND)? > +{ > + /* > + * FLOCK locks referring to the same filp do not conflict with > * each other. > */ > - if (!IS_FLOCK(sys_fl) || (caller_fl->fl_file == sys_fl->fl_file)) > - return (0); > - if ((caller_fl->fl_type & LOCK_MAND) || (sys_fl->fl_type & LOCK_MAND)) > + if (!IS_FLOCK(sys_fl)) > + return 0; > + if ((caller_fl->fl_type & LOCK_MAND) || (sys_fl->fl_type & LOCK_MAND)) { > + if (is_mand) > + return locks_mand_conflict(caller_fl, sys_fl); > + else > + return 0; > + } > + if (caller_fl->fl_file == sys_fl->fl_file) > return 0; > > - return (locks_conflict(caller_fl, sys_fl)); > + return locks_conflict(caller_fl, sys_fl); > } > > void > @@ -697,14 +758,19 @@ static int posix_locks_deadlock(struct file_lock *caller_fl, > return 0; > } > > -/* Try to create a FLOCK lock on filp. We always insert new FLOCK locks > +/* > + * Try to create a FLOCK lock on filp. We always insert new FLOCK locks > * after any leases, but before any posix locks. > * > * Note that if called with an FL_EXISTS argument, the caller may determine > * whether or not a lock was successfully freed by testing the return > * value for -ENOENT. > + * > + * Take @is_conflict callback that determines how to check if locks have > + * conflicts or not. > */ > -static int flock_lock_file(struct file *filp, struct file_lock *request) > +static int > +flock_lock_file(struct file *filp, struct file_lock *request, bool is_mand) Ditto here on the is_mand bool. I think you can determine that from request->fl_type. Right? > { > struct file_lock *new_fl = NULL; > struct file_lock **before; > @@ -760,7 +826,7 @@ find_conflict: > break; > if (IS_LEASE(fl)) > continue; > - if (!flock_locks_conflict(request, fl)) > + if (!flock_locks_conflict(request, fl, is_mand)) > continue; > error = -EAGAIN; > if (!(request->fl_flags & FL_SLEEP)) > @@ -783,6 +849,32 @@ out: > return error; > } > > +/* > + * Determine if a file is allowed to be opened with specified access and deny > + * modes. Lock the file and return 0 if checks passed, otherwise return a error > + * code. > + */ > +int > +deny_lock_file(struct file *filp) > +{ > + struct file_lock *lock; > + int error = 0; > + > + if (!(filp->f_flags & O_DENYMAND)) > + return error; > + > + error = flock_make_lock(filp, &lock, deny_flags_to_cmd(filp->f_flags)); > + if (error) > + return error; > + > + error = flock_lock_file(filp, lock, true); > + if (error == -EAGAIN) > + error = -ETXTBSY; > + I think EBUSY would be a better return code here. ETXTBSY is returned in more specific circumstances -- mostly when you're opening a file for write that is being executed. > + locks_free_lock(lock); > + return error; > +} > + > static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock) > { > struct file_lock *fl; > @@ -1589,7 +1681,7 @@ int flock_lock_file_wait(struct file *filp, struct file_lock *fl) > int error; > might_sleep(); > for (;;) { > - error = flock_lock_file(filp, fl); > + error = flock_lock_file(filp, fl, false); > if (error != FILE_LOCK_DEFERRED) > break; > error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); > diff --git a/fs/namei.c b/fs/namei.c > index 43a97ee..c1f7e08 100644 > --- a/fs/namei.c > +++ b/fs/namei.c > @@ -2559,9 +2559,14 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, > * here. > */ > error = may_open(&file->f_path, acc_mode, open_flag); > - if (error) > + if (error) { > fput(file); > + goto out; > + } > > + error = deny_lock_file(file); > + if (error) > + fput(file); > out: > dput(dentry); > return error; > @@ -2771,9 +2776,9 @@ retry_lookup: > } > mutex_lock(&dir->d_inode->i_mutex); > error = lookup_open(nd, path, file, op, got_write, opened); > - mutex_unlock(&dir->d_inode->i_mutex); > > if (error <= 0) { > + mutex_unlock(&dir->d_inode->i_mutex); > if (error) > goto out; > > @@ -2791,8 +2796,32 @@ retry_lookup: > will_truncate = false; > acc_mode = MAY_OPEN; > path_to_nameidata(path, nd); > - goto finish_open_created; > + > + /* > + * Unlock parent i_mutex later when the open finishes - prevent > + * races when a file can be locked with a deny lock by another > + * task that opens the file. > + */ > + error = may_open(&nd->path, acc_mode, open_flag); > + if (error) { > + mutex_unlock(&dir->d_inode->i_mutex); > + goto out; > + } > + file->f_path.mnt = nd->path.mnt; > + error = finish_open(file, nd->path.dentry, NULL, opened); > + if (error) { > + mutex_unlock(&dir->d_inode->i_mutex); > + if (error == -EOPENSTALE) > + goto stale_open; > + goto out; > + } > + error = deny_lock_file(file); > + mutex_unlock(&dir->d_inode->i_mutex); > + if (error) > + goto exit_fput; > + goto opened; > } > + mutex_unlock(&dir->d_inode->i_mutex); > > /* > * create/update audit record if it already exists. > @@ -2885,6 +2914,15 @@ finish_open_created: > goto stale_open; > goto out; > } > + /* > + * Lock parent i_mutex to prevent races with deny locks on newely > + * created files. > + */ > + mutex_lock(&dir->d_inode->i_mutex); > + error = deny_lock_file(file); > + mutex_unlock(&dir->d_inode->i_mutex); > + if (error) > + goto exit_fput; > opened: > error = open_check_o_direct(file); > if (error) > diff --git a/include/linux/fs.h b/include/linux/fs.h > index 7617ee0..347e1de 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -1005,6 +1005,7 @@ extern int lease_modify(struct file_lock **, int); > extern int lock_may_read(struct inode *, loff_t start, unsigned long count); > extern int lock_may_write(struct inode *, loff_t start, unsigned long count); > extern void locks_delete_block(struct file_lock *waiter); > +extern int deny_lock_file(struct file *); > extern void lock_flocks(void); > extern void unlock_flocks(void); > #else /* !CONFIG_FILE_LOCKING */ > @@ -1153,6 +1154,11 @@ static inline void locks_delete_block(struct file_lock *waiter) > { > } > > +static inline int deny_lock_file(struct file *filp) > +{ > + return 0; > +} > + > static inline void lock_flocks(void) > { > } -- Jeff Layton <jlayton@xxxxxxxxxx> -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html