Re: [PATCH v3 2/7] vfs: Add O_DENYREAD/WRITE flags support for open syscall

Jeff Layton <jlayton@xxxxxxxxxx> · Mon, 11 Mar 2013 14:46:09 -0400

On Thu, 28 Feb 2013 19:25:28 +0400
Pavel Shilovsky <piastry@xxxxxxxxxxx> wrote:

> If O_DENYMAND flag is specified, O_DENYREAD/WRITE/MAND flags are
> translated to flock's flags:
> 
> !O_DENYREAD  -> LOCK_READ
> !O_DENYWRITE -> LOCK_WRITE
> O_DENYMAND   -> LOCK_MAND
> 
> and set through flock_lock_file on a file.
> 
> This change affects opens that use O_DENYMAND flag - all other
> native Linux opens don't care about these flags. It allow us to
> enable this feature for applications that need it (e.g. NFS and
> Samba servers that export the same directory for Windows clients,
> or Wine applications that access the same files simultaneously).
> 
> Create codepath is slightly changed to prevent data races on
> newely created files: when open with O_CREAT can return with -ETXTBSY
> error for successfully created files due to a deny lock set by
> another task.
> 
> Signed-off-by: Pavel Shilovsky <piastry@xxxxxxxxxxx>
> ---
>  fs/locks.c         | 116 +++++++++++++++++++++++++++++++++++++++++++++++------
>  fs/namei.c         |  44 ++++++++++++++++++--
>  include/linux/fs.h |   6 +++
>  3 files changed, 151 insertions(+), 15 deletions(-)
> 
> diff --git a/fs/locks.c b/fs/locks.c
> index a94e331..0cc7d1b 100644
> --- a/fs/locks.c
> +++ b/fs/locks.c
> @@ -605,20 +605,81 @@ static int posix_locks_conflict(struct file_lock *caller_fl, struct file_lock *s
>  	return (locks_conflict(caller_fl, sys_fl));
>  }
>  
> -/* Determine if lock sys_fl blocks lock caller_fl. FLOCK specific
> - * checking before calling the locks_conflict().
> +static unsigned int
> +deny_flags_to_cmd(unsigned int flags)
> +{
> +	unsigned int cmd = LOCK_MAND;
> +
> +	if (!(flags & O_DENYREAD))
> +		cmd |= LOCK_READ;
> +	if (!(flags & O_DENYWRITE))
> +		cmd |= LOCK_WRITE;
> +
> +	return cmd;
> +}
> +
> +/*
> + * locks_mand_conflict - Determine if there's a share reservation conflict
> + * @caller_fl: lock we're attempting to acquire
> + * @sys_fl: lock already present on system that we're checking against
> + *
> + * Check to see if there's a share_reservation conflict. LOCK_READ/LOCK_WRITE
> + * tell us whether the reservation allows other readers and writers.
> + *
> + * We only check against other LOCK_MAND locks, so applications that want to
> + * use share mode locking will only conflict against one another. "normal"
> + * applications that open files won't be affected by and won't themselves
> + * affect the share reservations.
>   */
> -static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
> +static int
> +locks_mand_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
>  {
> -	/* FLOCK locks referring to the same filp do not conflict with
> +	unsigned char caller_type = caller_fl->fl_type;
> +	unsigned char sys_type = sys_fl->fl_type;
> +	fmode_t caller_fmode = caller_fl->fl_file->f_mode;
> +	fmode_t sys_fmode = sys_fl->fl_file->f_mode;
> +
> +	/* they can only conflict if they're both LOCK_MAND */
> +	if (!(caller_type & LOCK_MAND) || !(sys_type & LOCK_MAND))
> +		return 0;
> +
> +	if (!(caller_type & LOCK_READ) && (sys_fmode & FMODE_READ))
> +		return 1;
> +	if (!(caller_type & LOCK_WRITE) && (sys_fmode & FMODE_WRITE))
> +		return 1;
> +	if (!(sys_type & LOCK_READ) && (caller_fmode & FMODE_READ))
> +		return 1;
> +	if (!(sys_type & LOCK_WRITE) && (caller_fmode & FMODE_WRITE))
> +		return 1;
> +
> +	return 0;
> +}
> +
> +/*
> + * Determine if lock sys_fl blocks lock caller_fl. FLOCK specific checking
> + * before calling the locks_conflict(). Boolean is_mand indicates whether
> + * we should use a share reservation scheme or not.
> + */
> +static int
> +flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl,
> +		     bool is_mand)

I'm not sure you really need to add this new is_mand bool. Won't that
be equivalent to (caller_fl->fl_type & LOCK_MAND)?

> +{
> +	/*
> +	 * FLOCK locks referring to the same filp do not conflict with
>  	 * each other.
>  	 */
> -	if (!IS_FLOCK(sys_fl) || (caller_fl->fl_file == sys_fl->fl_file))
> -		return (0);
> -	if ((caller_fl->fl_type & LOCK_MAND) || (sys_fl->fl_type & LOCK_MAND))
> +	if (!IS_FLOCK(sys_fl))
> +		return 0;
> +	if ((caller_fl->fl_type & LOCK_MAND) || (sys_fl->fl_type & LOCK_MAND)) {
> +		if (is_mand)
> +			return locks_mand_conflict(caller_fl, sys_fl);
> +		else
> +			return 0;
> +	}
> +	if (caller_fl->fl_file == sys_fl->fl_file)
>  		return 0;
>  
> -	return (locks_conflict(caller_fl, sys_fl));
> +	return locks_conflict(caller_fl, sys_fl);
>  }
>  
>  void
> @@ -697,14 +758,19 @@ static int posix_locks_deadlock(struct file_lock *caller_fl,
>  	return 0;
>  }
>  
> -/* Try to create a FLOCK lock on filp. We always insert new FLOCK locks
> +/*
> + * Try to create a FLOCK lock on filp. We always insert new FLOCK locks
>   * after any leases, but before any posix locks.
>   *
>   * Note that if called with an FL_EXISTS argument, the caller may determine
>   * whether or not a lock was successfully freed by testing the return
>   * value for -ENOENT.
> + *
> + * Take @is_conflict callback that determines how to check if locks have
> + * conflicts or not.
>   */
> -static int flock_lock_file(struct file *filp, struct file_lock *request)
> +static int
> +flock_lock_file(struct file *filp, struct file_lock *request, bool is_mand)

Ditto here on the is_mand bool. I think you can determine that from
request->fl_type. Right?

>  {
>  	struct file_lock *new_fl = NULL;
>  	struct file_lock **before;
> @@ -760,7 +826,7 @@ find_conflict:
>  			break;
>  		if (IS_LEASE(fl))
>  			continue;
> -		if (!flock_locks_conflict(request, fl))
> +		if (!flock_locks_conflict(request, fl, is_mand))
>  			continue;
>  		error = -EAGAIN;
>  		if (!(request->fl_flags & FL_SLEEP))
> @@ -783,6 +849,32 @@ out:
>  	return error;
>  }
>  
> +/*
> + * Determine if a file is allowed to be opened with specified access and deny
> + * modes. Lock the file and return 0 if checks passed, otherwise return a error
> + * code.
> + */
> +int
> +deny_lock_file(struct file *filp)
> +{
> +	struct file_lock *lock;
> +	int error = 0;
> +
> +	if (!(filp->f_flags & O_DENYMAND))
> +		return error;
> +
> +	error = flock_make_lock(filp, &lock, deny_flags_to_cmd(filp->f_flags));
> +	if (error)
> +		return error;
> +
> +	error = flock_lock_file(filp, lock, true);
> +	if (error == -EAGAIN)
> +		error = -ETXTBSY;
> +

I think EBUSY would be a better return code here. ETXTBSY is returned
in more specific circumstances -- mostly when you're opening a file for
write that is being executed.

> +	locks_free_lock(lock);
> +	return error;
> +}
> +
>  static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock)
>  {
>  	struct file_lock *fl;
> @@ -1589,7 +1681,7 @@ int flock_lock_file_wait(struct file *filp, struct file_lock *fl)
>  	int error;
>  	might_sleep();
>  	for (;;) {
> -		error = flock_lock_file(filp, fl);
> +		error = flock_lock_file(filp, fl, false);
>  		if (error != FILE_LOCK_DEFERRED)
>  			break;
>  		error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
> diff --git a/fs/namei.c b/fs/namei.c
> index 43a97ee..c1f7e08 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -2559,9 +2559,14 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
>  	 * here.
>  	 */
>  	error = may_open(&file->f_path, acc_mode, open_flag);
> -	if (error)
> +	if (error) {
>  		fput(file);
> +		goto out;
> +	}
>  
> +	error = deny_lock_file(file);
> +	if (error)
> +		fput(file);
>  out:
>  	dput(dentry);
>  	return error;
> @@ -2771,9 +2776,9 @@ retry_lookup:
>  	}
>  	mutex_lock(&dir->d_inode->i_mutex);
>  	error = lookup_open(nd, path, file, op, got_write, opened);
> -	mutex_unlock(&dir->d_inode->i_mutex);
>  
>  	if (error <= 0) {
> +		mutex_unlock(&dir->d_inode->i_mutex);
>  		if (error)
>  			goto out;
>  
> @@ -2791,8 +2796,32 @@ retry_lookup:
>  		will_truncate = false;
>  		acc_mode = MAY_OPEN;
>  		path_to_nameidata(path, nd);
> -		goto finish_open_created;
> +
> +		/*
> +		 * Unlock parent i_mutex later when the open finishes - prevent
> +		 * races when a file can be locked with a deny lock by another
> +		 * task that opens the file.
> +		 */
> +		error = may_open(&nd->path, acc_mode, open_flag);
> +		if (error) {
> +			mutex_unlock(&dir->d_inode->i_mutex);
> +			goto out;
> +		}
> +		file->f_path.mnt = nd->path.mnt;
> +		error = finish_open(file, nd->path.dentry, NULL, opened);
> +		if (error) {
> +			mutex_unlock(&dir->d_inode->i_mutex);
> +			if (error == -EOPENSTALE)
> +				goto stale_open;
> +			goto out;
> +		}
> +		error = deny_lock_file(file);
> +		mutex_unlock(&dir->d_inode->i_mutex);
> +		if (error)
> +			goto exit_fput;
> +		goto opened;
>  	}
> +	mutex_unlock(&dir->d_inode->i_mutex);
>  
>  	/*
>  	 * create/update audit record if it already exists.
> @@ -2885,6 +2914,15 @@ finish_open_created:
>  			goto stale_open;
>  		goto out;
>  	}
> +	/*
> +	 * Lock parent i_mutex to prevent races with deny locks on newely
> +	 * created files.
> +	 */
> +	mutex_lock(&dir->d_inode->i_mutex);
> +	error = deny_lock_file(file);
> +	mutex_unlock(&dir->d_inode->i_mutex);
> +	if (error)
> +		goto exit_fput;
>  opened:
>  	error = open_check_o_direct(file);
>  	if (error)
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 7617ee0..347e1de 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1005,6 +1005,7 @@ extern int lease_modify(struct file_lock **, int);
>  extern int lock_may_read(struct inode *, loff_t start, unsigned long count);
>  extern int lock_may_write(struct inode *, loff_t start, unsigned long count);
>  extern void locks_delete_block(struct file_lock *waiter);
> +extern int deny_lock_file(struct file *);
>  extern void lock_flocks(void);
>  extern void unlock_flocks(void);
>  #else /* !CONFIG_FILE_LOCKING */
> @@ -1153,6 +1154,11 @@ static inline void locks_delete_block(struct file_lock *waiter)
>  {
>  }
>  
> +static inline int deny_lock_file(struct file *filp)
> +{
> +	return 0;
> +}
> +
>  static inline void lock_flocks(void)
>  {
>  }

-- 
Jeff Layton <jlayton@xxxxxxxxxx>
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html